Actual source code: nepdefl.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/nepimpl.h>         /*I "slepcnep.h" I*/
 12: #include <slepcblaslapack.h>
 13: #include "nepdefl.h"

 15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
 16: {

 20:   if (X) *X = extop->X;
 21:   if (H) {
 22:     MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
 23:   }
 24:   return(0);
 25: }

 27: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
 28: {
 30:   PetscMPIInt    np,rk,count;
 31:   PetscScalar    *array1,*array2;
 32:   PetscInt       nloc;

 35:   if (extop->szd) {
 36:     MPI_Comm_rank(PetscObjectComm((PetscObject)v),&rk);
 37:     MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
 38:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
 39:     if (v) {
 40:       VecGetArray(v,&array1);
 41:       VecGetArray(vex,&array2);
 42:       if (back) {
 43:         PetscMemcpy(array1,array2,nloc*sizeof(PetscScalar));
 44:       } else {
 45:         PetscMemcpy(array2,array1,nloc*sizeof(PetscScalar));
 46:       }
 47:       VecRestoreArray(v,&array1);
 48:       VecRestoreArray(vex,&array2);
 49:     }
 50:     if (a) {
 51:       VecGetArray(vex,&array2);
 52:       if (back) {
 53:         PetscMemcpy(a,array2+nloc,extop->szd*sizeof(PetscScalar));
 54:         PetscMPIIntCast(extop->szd,&count);
 55:         MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)v));
 56:       } else {
 57:         PetscMemcpy(array2+nloc,a,extop->szd*sizeof(PetscScalar));
 58:         PetscMPIIntCast(extop->szd,&count);
 59:         MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)v));
 60:       }
 61:       VecRestoreArray(vex,&array2);
 62:     }
 63:   } else {
 64:     if (back) {VecCopy(vex,v);}
 65:     else {VecCopy(v,vex);}
 66:   }
 67:   return(0);
 68: }

 70: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
 71: {
 73:   PetscInt       nloc;
 74:   Vec            u;
 75:   VecType        type;

 78:   if (extop->szd) {
 79:     BVGetColumn(extop->nep->V,0,&u);
 80:     VecGetType(u,&type);
 81:     BVRestoreColumn(extop->nep->V,0,&u);
 82:     VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
 83:     VecSetType(*v,type);
 84:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
 85:     nloc += extop->szd;
 86:     VecSetSizes(*v,nloc,PETSC_DECIDE);
 87:   } else {
 88:     BVCreateVec(extop->nep->V,v);
 89:   }
 90:   return(0);
 91: }

 93: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
 94: {
 95:   PetscErrorCode     ierr;
 96:   PetscInt           nloc;
 97:   BVType             type;
 98:   BVOrthogType       otype;
 99:   BVOrthogRefineType oref;
100:   PetscReal          oeta;
101:   BVOrthogBlockType  oblock;
102:   NEP                nep=extop->nep;

105:   if (extop->szd) {
106:     BVGetSizes(nep->V,&nloc,NULL,NULL);
107:     BVCreate(PetscObjectComm((PetscObject)nep),V);
108:     BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
109:     BVGetType(nep->V,&type);
110:     BVSetType(*V,type);
111:     BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
112:     BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
113:     PetscObjectStateIncrease((PetscObject)*V);
114:     PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
115:   } else {
116:     BVDuplicateResize(nep->V,sz,V);
117:   }
118:   return(0);
119: }

121: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
122: {
124:   PetscInt       n,next,i;
125:   PetscRandom    rand;
126:   PetscScalar    *array;
127:   PetscMPIInt    nn,np;

130:   BVGetRandomContext(extop->nep->V,&rand);
131:   VecSetRandom(v,rand);
132:   if (extop->szd) {
133:     MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);    
134:     BVGetSizes(extop->nep->V,&n,NULL,NULL);
135:     VecGetLocalSize(v,&next);
136:     VecGetArray(v,&array);
137:     for (i=n+extop->n;i<next;i++) array[i] = 0.0;
138:     for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
139:     PetscMPIIntCast(extop->n,&nn);
140:     MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PETSC_COMM_WORLD);
141:     VecRestoreArray(v,&array);
142:   }
143:   return(0);
144: }

146: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
147: {
149:   PetscInt       i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
150:   PetscScalar    sone=1.0,zero=0.0;
151:   PetscBLASInt   ldh_,ldhj_,n_;

154:   i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
155:   PetscMemzero(Hj,i*sizeof(PetscScalar));
156:   PetscBLASIntCast(ldhj+1,&ldh_);
157:   PetscBLASIntCast(ldhj,&ldhj_);
158:   PetscBLASIntCast(n,&n_);
159:   if (idx<1) {
160:     if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
161:     else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
162:   } else {
163:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
164:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
165:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
166:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
167:   }
168:   if (idx<0) {
169:     idx = -idx;
170:     for (k=1;k<idx;k++) {
171:       for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
172:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
173:       for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
174:       if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
175:     }
176:   }
177:   return(0);
178: }

180: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
181: {
183:   Vec            uu;
184:   PetscInt       ld,i;
185:   PetscReal      norm;
186:   PetscMPIInt    np;

189:   BVGetColumn(extop->X,extop->n,&uu);
190:   ld = extop->szd+1;
191:   NEPDeflationCopyToExtendedVec(extop,uu,extop->H+extop->n*ld,u,PETSC_TRUE);
192:   BVRestoreColumn(extop->X,extop->n,&uu);
193:   BVNormColumn(extop->X,extop->n,NORM_2,&norm);
194:   BVScaleColumn(extop->X,extop->n,1.0/norm);
195:   MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);    
196:   for (i=0;i<extop->n;i++) extop->H[extop->n*ld+i] *= PetscSqrtReal(np)/norm;
197:   extop->H[extop->n*(ld+1)] = lambda;
198:   extop->n++;
199:   BVSetActiveColumns(extop->X,0,extop->n);
200:   if (extop->n <= extop->szd) {
201:     /* update XpX */
202:     BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
203:     extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
204:     for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
205:     /* determine minimality index */
206:     extop->midx = PetscMin(extop->max_midx,extop->n);
207:     /* polynominal basis coeficients */
208:     for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
209:     /* evaluate the polynomial basis in H */
210:     NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
211:   }
212:   return(0);
213: }

215: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
216: {
218:   PetscInt       i,j,k,off,ini,fin,sz,ldh,n=extop->n;
219:   Mat            A,B;
220:   PetscScalar    *array;

223:   if (idx<0) {ini = 0; fin = extop->nep->nt;}
224:   else {ini = idx; fin = idx+1;}
225:   if (y) sz = hfjp?n+2:n+1;
226:   else sz = hfjp?3*n:2*n;
227:   ldh = extop->szd+1;
228:   MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
229:   MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
230:   MatDenseGetArray(A,&array);
231:   for (j=0;j<n;j++)
232:     for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
233:   MatDenseRestoreArray(A,&array);
234:   if (y) {
235:     MatDenseGetArray(A,&array);
236:     array[extop->n*(sz+1)] = lambda;
237:     if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
238:     for (i=0;i<n;i++) array[n*sz+i] = y[i];
239:     MatDenseRestoreArray(A,&array);
240:     for (j=ini;j<fin;j++) {
241:       FNEvaluateFunctionMat(extop->nep->f[j],A,B);
242:       MatDenseGetArray(B,&array);
243:       for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
244:       if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
245:       MatDenseRestoreArray(B,&array);
246:     }
247:   } else {
248:     off = idx<0?ld*n:0;
249:     MatDenseGetArray(A,&array);
250:     for (k=0;k<n;k++) {
251:       array[(n+k)*sz+k] = 1.0;
252:       array[(n+k)*sz+n+k] = lambda;
253:     }
254:     if (hfjp) for (k=0;k<n;k++) {
255:       array[(2*n+k)*sz+n+k] = 1.0;
256:       array[(2*n+k)*sz+2*n+k] = lambda;
257:     }
258:     MatDenseRestoreArray(A,&array);
259:     for (j=ini;j<fin;j++) {
260:       FNEvaluateFunctionMat(extop->nep->f[j],A,B);
261:       MatDenseGetArray(B,&array);
262:       for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
263:       if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
264:       MatDenseRestoreArray(B,&array);
265:     }
266:   }
267:   MatDestroy(&A);
268:   MatDestroy(&B);
269:   return(0);
270: }

272: static PetscErrorCode NEPDeflationMatShell_MatMult(Mat M,Vec x,Vec y)
273: {
274:   NEP_DEF_MATSHELL  *matctx;
275:   PetscErrorCode    ierr;
276:   NEP_EXT_OP        extop;
277:   Vec               x1,y1;
278:   PetscScalar       *yy,sone=1.0,zero=0.0;
279:   const PetscScalar *xx;
280:   PetscInt          nloc,i;
281:   PetscMPIInt       np;
282:   PetscBLASInt      n_,one=1,szd_;

285:   MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
286:   MatShellGetContext(M,(void**)&matctx);
287:   extop = matctx->extop;
288:   if (extop->szd) {
289:     x1 = matctx->w[0]; y1 = matctx->w[1];
290:     VecGetArrayRead(x,&xx);
291:     VecPlaceArray(x1,xx);
292:     VecGetArray(y,&yy);
293:     VecPlaceArray(y1,yy);
294:     MatMult(matctx->T,x1,y1);
295:     if (extop->n) {
296:       VecGetLocalSize(x1,&nloc);
297:       /* copy for avoiding warning of constant array xx */
298:       for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
299:       BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
300:       BVDotVec(extop->X,x1,matctx->work);
301:       PetscBLASIntCast(extop->n,&n_);
302:       PetscBLASIntCast(extop->szd,&szd_);
303:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
304:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
305:       for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
306:     }
307:     VecResetArray(x1);
308:     VecRestoreArrayRead(x,&xx);
309:     VecResetArray(y1);
310:     VecRestoreArray(y,&yy);
311:   } else {
312:     MatMult(matctx->T,x,y);
313:   }
314:   return(0);
315: }

317: static PetscErrorCode NEPDeflationMatShell_CreateVecs(Mat M,Vec *right,Vec *left)
318: {
319:   PetscErrorCode   ierr;
320:   NEP_DEF_MATSHELL *matctx;

323:   MatShellGetContext(M,(void**)&matctx);
324:   if (right) {
325:     VecDuplicate(matctx->w[0],right);
326:   }
327:   if (left) {
328:     VecDuplicate(matctx->w[0],left);
329:   }
330:   return(0);
331: }

333: static PetscErrorCode NEPDeflationMatShell_Destroy(Mat M)
334: {
335:   PetscErrorCode   ierr;
336:   NEP_DEF_MATSHELL *matctx;

339:   MatShellGetContext(M,(void**)&matctx);
340:   if (matctx->extop->szd) {
341:     BVDestroy(&matctx->U);
342:     PetscFree2(matctx->hfj,matctx->work);
343:     PetscFree2(matctx->A,matctx->B);
344:     VecDestroy(&matctx->w[0]);
345:     VecDestroy(&matctx->w[1]);
346:   }
347:   MatDestroy(&matctx->T);
348:   PetscFree(matctx);
349:   return(0);
350: }

352: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
353: {
354:   PetscScalar p;
355:   PetscInt    i;

358:   if (!jacobian) {
359:     val[0] = 1.0;
360:     for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
361:   } else {
362:     val[0] = 0.0;
363:     p = 1.0;
364:     for (i=1;i<extop->n;i++) {
365:       val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
366:       p *= (lambda-extop->bc[i-1]);
367:     }
368:   }
369:   return(0);
370: }

372: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
373: {
374:   PetscErrorCode   ierr;
375:   NEP_DEF_MATSHELL *matctx,*matctxC;
376:   PetscInt         nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
377:   Mat              F;
378:   Mat              Mshell,Mcomp;
379:   PetscBool        ini=PETSC_FALSE;
380:   PetscScalar      *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
381:   PetscBLASInt     n_,info,szd_;

384:   if (!M) {
385:     Mshell = jacobian?extop->MJ:extop->MF;
386:   } else Mshell = *M;
387:   Mcomp  = jacobian?extop->MF:extop->MJ;
388:   if (!Mshell) {
389:     ini = PETSC_TRUE;
390:     PetscNew(&matctx);
391:     MatGetLocalSize(extop->nep->function,&mloc,&nloc);
392:     nloc += szd; mloc += szd;
393:     MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
394:     MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))NEPDeflationMatShell_MatMult);
395:     MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))NEPDeflationMatShell_CreateVecs);
396:     MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))NEPDeflationMatShell_Destroy);
397:     matctx->nep = extop->nep;
398:     matctx->extop = extop;
399:     if (!M) {
400:       if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
401:       else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
402:       PetscObjectReference((PetscObject)matctx->T);
403:     } else {
404:       matctx->jacob = jacobian;
405:       MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
406:       *M = Mshell;
407:     }
408:     if (szd) {
409:       BVCreateVec(extop->nep->V,matctx->w);
410:       VecDuplicate(matctx->w[0],matctx->w+1);
411:       BVDuplicateResize(extop->nep->V,szd,&matctx->U);
412:       PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
413:       PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
414:     }
415:   } else {
416:     MatShellGetContext(Mshell,(void**)&matctx);
417:   }
418:   if (ini || matctx->theta != lambda || matctx->n != extop->n) {
419:     if (ini || matctx->theta != lambda) {
420:       if (jacobian) {
421:         NEPComputeJacobian(extop->nep,lambda,matctx->T);
422:       } else {
423:         NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
424:       }
425:     }
426:     if (n) {
427:       matctx->hfjset = PETSC_FALSE;
428:       if (!extop->simpU) {
429:         /* likely hfjp has been already computed */
430:         if (Mcomp) {
431:           MatShellGetContext(Mcomp,(void**)&matctxC);
432:           if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
433:             PetscMemcpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt*sizeof(PetscScalar));
434:             matctx->hfjset = PETSC_TRUE;
435:           }
436:         }
437:         hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
438:         if (!matctx->hfjset) {
439:           NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
440:           matctx->hfjset = PETSC_TRUE;
441:         }
442:         BVSetActiveColumns(matctx->U,0,n);
443:         hf = jacobian?hfjp:hfj;
444:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
445:         BVMatMult(extop->X,extop->nep->A[0],matctx->U);
446:         BVMultInPlace(matctx->U,F,0,n);
447:         BVSetActiveColumns(extop->W,0,extop->n);
448:         for (j=1;j<extop->nep->nt;j++) {
449:           BVMatMult(extop->X,extop->nep->A[j],extop->W);
450:           MatDensePlaceArray(F,hf+j*n*n);
451:           BVMult(matctx->U,1.0,1.0,extop->W,F);
452:           MatDenseResetArray(F);
453:         }
454:         MatDestroy(&F);
455:       } else {
456:         hfj = matctx->hfj;
457:         BVSetActiveColumns(matctx->U,0,n);
458:         BVMatMult(extop->X,matctx->T,matctx->U);
459:         for (j=0;j<n;j++) {
460:           for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
461:           hfj[j*(n+1)] += lambda;
462:         }
463:         PetscBLASIntCast(n,&n_);
464:         PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
465:         SlepcCheckLapackInfo("trtri",info);
466:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
467:         BVMultInPlace(matctx->U,F,0,n);
468:         if (jacobian) {
469:           NEPDeflationComputeFunction(extop,lambda,NULL);
470:           MatShellGetContext(extop->MF,(void**)&matctxC);
471:           BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
472:         }
473:         MatDestroy(&F);
474:       }
475:       PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
476:       NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
477:       A = matctx->A;
478:       PetscMemzero(A,szd*szd*sizeof(PetscScalar));
479:       if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
480:       for (j=0;j<n;j++)
481:         for (i=0;i<n;i++)
482:           for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
483:       PetscBLASIntCast(n,&n_);
484:       PetscBLASIntCast(szd,&szd_);
485:       B = matctx->B;
486:       PetscMemzero(B,szd*szd*sizeof(PetscScalar));
487:       for (i=1;i<extop->midx;i++) {
488:         NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
489:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
490:         PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
491:         pts = hHprev; hHprev = hH; hH = pts;
492:       }
493:       PetscFree3(basisv,hH,hHprev);
494:     }
495:     matctx->theta = lambda;
496:     matctx->n = extop->n;
497:   }
498:   return(0);
499: }

501: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
502: {

506:   NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
507:   if (F) *F = extop->MF;
508:   return(0);
509: }

511: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
512: {

516:   NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
517:   if (J) *J = extop->MJ;
518:   return(0);
519: }

521: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
522: {
523:   PetscErrorCode    ierr;
524:   NEP_DEF_MATSHELL  *matctx;
525:   NEP_DEF_FUN_SOLVE solve;
526:   PetscInt          i,j,n=extop->n;
527:   Vec               u,tu;
528:   Mat               F;
529:   PetscScalar       snone=-1.0,sone=1.0;
530:   PetscBLASInt      n_,szd_,ldh_,*p,info;
531:   Mat               Mshell;

534:   solve = extop->solve;
535:   if (lambda!=solve->theta || n!=solve->n) {
536:     NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
537:     Mshell = (solve->sincf)?extop->MF:solve->T;
538:     MatShellGetContext(Mshell,(void**)&matctx);
539:     KSPSetOperators(solve->ksp,matctx->T,matctx->T);
540:     if (n) {
541:       PetscBLASIntCast(n,&n_);
542:       PetscBLASIntCast(extop->szd,&szd_);
543:       PetscBLASIntCast(extop->szd+1,&ldh_);
544:       if (!extop->simpU) {
545:         BVSetActiveColumns(solve->T_1U,0,n);
546:         for (i=0;i<n;i++) {
547:           BVGetColumn(matctx->U,i,&u);
548:           BVGetColumn(solve->T_1U,i,&tu);
549:           KSPSolve(solve->ksp,u,tu);
550:           BVRestoreColumn(solve->T_1U,i,&tu);
551:           BVRestoreColumn(matctx->U,i,&u);
552:         }
553:         MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
554:         BVDot(solve->T_1U,extop->X,F);
555:         MatDestroy(&F);
556:       } else {
557:         for (j=0;j<n;j++)
558:           for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
559:         for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
560:         PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
561:         for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
562:       }
563:       PetscMemcpy(solve->M,matctx->B,extop->szd*extop->szd*sizeof(PetscScalar));
564:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
565:       PetscMalloc1(n,&p);
566:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
567:       SlepcCheckLapackInfo("getrf",info);
568:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
569:       SlepcCheckLapackInfo("getri",info);
570:       PetscFree(p);
571:     }
572:     solve->theta = lambda;
573:     solve->n = n;
574:   }
575:   return(0);
576: }

578: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
579: {
580:   PetscErrorCode    ierr;
581:   Vec               b1,x1;
582:   PetscScalar       *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
583:   NEP_DEF_MATSHELL  *matctx;
584:   NEP_DEF_FUN_SOLVE solve=extop->solve;
585:   PetscBLASInt      one=1,szd_,n_,ldh_;
586:   PetscInt          nloc,i;
587:   PetscMPIInt       np,count;

590:   if (extop->szd) {
591:     x1 = solve->w[0]; b1 = solve->w[1];
592:     VecGetArray(x,&xx);
593:     VecPlaceArray(x1,xx);
594:     VecGetArray(b,&bb);
595:     VecPlaceArray(b1,bb);
596:   } else {
597:     b1 = b; x1 = x;
598:   }
599:   KSPSolve(extop->solve->ksp,b1,x1);
600:   if (extop->n) {
601:     PetscBLASIntCast(extop->szd,&szd_);
602:     PetscBLASIntCast(extop->n,&n_);
603:     PetscBLASIntCast(extop->szd+1,&ldh_);
604:     BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
605:     PetscMalloc2(extop->n,&b2,extop->n,&x2);
606:     MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
607:     for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np); 
608:     w = solve->work; w2 = solve->work+extop->n;
609:     MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
610:     PetscMemcpy(w2,b2,extop->n*sizeof(PetscScalar));
611:     BVDotVec(extop->X,x1,w);
612:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
613:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
614:     if (extop->simpU) {
615:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
616:       for (i=0;i<extop->n;i++) w[i] = x2[i];
617:       PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
618:       for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
619:       BVMultVec(extop->X,-1.0,1.0,x1,w);
620:     } else {
621:       BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
622:     }
623:     for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
624:     PetscMPIIntCast(extop->n,&count);
625:     MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
626:   }
627:   if (extop->szd) {
628:     VecResetArray(x1);
629:     VecRestoreArray(x,&xx);
630:     VecResetArray(b1);
631:     VecRestoreArray(b,&bb);
632:     if (extop->n) { PetscFree2(b2,x2);}
633:   }
634:   return(0);
635: }

637: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
638: {
639:   PetscErrorCode    ierr;
640:   PetscInt          j;
641:   NEP_DEF_FUN_SOLVE solve;

644:   if (!extop) return(0);
645:   PetscFree(extop->H);
646:   BVDestroy(&extop->X);
647:   if (extop->szd) {
648:     PetscFree3(extop->Hj,extop->XpX,extop->bc);
649:     BVDestroy(&extop->W);
650:   }
651:   MatDestroy(&extop->MF);
652:   MatDestroy(&extop->MJ);
653:   if (extop->solve) {
654:     solve = extop->solve;
655:     if (extop->szd) {
656:       if (!extop->simpU) {BVDestroy(&solve->T_1U);}
657:       PetscFree2(solve->M,solve->work);
658:       VecDestroy(&solve->w[0]);
659:       VecDestroy(&solve->w[1]);
660:     }
661:     if (!solve->sincf) {
662:       MatDestroy(&solve->T);
663:     }
664:     PetscFree(extop->solve);
665:   }
666:   if (extop->proj) {
667:     if (extop->szd) {
668:       for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
669:       MatDestroy(&extop->proj->XpV1);
670:       PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
671:       VecDestroy(&extop->proj->w);
672:       BVDestroy(&extop->proj->V1);
673:     }
674:     PetscFree(extop->proj);
675:   }
676:   PetscFree(extop);
677:   return(0);
678: }

680: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
681: {
682:   PetscErrorCode    ierr;
683:   NEP_EXT_OP        op;
684:   NEP_DEF_FUN_SOLVE solve;
685:   PetscInt          szd;

688:   NEPDeflationReset(*extop);
689:   PetscNew(&op);
690:   *extop  = op;
691:   op->nep = nep;
692:   op->n   = 0;
693:   op->szd = szd = sz-1;
694:   op->max_midx = PetscMin(MAX_MINIDX,szd);
695:   op->X = X;
696:   if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
697:   else { PetscObjectReference((PetscObject)X); }
698:   PetscCalloc1(sz*sz,&(op)->H);
699:   if (op->szd) {
700:     op->simpU = PETSC_FALSE;
701:     if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
702:       PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
703:     } else {
704:       op->simpU = PETSC_TRUE;
705:     }
706:     PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
707:     BVDuplicateResize(op->X,op->szd,&op->W);
708:   }
709:   if (ksp) {
710:     PetscNew(&solve);
711:     op->solve    = solve;
712:     solve->ksp   = ksp;
713:     solve->sincf = sincfun;
714:     solve->n     = -1;
715:     if (op->szd) {
716:       if (!op->simpU) {
717:         BVDuplicateResize(nep->V,szd,&solve->T_1U);
718:       }
719:       PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
720:       BVCreateVec(nep->V,&solve->w[0]);
721:       VecDuplicate(solve->w[0],&solve->w[1]);
722:     }
723:   }
724:   return(0);
725: }

727: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
728: {
729:   PetscScalar     *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
730:   PetscScalar     alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
731:   PetscInt        i,ldds,nwork=0,szd,nv,j,k,n;
732:   PetscBLASInt    inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
733:   PetscMPIInt     np;
734:   NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
735:   NEP_EXT_OP      extop=proj->extop;
736:   NEP             nep=extop->nep;
737:   PetscErrorCode  ierr;

740:   DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
741:   DSGetLeadingDimension(ds,&ldds);
742:   DSGetArray(ds,mat,&T);
743:   PetscMemzero(T,ldds*nv*sizeof(PetscScalar));
744:   PetscBLASIntCast(ldds*nv,&dim2);
745:   /* mat = V1^*T(lambda)V1 */
746:   for (i=0;i<nep->nt;i++) {
747:     if (deriv) {
748:       FNEvaluateDerivative(nep->f[i],lambda,&alpha);
749:     } else {
750:       FNEvaluateFunction(nep->f[i],lambda,&alpha);
751:     }
752:     DSGetArray(ds,DSMatExtra[i],&E);
753:     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
754:     DSRestoreArray(ds,DSMatExtra[i],&E);
755:   }
756:   if (extop->n) {
757:     n = extop->n;
758:     szd = extop->szd;
759:     PetscMemzero(proj->work,proj->lwork*sizeof(PetscScalar));
760:     PetscBLASIntCast(nv,&nv_);
761:     PetscBLASIntCast(n,&n_);
762:     PetscBLASIntCast(ldds,&ldds_);
763:     PetscBLASIntCast(szd,&szd_);
764:     PetscBLASIntCast(proj->dim,&dim_);
765:     PetscBLASIntCast(extop->szd+1,&ldh_);
766:     w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
767:     nwork += 2*proj->dim*proj->dim;

769:     /* mat = mat + V1^*U(lambda)V2 */
770:     for (i=0;i<nep->nt;i++) {
771:       MatDenseGetArray(proj->V1pApX[i],&E);
772:       if (extop->simpU) {
773:         if (deriv) {
774:           FNEvaluateDerivative(nep->f[i],lambda,&alpha);
775:         } else {
776:           FNEvaluateFunction(nep->f[i],lambda,&alpha);
777:         }
778:         ww = w1; w = w2;
779:         PetscMemcpy(ww,proj->V2,szd*nv*sizeof(PetscScalar));
780:         MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
781:         for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
782:         for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
783:         alpha = -alpha;
784:         PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
785:         if (deriv) {
786:           PetscBLASIntCast(szd*nv,&szdk);
787:           FNEvaluateFunction(nep->f[i],lambda,&alpha2);
788:           PetscMemcpy(w,proj->V2,szd*nv*sizeof(PetscScalar));
789:           for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
790:           alpha2 = -alpha2;
791:           PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
792:           alpha2 = 1.0;
793:           PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
794:           PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
795:         }
796:         for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
797:       } else {
798:         NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
799:         w = deriv?w2:w1; ww = deriv?w1:w2;
800:         MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
801:         s = PetscSqrtReal(np);
802:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
803:       }
804:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
805:       MatDenseRestoreArray(proj->V1pApX[i],&E);
806:     }

808:     /* mat = mat + V2^*A(lambda)V1 */
809:     basisv = proj->work+nwork; nwork += szd;
810:     hH     = proj->work+nwork; nwork += szd*szd;
811:     hHprev = proj->work+nwork; nwork += szd*szd;
812:     AB     = proj->work+nwork; nwork += szd*szd;
813:     NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
814:     if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
815:     for (j=0;j<n;j++)
816:       for (i=0;i<n;i++)
817:         for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
818:     MatDenseGetArray(proj->XpV1,&E);
819:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
820:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
821:     MatDenseRestoreArray(proj->XpV1,&E);

823:     /* mat = mat + V2^*B(lambda)V2 */
824:     PetscMemzero(AB,szd*szd*sizeof(PetscScalar));
825:     for (i=1;i<extop->midx;i++) {
826:       NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
827:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
828:       PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
829:       pts = hHprev; hHprev = hH; hH = pts;
830:     }
831:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
832:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
833:   }
834:   DSRestoreArray(ds,mat,&T);
835:   return(0);
836: }

838: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
839: {
840:   PetscErrorCode  ierr;
841:   PetscInt        k,j,n=extop->n,dim;
842:   Vec             v,ve;
843:   BV              V1;
844:   Mat             G;
845:   NEP             nep=extop->nep;
846:   NEP_DEF_PROJECT proj;

849:   NEPCheckSplit(extop->nep,1);
850:   proj = extop->proj;
851:   if (!proj) {
852:     /* Initialize the projection data structure */
853:     PetscNew(&proj);
854:     extop->proj = proj;
855:     proj->extop = extop;
856:     BVGetSizes(Vext,NULL,NULL,&dim);
857:     proj->dim = dim;
858:     if (extop->szd) {
859:       proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
860:       PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
861:       for (j=0;j<nep->nt;j++) {
862:          MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
863:       }
864:        MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
865:       BVCreateVec(extop->X,&proj->w);
866:       BVDuplicateResize(extop->X,proj->dim,&proj->V1);
867:     }
868:     DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
869:   }

871:   /* Split Vext in V1 and V2 */
872:   if (extop->szd) {
873:     for (j=j0;j<j1;j++) {
874:       BVGetColumn(Vext,j,&ve);
875:       BVGetColumn(proj->V1,j,&v);
876:       NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
877:       BVRestoreColumn(proj->V1,j,&v);
878:       BVRestoreColumn(Vext,j,&ve);
879:     }
880:     V1 = proj->V1;
881:   } else V1 = Vext;

883:   /* Compute matrices V1^* A_i V1 */
884:   BVSetActiveColumns(V1,j0,j1);
885:   for (k=0;k<nep->nt;k++) {
886:     DSGetMat(ds,DSMatExtra[k],&G);
887:     BVMatProject(V1,nep->A[k],V1,G);
888:     DSRestoreMat(ds,DSMatExtra[k],&G);
889:   }

891:   if (extop->n) {
892:     if (extop->szd) {
893:       /* Compute matrices V1^* A_i X  and V1^* X */
894:       BVSetActiveColumns(extop->W,0,n);
895:       for (k=0;k<nep->nt;k++) {
896:         BVMatMult(extop->X,nep->A[k],extop->W);
897:         BVDot(extop->W,V1,proj->V1pApX[k]);
898:       }
899:       BVDot(V1,extop->X,proj->XpV1);
900:     }
901:   }
902:   return(0);
903: }