Actual source code: nepdefl.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/nepimpl.h> /*I "slepcnep.h" I*/
12: #include <slepcblaslapack.h>
13: #include "nepdefl.h"
15: PetscErrorCode NEPDeflationGetInvariantPair(NEP_EXT_OP extop,BV *X,Mat *H)
16: {
20: if (X) *X = extop->X;
21: if (H) {
22: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd+1,extop->szd+1,extop->H,H);
23: }
24: return(0);
25: }
27: PetscErrorCode NEPDeflationCopyToExtendedVec(NEP_EXT_OP extop,Vec v,PetscScalar *a,Vec vex,PetscBool back)
28: {
30: PetscMPIInt np,rk,count;
31: PetscScalar *array1,*array2;
32: PetscInt nloc;
35: if (extop->szd) {
36: MPI_Comm_rank(PetscObjectComm((PetscObject)v),&rk);
37: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
38: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
39: if (v) {
40: VecGetArray(v,&array1);
41: VecGetArray(vex,&array2);
42: if (back) {
43: PetscMemcpy(array1,array2,nloc*sizeof(PetscScalar));
44: } else {
45: PetscMemcpy(array2,array1,nloc*sizeof(PetscScalar));
46: }
47: VecRestoreArray(v,&array1);
48: VecRestoreArray(vex,&array2);
49: }
50: if (a) {
51: VecGetArray(vex,&array2);
52: if (back) {
53: PetscMemcpy(a,array2+nloc,extop->szd*sizeof(PetscScalar));
54: PetscMPIIntCast(extop->szd,&count);
55: MPI_Bcast(a,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)v));
56: } else {
57: PetscMemcpy(array2+nloc,a,extop->szd*sizeof(PetscScalar));
58: PetscMPIIntCast(extop->szd,&count);
59: MPI_Bcast(array2+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)v));
60: }
61: VecRestoreArray(vex,&array2);
62: }
63: } else {
64: if (back) {VecCopy(vex,v);}
65: else {VecCopy(v,vex);}
66: }
67: return(0);
68: }
70: PetscErrorCode NEPDeflationCreateVec(NEP_EXT_OP extop,Vec *v)
71: {
73: PetscInt nloc;
74: Vec u;
75: VecType type;
78: if (extop->szd) {
79: BVGetColumn(extop->nep->V,0,&u);
80: VecGetType(u,&type);
81: BVRestoreColumn(extop->nep->V,0,&u);
82: VecCreate(PetscObjectComm((PetscObject)extop->nep),v);
83: VecSetType(*v,type);
84: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
85: nloc += extop->szd;
86: VecSetSizes(*v,nloc,PETSC_DECIDE);
87: } else {
88: BVCreateVec(extop->nep->V,v);
89: }
90: return(0);
91: }
93: PetscErrorCode NEPDeflationCreateBV(NEP_EXT_OP extop,PetscInt sz,BV *V)
94: {
95: PetscErrorCode ierr;
96: PetscInt nloc;
97: BVType type;
98: BVOrthogType otype;
99: BVOrthogRefineType oref;
100: PetscReal oeta;
101: BVOrthogBlockType oblock;
102: NEP nep=extop->nep;
105: if (extop->szd) {
106: BVGetSizes(nep->V,&nloc,NULL,NULL);
107: BVCreate(PetscObjectComm((PetscObject)nep),V);
108: BVSetSizes(*V,nloc+extop->szd,PETSC_DECIDE,sz);
109: BVGetType(nep->V,&type);
110: BVSetType(*V,type);
111: BVGetOrthogonalization(nep->V,&otype,&oref,&oeta,&oblock);
112: BVSetOrthogonalization(*V,otype,oref,oeta,oblock);
113: PetscObjectStateIncrease((PetscObject)*V);
114: PetscLogObjectParent((PetscObject)nep,(PetscObject)*V);
115: } else {
116: BVDuplicateResize(nep->V,sz,V);
117: }
118: return(0);
119: }
121: PetscErrorCode NEPDeflationSetRandomVec(NEP_EXT_OP extop,Vec v)
122: {
124: PetscInt n,next,i;
125: PetscRandom rand;
126: PetscScalar *array;
127: PetscMPIInt nn,np;
130: BVGetRandomContext(extop->nep->V,&rand);
131: VecSetRandom(v,rand);
132: if (extop->szd) {
133: MPI_Comm_size(PetscObjectComm((PetscObject)v),&np);
134: BVGetSizes(extop->nep->V,&n,NULL,NULL);
135: VecGetLocalSize(v,&next);
136: VecGetArray(v,&array);
137: for (i=n+extop->n;i<next;i++) array[i] = 0.0;
138: for (i=n;i<n+extop->n;i++) array[i] /= PetscSqrtReal(np);
139: PetscMPIIntCast(extop->n,&nn);
140: MPI_Bcast(array+n,nn,MPIU_SCALAR,0,PETSC_COMM_WORLD);
141: VecRestoreArray(v,&array);
142: }
143: return(0);
144: }
146: static PetscErrorCode NEPDeflationEvaluateBasisMat(NEP_EXT_OP extop,PetscInt idx,PetscBool hat,PetscScalar *bval,PetscScalar *Hj,PetscScalar *Hjprev)
147: {
149: PetscInt i,k,n=extop->n,ldhj=extop->szd,ldh=extop->szd+1;
150: PetscScalar sone=1.0,zero=0.0;
151: PetscBLASInt ldh_,ldhj_,n_;
154: i = (idx<0)?extop->szd*extop->szd*(-idx):extop->szd*extop->szd;
155: PetscMemzero(Hj,i*sizeof(PetscScalar));
156: PetscBLASIntCast(ldhj+1,&ldh_);
157: PetscBLASIntCast(ldhj,&ldhj_);
158: PetscBLASIntCast(n,&n_);
159: if (idx<1) {
160: if (!hat) for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 1.0;
161: else for (i=0;i<extop->n;i++) Hj[i+i*ldhj] = 0.0;
162: } else {
163: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[idx-1];
164: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hjprev,&ldhj_,&zero,Hj,&ldhj_));
165: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[idx-1];
166: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[idx-1];
167: }
168: if (idx<0) {
169: idx = -idx;
170: for (k=1;k<idx;k++) {
171: for (i=0;i<n;i++) extop->H[i*ldh+i] -= extop->bc[k-1];
172: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->H,&ldh_,Hj+(k-1)*ldhj*ldhj,&ldhj_,&zero,Hj+k*ldhj*ldhj,&ldhj_));
173: for (i=0;i<n;i++) extop->H[i*ldh+i] += extop->bc[k-1];
174: if (hat) for (i=0;i<n;i++) Hj[i*(ldhj+1)] += bval[k-1];
175: }
176: }
177: return(0);
178: }
180: PetscErrorCode NEPDeflationLocking(NEP_EXT_OP extop,Vec u,PetscScalar lambda)
181: {
183: Vec uu;
184: PetscInt ld,i;
185: PetscReal norm;
186: PetscMPIInt np;
189: BVGetColumn(extop->X,extop->n,&uu);
190: ld = extop->szd+1;
191: NEPDeflationCopyToExtendedVec(extop,uu,extop->H+extop->n*ld,u,PETSC_TRUE);
192: BVRestoreColumn(extop->X,extop->n,&uu);
193: BVNormColumn(extop->X,extop->n,NORM_2,&norm);
194: BVScaleColumn(extop->X,extop->n,1.0/norm);
195: MPI_Comm_size(PetscObjectComm((PetscObject)u),&np);
196: for (i=0;i<extop->n;i++) extop->H[extop->n*ld+i] *= PetscSqrtReal(np)/norm;
197: extop->H[extop->n*(ld+1)] = lambda;
198: extop->n++;
199: BVSetActiveColumns(extop->X,0,extop->n);
200: if (extop->n <= extop->szd) {
201: /* update XpX */
202: BVDotColumn(extop->X,extop->n-1,extop->XpX+(extop->n-1)*extop->szd);
203: extop->XpX[(extop->n-1)*(1+extop->szd)] = 1.0;
204: for (i=0;i<extop->n-1;i++) extop->XpX[i*extop->szd+extop->n-1] = PetscConj(extop->XpX[(extop->n-1)*extop->szd+i]);
205: /* determine minimality index */
206: extop->midx = PetscMin(extop->max_midx,extop->n);
207: /* polynominal basis coeficients */
208: for (i=0;i<extop->midx;i++) extop->bc[i] = extop->nep->target;
209: /* evaluate the polynomial basis in H */
210: NEPDeflationEvaluateBasisMat(extop,-extop->midx,PETSC_FALSE,NULL,extop->Hj,NULL);
211: }
212: return(0);
213: }
215: static PetscErrorCode NEPDeflationEvaluateHatFunction(NEP_EXT_OP extop, PetscInt idx,PetscScalar lambda,PetscScalar *y,PetscScalar *hfj,PetscScalar *hfjp,PetscInt ld)
216: {
218: PetscInt i,j,k,off,ini,fin,sz,ldh,n=extop->n;
219: Mat A,B;
220: PetscScalar *array;
223: if (idx<0) {ini = 0; fin = extop->nep->nt;}
224: else {ini = idx; fin = idx+1;}
225: if (y) sz = hfjp?n+2:n+1;
226: else sz = hfjp?3*n:2*n;
227: ldh = extop->szd+1;
228: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&A);
229: MatCreateSeqDense(PETSC_COMM_SELF,sz,sz,NULL,&B);
230: MatDenseGetArray(A,&array);
231: for (j=0;j<n;j++)
232: for (i=0;i<n;i++) array[j*sz+i] = extop->H[j*ldh+i];
233: MatDenseRestoreArray(A,&array);
234: if (y) {
235: MatDenseGetArray(A,&array);
236: array[extop->n*(sz+1)] = lambda;
237: if (hfjp) { array[(n+1)*sz+n] = 1.0; array[(n+1)*sz+n+1] = lambda;}
238: for (i=0;i<n;i++) array[n*sz+i] = y[i];
239: MatDenseRestoreArray(A,&array);
240: for (j=ini;j<fin;j++) {
241: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
242: MatDenseGetArray(B,&array);
243: for (i=0;i<n;i++) hfj[j*ld+i] = array[n*sz+i];
244: if (hfjp) for (i=0;i<n;i++) hfjp[j*ld+i] = array[(n+1)*sz+i];
245: MatDenseRestoreArray(B,&array);
246: }
247: } else {
248: off = idx<0?ld*n:0;
249: MatDenseGetArray(A,&array);
250: for (k=0;k<n;k++) {
251: array[(n+k)*sz+k] = 1.0;
252: array[(n+k)*sz+n+k] = lambda;
253: }
254: if (hfjp) for (k=0;k<n;k++) {
255: array[(2*n+k)*sz+n+k] = 1.0;
256: array[(2*n+k)*sz+2*n+k] = lambda;
257: }
258: MatDenseRestoreArray(A,&array);
259: for (j=ini;j<fin;j++) {
260: FNEvaluateFunctionMat(extop->nep->f[j],A,B);
261: MatDenseGetArray(B,&array);
262: for (i=0;i<n;i++) for (k=0;k<n;k++) hfj[j*off+i*ld+k] = array[n*sz+i*sz+k];
263: if (hfjp) for (k=0;k<n;k++) for (i=0;i<n;i++) hfjp[j*off+i*ld+k] = array[2*n*sz+i*sz+k];
264: MatDenseRestoreArray(B,&array);
265: }
266: }
267: MatDestroy(&A);
268: MatDestroy(&B);
269: return(0);
270: }
272: static PetscErrorCode NEPDeflationMatShell_MatMult(Mat M,Vec x,Vec y)
273: {
274: NEP_DEF_MATSHELL *matctx;
275: PetscErrorCode ierr;
276: NEP_EXT_OP extop;
277: Vec x1,y1;
278: PetscScalar *yy,sone=1.0,zero=0.0;
279: const PetscScalar *xx;
280: PetscInt nloc,i;
281: PetscMPIInt np;
282: PetscBLASInt n_,one=1,szd_;
285: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
286: MatShellGetContext(M,(void**)&matctx);
287: extop = matctx->extop;
288: if (extop->szd) {
289: x1 = matctx->w[0]; y1 = matctx->w[1];
290: VecGetArrayRead(x,&xx);
291: VecPlaceArray(x1,xx);
292: VecGetArray(y,&yy);
293: VecPlaceArray(y1,yy);
294: MatMult(matctx->T,x1,y1);
295: if (extop->n) {
296: VecGetLocalSize(x1,&nloc);
297: /* copy for avoiding warning of constant array xx */
298: for (i=0;i<extop->n;i++) matctx->work[i] = xx[nloc+i]*PetscSqrtReal(np);
299: BVMultVec(matctx->U,1.0,1.0,y1,matctx->work);
300: BVDotVec(extop->X,x1,matctx->work);
301: PetscBLASIntCast(extop->n,&n_);
302: PetscBLASIntCast(extop->szd,&szd_);
303: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->A,&szd_,matctx->work,&one,&zero,yy+nloc,&one));
304: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,matctx->B,&szd_,xx+nloc,&one,&sone,yy+nloc,&one));
305: for (i=0;i<extop->n;i++) yy[nloc+i] /= PetscSqrtReal(np);
306: }
307: VecResetArray(x1);
308: VecRestoreArrayRead(x,&xx);
309: VecResetArray(y1);
310: VecRestoreArray(y,&yy);
311: } else {
312: MatMult(matctx->T,x,y);
313: }
314: return(0);
315: }
317: static PetscErrorCode NEPDeflationMatShell_CreateVecs(Mat M,Vec *right,Vec *left)
318: {
319: PetscErrorCode ierr;
320: NEP_DEF_MATSHELL *matctx;
323: MatShellGetContext(M,(void**)&matctx);
324: if (right) {
325: VecDuplicate(matctx->w[0],right);
326: }
327: if (left) {
328: VecDuplicate(matctx->w[0],left);
329: }
330: return(0);
331: }
333: static PetscErrorCode NEPDeflationMatShell_Destroy(Mat M)
334: {
335: PetscErrorCode ierr;
336: NEP_DEF_MATSHELL *matctx;
339: MatShellGetContext(M,(void**)&matctx);
340: if (matctx->extop->szd) {
341: BVDestroy(&matctx->U);
342: PetscFree2(matctx->hfj,matctx->work);
343: PetscFree2(matctx->A,matctx->B);
344: VecDestroy(&matctx->w[0]);
345: VecDestroy(&matctx->w[1]);
346: }
347: MatDestroy(&matctx->T);
348: PetscFree(matctx);
349: return(0);
350: }
352: static PetscErrorCode NEPDeflationEvaluateBasis(NEP_EXT_OP extop,PetscScalar lambda,PetscInt n,PetscScalar *val,PetscBool jacobian)
353: {
354: PetscScalar p;
355: PetscInt i;
358: if (!jacobian) {
359: val[0] = 1.0;
360: for (i=1;i<extop->n;i++) val[i] = val[i-1]*(lambda-extop->bc[i-1]);
361: } else {
362: val[0] = 0.0;
363: p = 1.0;
364: for (i=1;i<extop->n;i++) {
365: val[i] = val[i-1]*(lambda-extop->bc[i-1])+p;
366: p *= (lambda-extop->bc[i-1]);
367: }
368: }
369: return(0);
370: }
372: static PetscErrorCode NEPDeflationComputeShellMat(NEP_EXT_OP extop,PetscScalar lambda,PetscBool jacobian,Mat *M)
373: {
374: PetscErrorCode ierr;
375: NEP_DEF_MATSHELL *matctx,*matctxC;
376: PetscInt nloc,mloc,n=extop->n,j,i,szd=extop->szd,ldh=szd+1,k;
377: Mat F;
378: Mat Mshell,Mcomp;
379: PetscBool ini=PETSC_FALSE;
380: PetscScalar *hf,*hfj,*hfjp,sone=1.0,*hH,*hHprev,*pts,*B,*A,*Hj=extop->Hj,*basisv,zero=0.0;
381: PetscBLASInt n_,info,szd_;
384: if (!M) {
385: Mshell = jacobian?extop->MJ:extop->MF;
386: } else Mshell = *M;
387: Mcomp = jacobian?extop->MF:extop->MJ;
388: if (!Mshell) {
389: ini = PETSC_TRUE;
390: PetscNew(&matctx);
391: MatGetLocalSize(extop->nep->function,&mloc,&nloc);
392: nloc += szd; mloc += szd;
393: MatCreateShell(PetscObjectComm((PetscObject)extop->nep),nloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE,matctx,&Mshell);
394: MatShellSetOperation(Mshell,MATOP_MULT,(void(*)(void))NEPDeflationMatShell_MatMult);
395: MatShellSetOperation(Mshell,MATOP_CREATE_VECS,(void(*)(void))NEPDeflationMatShell_CreateVecs);
396: MatShellSetOperation(Mshell,MATOP_DESTROY,(void(*)(void))NEPDeflationMatShell_Destroy);
397: matctx->nep = extop->nep;
398: matctx->extop = extop;
399: if (!M) {
400: if (jacobian) { matctx->jacob = PETSC_TRUE; matctx->T = extop->nep->jacobian; extop->MJ = Mshell; }
401: else { matctx->jacob = PETSC_FALSE; matctx->T = extop->nep->function; extop->MF = Mshell; }
402: PetscObjectReference((PetscObject)matctx->T);
403: } else {
404: matctx->jacob = jacobian;
405: MatDuplicate(jacobian?extop->nep->jacobian:extop->nep->function,MAT_DO_NOT_COPY_VALUES, &matctx->T);
406: *M = Mshell;
407: }
408: if (szd) {
409: BVCreateVec(extop->nep->V,matctx->w);
410: VecDuplicate(matctx->w[0],matctx->w+1);
411: BVDuplicateResize(extop->nep->V,szd,&matctx->U);
412: PetscMalloc2(extop->simpU?2*(szd)*(szd):2*(szd)*(szd)*extop->nep->nt,&matctx->hfj,szd,&matctx->work);
413: PetscMalloc2(szd*szd,&matctx->A,szd*szd,&matctx->B);
414: }
415: } else {
416: MatShellGetContext(Mshell,(void**)&matctx);
417: }
418: if (ini || matctx->theta != lambda || matctx->n != extop->n) {
419: if (ini || matctx->theta != lambda) {
420: if (jacobian) {
421: NEPComputeJacobian(extop->nep,lambda,matctx->T);
422: } else {
423: NEPComputeFunction(extop->nep,lambda,matctx->T,matctx->T);
424: }
425: }
426: if (n) {
427: matctx->hfjset = PETSC_FALSE;
428: if (!extop->simpU) {
429: /* likely hfjp has been already computed */
430: if (Mcomp) {
431: MatShellGetContext(Mcomp,(void**)&matctxC);
432: if (matctxC->hfjset && matctxC->theta == lambda && matctxC->n == extop->n) {
433: PetscMemcpy(matctx->hfj,matctxC->hfj,2*extop->szd*extop->szd*extop->nep->nt*sizeof(PetscScalar));
434: matctx->hfjset = PETSC_TRUE;
435: }
436: }
437: hfj = matctx->hfj; hfjp = matctx->hfj+extop->szd*extop->szd*extop->nep->nt;
438: if (!matctx->hfjset) {
439: NEPDeflationEvaluateHatFunction(extop,-1,lambda,NULL,hfj,hfjp,n);
440: matctx->hfjset = PETSC_TRUE;
441: }
442: BVSetActiveColumns(matctx->U,0,n);
443: hf = jacobian?hfjp:hfj;
444: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hf,&F);
445: BVMatMult(extop->X,extop->nep->A[0],matctx->U);
446: BVMultInPlace(matctx->U,F,0,n);
447: BVSetActiveColumns(extop->W,0,extop->n);
448: for (j=1;j<extop->nep->nt;j++) {
449: BVMatMult(extop->X,extop->nep->A[j],extop->W);
450: MatDensePlaceArray(F,hf+j*n*n);
451: BVMult(matctx->U,1.0,1.0,extop->W,F);
452: MatDenseResetArray(F);
453: }
454: MatDestroy(&F);
455: } else {
456: hfj = matctx->hfj;
457: BVSetActiveColumns(matctx->U,0,n);
458: BVMatMult(extop->X,matctx->T,matctx->U);
459: for (j=0;j<n;j++) {
460: for (i=0;i<n;i++) hfj[j*n+i] = -extop->H[j*ldh+i];
461: hfj[j*(n+1)] += lambda;
462: }
463: PetscBLASIntCast(n,&n_);
464: PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,hfj,&n_,&info));
465: SlepcCheckLapackInfo("trtri",info);
466: MatCreateSeqDense(PETSC_COMM_SELF,n,n,hfj,&F);
467: BVMultInPlace(matctx->U,F,0,n);
468: if (jacobian) {
469: NEPDeflationComputeFunction(extop,lambda,NULL);
470: MatShellGetContext(extop->MF,(void**)&matctxC);
471: BVMult(matctx->U,-1.0,1.0,matctxC->U,F);
472: }
473: MatDestroy(&F);
474: }
475: PetscCalloc3(n,&basisv,szd*szd,&hH,szd*szd,&hHprev);
476: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,jacobian);
477: A = matctx->A;
478: PetscMemzero(A,szd*szd*sizeof(PetscScalar));
479: if (!jacobian) for (i=0;i<n;i++) A[i*(szd+1)] = 1.0;
480: for (j=0;j<n;j++)
481: for (i=0;i<n;i++)
482: for (k=1;k<extop->midx;k++) A[j*szd+i] += basisv[k]*PetscConj(Hj[k*szd*szd+i*szd+j]);
483: PetscBLASIntCast(n,&n_);
484: PetscBLASIntCast(szd,&szd_);
485: B = matctx->B;
486: PetscMemzero(B,szd*szd*sizeof(PetscScalar));
487: for (i=1;i<extop->midx;i++) {
488: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
489: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
490: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,B,&szd_));
491: pts = hHprev; hHprev = hH; hH = pts;
492: }
493: PetscFree3(basisv,hH,hHprev);
494: }
495: matctx->theta = lambda;
496: matctx->n = extop->n;
497: }
498: return(0);
499: }
501: PetscErrorCode NEPDeflationComputeFunction(NEP_EXT_OP extop,PetscScalar lambda,Mat *F)
502: {
506: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,NULL);
507: if (F) *F = extop->MF;
508: return(0);
509: }
511: PetscErrorCode NEPDeflationComputeJacobian(NEP_EXT_OP extop,PetscScalar lambda,Mat *J)
512: {
516: NEPDeflationComputeShellMat(extop,lambda,PETSC_TRUE,NULL);
517: if (J) *J = extop->MJ;
518: return(0);
519: }
521: PetscErrorCode NEPDeflationSolveSetUp(NEP_EXT_OP extop,PetscScalar lambda)
522: {
523: PetscErrorCode ierr;
524: NEP_DEF_MATSHELL *matctx;
525: NEP_DEF_FUN_SOLVE solve;
526: PetscInt i,j,n=extop->n;
527: Vec u,tu;
528: Mat F;
529: PetscScalar snone=-1.0,sone=1.0;
530: PetscBLASInt n_,szd_,ldh_,*p,info;
531: Mat Mshell;
534: solve = extop->solve;
535: if (lambda!=solve->theta || n!=solve->n) {
536: NEPDeflationComputeShellMat(extop,lambda,PETSC_FALSE,solve->sincf?NULL:&solve->T);
537: Mshell = (solve->sincf)?extop->MF:solve->T;
538: MatShellGetContext(Mshell,(void**)&matctx);
539: KSPSetOperators(solve->ksp,matctx->T,matctx->T);
540: if (n) {
541: PetscBLASIntCast(n,&n_);
542: PetscBLASIntCast(extop->szd,&szd_);
543: PetscBLASIntCast(extop->szd+1,&ldh_);
544: if (!extop->simpU) {
545: BVSetActiveColumns(solve->T_1U,0,n);
546: for (i=0;i<n;i++) {
547: BVGetColumn(matctx->U,i,&u);
548: BVGetColumn(solve->T_1U,i,&tu);
549: KSPSolve(solve->ksp,u,tu);
550: BVRestoreColumn(solve->T_1U,i,&tu);
551: BVRestoreColumn(matctx->U,i,&u);
552: }
553: MatCreateSeqDense(PETSC_COMM_SELF,n,n,solve->work,&F);
554: BVDot(solve->T_1U,extop->X,F);
555: MatDestroy(&F);
556: } else {
557: for (j=0;j<n;j++)
558: for (i=0;i<n;i++) solve->work[j*n+i] = extop->XpX[j*extop->szd+i];
559: for (i=0;i<n;i++) extop->H[i*ldh_+i] -= lambda;
560: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n_,&n_,&snone,extop->H,&ldh_,solve->work,&n_));
561: for (i=0;i<n;i++) extop->H[i*ldh_+i] += lambda;
562: }
563: PetscMemcpy(solve->M,matctx->B,extop->szd*extop->szd*sizeof(PetscScalar));
564: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&snone,matctx->A,&szd_,solve->work,&n_,&sone,solve->M,&szd_));
565: PetscMalloc1(n,&p);
566: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,solve->M,&szd_,p,&info));
567: SlepcCheckLapackInfo("getrf",info);
568: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n_,solve->M,&szd_,p,solve->work,&n_,&info));
569: SlepcCheckLapackInfo("getri",info);
570: PetscFree(p);
571: }
572: solve->theta = lambda;
573: solve->n = n;
574: }
575: return(0);
576: }
578: PetscErrorCode NEPDeflationFunctionSolve(NEP_EXT_OP extop,Vec b,Vec x)
579: {
580: PetscErrorCode ierr;
581: Vec b1,x1;
582: PetscScalar *xx,*bb,*x2,*b2,*w,*w2,snone=-1.0,sone=1.0,zero=0.0;
583: NEP_DEF_MATSHELL *matctx;
584: NEP_DEF_FUN_SOLVE solve=extop->solve;
585: PetscBLASInt one=1,szd_,n_,ldh_;
586: PetscInt nloc,i;
587: PetscMPIInt np,count;
590: if (extop->szd) {
591: x1 = solve->w[0]; b1 = solve->w[1];
592: VecGetArray(x,&xx);
593: VecPlaceArray(x1,xx);
594: VecGetArray(b,&bb);
595: VecPlaceArray(b1,bb);
596: } else {
597: b1 = b; x1 = x;
598: }
599: KSPSolve(extop->solve->ksp,b1,x1);
600: if (extop->n) {
601: PetscBLASIntCast(extop->szd,&szd_);
602: PetscBLASIntCast(extop->n,&n_);
603: PetscBLASIntCast(extop->szd+1,&ldh_);
604: BVGetSizes(extop->nep->V,&nloc,NULL,NULL);
605: PetscMalloc2(extop->n,&b2,extop->n,&x2);
606: MPI_Comm_size(PetscObjectComm((PetscObject)b),&np);
607: for (i=0;i<extop->n;i++) b2[i] = bb[nloc+i]*PetscSqrtReal(np);
608: w = solve->work; w2 = solve->work+extop->n;
609: MatShellGetContext(solve->sincf?extop->MF:solve->T,(void**)&matctx);
610: PetscMemcpy(w2,b2,extop->n*sizeof(PetscScalar));
611: BVDotVec(extop->X,x1,w);
612: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&snone,matctx->A,&szd_,w,&one,&sone,w2,&one));
613: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n_,&n_,&sone,solve->M,&szd_,w2,&one,&zero,x2,&one));
614: if (extop->simpU) {
615: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] -= solve->theta;
616: for (i=0;i<extop->n;i++) w[i] = x2[i];
617: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&one,&snone,extop->H,&ldh_,w,&n_));
618: for (i=0;i<extop->n;i++) extop->H[i+i*(extop->szd+1)] += solve->theta;
619: BVMultVec(extop->X,-1.0,1.0,x1,w);
620: } else {
621: BVMultVec(solve->T_1U,-1.0,1.0,x1,x2);
622: }
623: for (i=0;i<extop->n;i++) xx[i+nloc] = x2[i]/PetscSqrtReal(np);
624: PetscMPIIntCast(extop->n,&count);
625: MPI_Bcast(xx+nloc,count,MPIU_SCALAR,np-1,PetscObjectComm((PetscObject)b));
626: }
627: if (extop->szd) {
628: VecResetArray(x1);
629: VecRestoreArray(x,&xx);
630: VecResetArray(b1);
631: VecRestoreArray(b,&bb);
632: if (extop->n) { PetscFree2(b2,x2);}
633: }
634: return(0);
635: }
637: PetscErrorCode NEPDeflationReset(NEP_EXT_OP extop)
638: {
639: PetscErrorCode ierr;
640: PetscInt j;
641: NEP_DEF_FUN_SOLVE solve;
644: if (!extop) return(0);
645: PetscFree(extop->H);
646: BVDestroy(&extop->X);
647: if (extop->szd) {
648: PetscFree3(extop->Hj,extop->XpX,extop->bc);
649: BVDestroy(&extop->W);
650: }
651: MatDestroy(&extop->MF);
652: MatDestroy(&extop->MJ);
653: if (extop->solve) {
654: solve = extop->solve;
655: if (extop->szd) {
656: if (!extop->simpU) {BVDestroy(&solve->T_1U);}
657: PetscFree2(solve->M,solve->work);
658: VecDestroy(&solve->w[0]);
659: VecDestroy(&solve->w[1]);
660: }
661: if (!solve->sincf) {
662: MatDestroy(&solve->T);
663: }
664: PetscFree(extop->solve);
665: }
666: if (extop->proj) {
667: if (extop->szd) {
668: for (j=0;j<extop->nep->nt;j++) {MatDestroy(&extop->proj->V1pApX[j]);}
669: MatDestroy(&extop->proj->XpV1);
670: PetscFree3(extop->proj->V2,extop->proj->V1pApX,extop->proj->work);
671: VecDestroy(&extop->proj->w);
672: BVDestroy(&extop->proj->V1);
673: }
674: PetscFree(extop->proj);
675: }
676: PetscFree(extop);
677: return(0);
678: }
680: PetscErrorCode NEPDeflationInitialize(NEP nep,BV X,KSP ksp,PetscBool sincfun,PetscInt sz,NEP_EXT_OP *extop)
681: {
682: PetscErrorCode ierr;
683: NEP_EXT_OP op;
684: NEP_DEF_FUN_SOLVE solve;
685: PetscInt szd;
688: NEPDeflationReset(*extop);
689: PetscNew(&op);
690: *extop = op;
691: op->nep = nep;
692: op->n = 0;
693: op->szd = szd = sz-1;
694: op->max_midx = PetscMin(MAX_MINIDX,szd);
695: op->X = X;
696: if (!X) { BVDuplicateResize(nep->V,sz,&op->X); }
697: else { PetscObjectReference((PetscObject)X); }
698: PetscCalloc1(sz*sz,&(op)->H);
699: if (op->szd) {
700: op->simpU = PETSC_FALSE;
701: if (nep->fui==NEP_USER_INTERFACE_SPLIT) {
702: PetscOptionsGetBool(NULL,NULL,"-nep_deflation_simpleu",&op->simpU,NULL);
703: } else {
704: op->simpU = PETSC_TRUE;
705: }
706: PetscCalloc3(szd*szd*op->max_midx,&(op)->Hj,szd*szd,&(op)->XpX,szd,&op->bc);
707: BVDuplicateResize(op->X,op->szd,&op->W);
708: }
709: if (ksp) {
710: PetscNew(&solve);
711: op->solve = solve;
712: solve->ksp = ksp;
713: solve->sincf = sincfun;
714: solve->n = -1;
715: if (op->szd) {
716: if (!op->simpU) {
717: BVDuplicateResize(nep->V,szd,&solve->T_1U);
718: }
719: PetscMalloc2(szd*szd,&solve->M,2*szd*szd,&solve->work);
720: BVCreateVec(nep->V,&solve->w[0]);
721: VecDuplicate(solve->w[0],&solve->w[1]);
722: }
723: }
724: return(0);
725: }
727: PetscErrorCode NEPDeflationDSNEPComputeMatrix(DS ds,PetscScalar lambda,PetscBool deriv,DSMatType mat,void *ctx)
728: {
729: PetscScalar *T,*E,*w1,*w2,*w=NULL,*ww,*hH,*hHprev,*pts;
730: PetscScalar alpha,alpha2,*AB,sone=1.0,zero=0.0,*basisv,s;
731: PetscInt i,ldds,nwork=0,szd,nv,j,k,n;
732: PetscBLASInt inc=1,nv_,ldds_,dim_,dim2,szdk,szd_,n_,ldh_;
733: PetscMPIInt np;
734: NEP_DEF_PROJECT proj=(NEP_DEF_PROJECT)ctx;
735: NEP_EXT_OP extop=proj->extop;
736: NEP nep=extop->nep;
737: PetscErrorCode ierr;
740: DSGetDimensions(ds,&nv,NULL,NULL,NULL,NULL);
741: DSGetLeadingDimension(ds,&ldds);
742: DSGetArray(ds,mat,&T);
743: PetscMemzero(T,ldds*nv*sizeof(PetscScalar));
744: PetscBLASIntCast(ldds*nv,&dim2);
745: /* mat = V1^*T(lambda)V1 */
746: for (i=0;i<nep->nt;i++) {
747: if (deriv) {
748: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
749: } else {
750: FNEvaluateFunction(nep->f[i],lambda,&alpha);
751: }
752: DSGetArray(ds,DSMatExtra[i],&E);
753: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&dim2,&alpha,E,&inc,T,&inc));
754: DSRestoreArray(ds,DSMatExtra[i],&E);
755: }
756: if (extop->n) {
757: n = extop->n;
758: szd = extop->szd;
759: PetscMemzero(proj->work,proj->lwork*sizeof(PetscScalar));
760: PetscBLASIntCast(nv,&nv_);
761: PetscBLASIntCast(n,&n_);
762: PetscBLASIntCast(ldds,&ldds_);
763: PetscBLASIntCast(szd,&szd_);
764: PetscBLASIntCast(proj->dim,&dim_);
765: PetscBLASIntCast(extop->szd+1,&ldh_);
766: w1 = proj->work; w2 = proj->work+proj->dim*proj->dim;
767: nwork += 2*proj->dim*proj->dim;
769: /* mat = mat + V1^*U(lambda)V2 */
770: for (i=0;i<nep->nt;i++) {
771: MatDenseGetArray(proj->V1pApX[i],&E);
772: if (extop->simpU) {
773: if (deriv) {
774: FNEvaluateDerivative(nep->f[i],lambda,&alpha);
775: } else {
776: FNEvaluateFunction(nep->f[i],lambda,&alpha);
777: }
778: ww = w1; w = w2;
779: PetscMemcpy(ww,proj->V2,szd*nv*sizeof(PetscScalar));
780: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
781: for (j=0;j<szd*nv;j++) ww[j] *= PetscSqrtReal(np);
782: for (j=0;j<n;j++) extop->H[j*ldh_+j] -= lambda;
783: alpha = -alpha;
784: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha,extop->H,&ldh_,ww,&szd_));
785: if (deriv) {
786: PetscBLASIntCast(szd*nv,&szdk);
787: FNEvaluateFunction(nep->f[i],lambda,&alpha2);
788: PetscMemcpy(w,proj->V2,szd*nv*sizeof(PetscScalar));
789: for (j=0;j<szd*nv;j++) w[j] *= PetscSqrtReal(np);
790: alpha2 = -alpha2;
791: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
792: alpha2 = 1.0;
793: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("L","U","N","N",&n_,&nv_,&alpha2,extop->H,&ldh_,w,&szd_));
794: PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&szdk,&sone,w,&inc,ww,&inc));
795: }
796: for (j=0;j<n;j++) extop->H[j*ldh_+j] += lambda;
797: } else {
798: NEPDeflationEvaluateHatFunction(extop,i,lambda,NULL,w1,w2,szd);
799: w = deriv?w2:w1; ww = deriv?w1:w2;
800: MPI_Comm_size(PetscObjectComm((PetscObject)ds),&np);
801: s = PetscSqrtReal(np);
802: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&s,w,&szd_,proj->V2,&szd_,&zero,ww,&szd_));
803: }
804: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&nv_,&nv_,&n_,&sone,E,&dim_,ww,&szd_,&sone,T,&ldds_));
805: MatDenseRestoreArray(proj->V1pApX[i],&E);
806: }
808: /* mat = mat + V2^*A(lambda)V1 */
809: basisv = proj->work+nwork; nwork += szd;
810: hH = proj->work+nwork; nwork += szd*szd;
811: hHprev = proj->work+nwork; nwork += szd*szd;
812: AB = proj->work+nwork; nwork += szd*szd;
813: NEPDeflationEvaluateBasis(extop,lambda,n,basisv,deriv);
814: if (!deriv) for (i=0;i<n;i++) AB[i*(szd+1)] = 1.0;
815: for (j=0;j<n;j++)
816: for (i=0;i<n;i++)
817: for (k=1;k<extop->midx;k++) AB[j*szd+i] += basisv[k]*PetscConj(extop->Hj[k*szd*szd+i*szd+j]);
818: MatDenseGetArray(proj->XpV1,&E);
819: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,E,&szd_,&zero,w,&szd_));
820: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
821: MatDenseRestoreArray(proj->XpV1,&E);
823: /* mat = mat + V2^*B(lambda)V2 */
824: PetscMemzero(AB,szd*szd*sizeof(PetscScalar));
825: for (i=1;i<extop->midx;i++) {
826: NEPDeflationEvaluateBasisMat(extop,i,PETSC_TRUE,basisv,hH,hHprev);
827: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,extop->XpX,&szd_,hH,&szd_,&zero,hHprev,&szd_));
828: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,extop->Hj+szd*szd*i,&szd_,hHprev,&szd_,&sone,AB,&szd_));
829: pts = hHprev; hHprev = hH; hH = pts;
830: }
831: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&sone,AB,&szd_,proj->V2,&szd_,&zero,w,&szd_));
832: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&nv_,&nv_,&n_,&sone,proj->V2,&szd_,w,&szd_,&sone,T,&ldds_));
833: }
834: DSRestoreArray(ds,mat,&T);
835: return(0);
836: }
838: PetscErrorCode NEPDeflationProjectOperator(NEP_EXT_OP extop,BV Vext,DS ds,PetscInt j0,PetscInt j1)
839: {
840: PetscErrorCode ierr;
841: PetscInt k,j,n=extop->n,dim;
842: Vec v,ve;
843: BV V1;
844: Mat G;
845: NEP nep=extop->nep;
846: NEP_DEF_PROJECT proj;
849: NEPCheckSplit(extop->nep,1);
850: proj = extop->proj;
851: if (!proj) {
852: /* Initialize the projection data structure */
853: PetscNew(&proj);
854: extop->proj = proj;
855: proj->extop = extop;
856: BVGetSizes(Vext,NULL,NULL,&dim);
857: proj->dim = dim;
858: if (extop->szd) {
859: proj->lwork = 3*dim*dim+2*extop->szd*extop->szd+extop->szd;
860: PetscMalloc3(dim*extop->szd,&proj->V2,nep->nt,&proj->V1pApX,proj->lwork,&proj->work);
861: for (j=0;j<nep->nt;j++) {
862: MatCreateSeqDense(PETSC_COMM_SELF,proj->dim,extop->szd,NULL,&proj->V1pApX[j]);
863: }
864: MatCreateSeqDense(PETSC_COMM_SELF,extop->szd,proj->dim,NULL,&proj->XpV1);
865: BVCreateVec(extop->X,&proj->w);
866: BVDuplicateResize(extop->X,proj->dim,&proj->V1);
867: }
868: DSNEPSetComputeMatrixFunction(ds,NEPDeflationDSNEPComputeMatrix,(void*)proj);
869: }
871: /* Split Vext in V1 and V2 */
872: if (extop->szd) {
873: for (j=j0;j<j1;j++) {
874: BVGetColumn(Vext,j,&ve);
875: BVGetColumn(proj->V1,j,&v);
876: NEPDeflationCopyToExtendedVec(extop,v,proj->V2+j*extop->szd,ve,PETSC_TRUE);
877: BVRestoreColumn(proj->V1,j,&v);
878: BVRestoreColumn(Vext,j,&ve);
879: }
880: V1 = proj->V1;
881: } else V1 = Vext;
883: /* Compute matrices V1^* A_i V1 */
884: BVSetActiveColumns(V1,j0,j1);
885: for (k=0;k<nep->nt;k++) {
886: DSGetMat(ds,DSMatExtra[k],&G);
887: BVMatProject(V1,nep->A[k],V1,G);
888: DSRestoreMat(ds,DSMatExtra[k],&G);
889: }
891: if (extop->n) {
892: if (extop->szd) {
893: /* Compute matrices V1^* A_i X and V1^* X */
894: BVSetActiveColumns(extop->W,0,n);
895: for (k=0;k<nep->nt;k++) {
896: BVMatMult(extop->X,nep->A[k],extop->W);
897: BVDot(extop->W,V1,proj->V1pApX[k]);
898: }
899: BVDot(V1,extop->X,proj->XpV1);
900: }
901: }
902: return(0);
903: }