Actual source code: nrefine.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Newton refinement for polynomial eigenproblems.
13: References:
15: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
16: of invariant pairs for matrix polynomials", Linear Algebra Appl.
17: 435(3):514-536, 2011.
19: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
20: polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
21: 730-745, 2016.
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A,M1;
29: BV V,M2,M3,W;
30: PetscInt k,nmat;
31: PetscScalar *fih,*work,*M4;
32: PetscBLASInt *pM4;
33: PetscBool compM1;
34: Vec t;
35: } FSubctx;
37: typedef struct {
38: Mat E[2],M1;
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool compM1;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W,M2,M3,Wt;
48: PetscScalar *M4,*w,*wt,*d,*dt;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx,*cols;
51: } MatExplicitCtx;
53: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
54: {
55: #if defined(PETSC_MISSING_LAPACK_GETRS)
57: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
58: #else
60: FSubctx *ctx;
61: PetscInt k,i;
62: PetscScalar *c;
63: PetscBLASInt k_,one=1,info;
66: MatShellGetContext(M,&ctx);
67: VecCopy(x,ctx->t);
68: k = ctx->k;
69: c = ctx->work;
70: PetscBLASIntCast(k,&k_);
71: MatMult(ctx->M1,x,y);
72: VecConjugate(ctx->t);
73: BVDotVec(ctx->M3,ctx->t,c);
74: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
75: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
76: SlepcCheckLapackInfo("getrs",info);
77: BVMultVec(ctx->M2,-1.0,1.0,y,c);
78: return(0);
79: #endif
80: }
82: /*
83: Evaluates the first d elements of the polynomial basis
84: on a given matrix H which is considered to be triangular
85: */
86: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
87: {
89: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
90: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
91: PetscScalar corr=0.0,alpha,beta;
92: PetscBLASInt k_,ldh_,ldfh_;
95: PetscBLASIntCast(ldh,&ldh_);
96: PetscBLASIntCast(k,&k_);
97: PetscBLASIntCast(ldfh,&ldfh_);
98: PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
99: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
100: if (nm>1) {
101: t = b[0]/a[0];
102: off = k;
103: for (j=0;j<k;j++) {
104: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
105: fH[j+j*ldfh] -= t;
106: }
107: }
108: for (i=2;i<nm;i++) {
109: off = i*k;
110: if (i==2) {
111: for (j=0;j<k;j++) {
112: fH[off+j+j*ldfh] = 1.0;
113: H[j+j*ldh] -= b[1];
114: }
115: } else {
116: for (j=0;j<k;j++) {
117: PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
118: H[j+j*ldh] += corr-b[i-1];
119: }
120: }
121: corr = b[i-1];
122: beta = -g[i-1]/a[i-1];
123: alpha = 1/a[i-1];
124: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
125: }
126: for (j=0;j<k;j++) H[j+j*ldh] += corr;
127: return(0);
128: }
130: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,FSubctx *ctx)
131: {
132: #if defined(PETSC_MISSING_LAPACK_GESV)
134: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV - Lapack routine is unavailable");
135: #else
136: PetscErrorCode ierr;
137: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
138: const PetscScalar *m3,*m2;
139: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
140: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
141: PetscBLASInt k_,lda_,lds_,nloc_,one=1,info;
142: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
143: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
144: MatStructure str;
145: Vec vc;
148: STGetMatStructure(pep->st,&str);
149: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
150: DHii = T12;
151: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
152: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
153: for (d=2;d<nmat;d++) {
154: for (j=0;j<k;j++) {
155: for (i=0;i<k;i++) {
156: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
157: }
158: }
159: }
160: /* T11 */
161: if (!ctx->compM1) {
162: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
163: PEPEvaluateBasis(pep,h,0,Ts,NULL);
164: for (j=1;j<nmat;j++) {
165: MatAXPY(M1,Ts[j],A[j],str);
166: }
167: }
169: /* T22 */
170: PetscBLASIntCast(lds,&lds_);
171: PetscBLASIntCast(k,&k_);
172: PetscBLASIntCast(lda,&lda_);
173: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
174: for (i=1;i<deg;i++) {
175: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
176: s = (i==1)?0.0:1.0;
177: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
178: }
179: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
181: /* T12 */
182: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
183: for (i=1;i<nmat;i++) {
184: MatDenseGetArray(Mk,&array);
185: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
186: MatDenseRestoreArray(Mk,&array);
187: BVSetActiveColumns(W,0,k);
188: BVMult(W,1.0,0.0,V,Mk);
189: if (i==1) {
190: BVMatMult(W,A[i],M2);
191: } else {
192: BVMatMult(W,A[i],M3); /* using M3 as work space */
193: BVMult(M2,1.0,1.0,M3,NULL);
194: }
195: }
197: /* T21 */
198: MatDenseGetArray(Mk,&array);
199: for (i=1;i<deg;i++) {
200: s = (i==1)?0.0:1.0;
201: ss = PetscConj(fh[i]);
202: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
203: }
204: MatDenseRestoreArray(Mk,&array);
205: BVSetActiveColumns(M3,0,k);
206: BVMult(M3,1.0,0.0,V,Mk);
207: for (i=0;i<k;i++) {
208: BVGetColumn(M3,i,&vc);
209: VecConjugate(vc);
210: BVRestoreColumn(M3,i,&vc);
211: }
212: MatDestroy(&Mk);
213: PetscFree3(T12,Tr,Ts);
215: VecGetLocalSize(ctx->t,&nloc);
216: PetscBLASIntCast(nloc,&nloc_);
217: PetscMalloc1(nloc*k,&T);
218: KSPGetOperators(pep->refineksp,NULL,&P);
219: if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
220: BVGetArrayRead(ctx->M2,&m2);
221: BVGetArrayRead(ctx->M3,&m3);
222: VecGetArray(ctx->t,&v);
223: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
224: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
225: SlepcCheckLapackInfo("gesv",info);
226: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
227: VecRestoreArray(ctx->t,&v);
228: BVRestoreArrayRead(ctx->M2,&m2);
229: BVRestoreArrayRead(ctx->M3,&m3);
230: MatDiagonalSet(P,ctx->t,ADD_VALUES);
231: PetscFree(T);
232: KSPSetUp(pep->refineksp);
233: return(0);
234: #endif
235: }
237: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
238: {
239: #if defined(PETSC_MISSING_LAPACK_GETRS)
241: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
242: #else
244: PetscScalar *t0;
245: PetscBLASInt k_,one=1,info,lda_;
246: PetscInt i,lda=nmat*k;
247: Mat M;
248: FSubctx *ctx;
251: KSPGetOperators(ksp,&M,NULL);
252: MatShellGetContext(M,&ctx);
253: PetscCalloc1(k,&t0);
254: PetscBLASIntCast(lda,&lda_);
255: PetscBLASIntCast(k,&k_);
256: for (i=0;i<k;i++) t0[i] = Rh[i];
257: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
258: SlepcCheckLapackInfo("getrs",info);
259: BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
260: KSPSolve(ksp,Rv,dVi);
261: VecConjugate(dVi);
262: BVDotVec(ctx->M3,dVi,dHi);
263: VecConjugate(dVi);
264: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
265: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
266: SlepcCheckLapackInfo("getrs",info);
267: PetscFree(t0);
268: return(0);
269: #endif
270: }
272: /*
273: Computes the residual P(H,V*S)*e_j for the polynomial
274: */
275: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
276: {
278: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
279: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
280: PetscInt i,ii,jj,lda;
281: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
282: Mat M0;
283: Vec w;
286: PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
287: lda = k*nmat;
288: PetscBLASIntCast(k,&k_);
289: PetscBLASIntCast(lds,&lds_);
290: PetscBLASIntCast(lda,&lda_);
291: PetscBLASIntCast(nmat,&nmat_);
292: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
293: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
294: BVSetActiveColumns(W,0,nmat);
295: BVMult(W,1.0,0.0,V,M0);
296: MatDestroy(&M0);
298: BVGetColumn(W,0,&w);
299: MatMult(A[0],w,Rv);
300: BVRestoreColumn(W,0,&w);
301: for (i=1;i<nmat;i++) {
302: BVGetColumn(W,i,&w);
303: MatMult(A[i],w,t);
304: BVRestoreColumn(W,i,&w);
305: VecAXPY(Rv,1.0,t);
306: }
307: /* Update right-hand side */
308: if (j) {
309: PetscBLASIntCast(ldh,&ldh_);
310: PetscMemzero(Z,k*k*sizeof(PetscScalar));
311: PetscMemzero(DS0,k*k*sizeof(PetscScalar));
312: PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
313: /* Update DfH */
314: for (i=1;i<nmat;i++) {
315: if (i>1) {
316: beta = -g[i-1];
317: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
318: tt += -b[i-1];
319: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
320: tt = b[i-1];
321: beta = 1.0/a[i-1];
322: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
323: F = DS0; DS0 = DS1; DS1 = F;
324: } else {
325: PetscMemzero(DS1,k*k*sizeof(PetscScalar));
326: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
327: }
328: for (jj=j;jj<k;jj++) {
329: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
330: }
331: }
332: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
333: /* Update right-hand side */
334: PetscBLASIntCast(2*k,&k2_);
335: PetscBLASIntCast(j,&j_);
336: PetscBLASIntCast(k+rds,&krds_);
337: c0 = DS0;
338: PetscMemzero(Rh,k*sizeof(PetscScalar));
339: for (i=0;i<nmat;i++) {
340: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
341: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
342: BVMultVec(V,1.0,0.0,t,h);
343: BVSetActiveColumns(dV,0,rds);
344: BVMultVec(dV,1.0,1.0,t,h+k);
345: BVGetColumn(W,i,&w);
346: MatMult(A[i],t,w);
347: BVRestoreColumn(W,i,&w);
348: if (i>0 && i<nmat-1) {
349: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
350: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
351: }
352: }
354: for (i=0;i<nmat;i++) h[i] = -1.0;
355: BVMultVec(W,1.0,1.0,Rv,h);
356: }
357: PetscFree4(h,DS0,DS1,Z);
358: return(0);
359: }
361: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
362: {
364: PetscInt i,j,incf,incc;
365: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
366: Vec v,t,xx1;
367: BV WW,T;
370: PetscMalloc3(sz,&y,sz,&g,k,&xx2);
371: if (trans) {
372: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
373: } else {
374: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
375: }
376: xx1 = vw;
377: VecCopy(x1,xx1);
378: PetscMemcpy(xx2,x2,sz*sizeof(PetscScalar));
379: PetscMemzero(sol2,k*sizeof(PetscScalar));
380: for (i=sz-1;i>=0;i--) {
381: BVGetColumn(WW,i,&v);
382: VecConjugate(v);
383: VecDot(xx1,v,y+i);
384: VecConjugate(v);
385: BVRestoreColumn(WW,i,&v);
386: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
387: y[i] = -(y[i]-xx2[i])/dd[i];
388: BVGetColumn(T,i,&t);
389: VecAXPY(xx1,-y[i],t);
390: BVRestoreColumn(T,i,&t);
391: for(j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
392: g[i] = xx2[i];
393: }
394: if (trans) {
395: KSPSolveTranspose(ksp,xx1,sol1);
396: } else {
397: KSPSolve(ksp,xx1,sol1);
398: }
399: if (trans) {
400: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
401: } else {
402: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
403: }
404: for (i=0;i<sz;i++) {
405: BVGetColumn(T,i,&t);
406: VecConjugate(t);
407: VecDot(sol1,t,&y2);
408: VecConjugate(t);
409: BVRestoreColumn(T,i,&t);
410: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
411: y2 = (g[i]-y2)/dd[i];
412: BVGetColumn(WW,i,&v);
413: VecAXPY(sol1,-y2,v);
414: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
415: sol2[i] = y[i]+y2;
416: BVRestoreColumn(WW,i,&v);
417: }
418: PetscFree3(y,g,xx2);
419: return(0);
420: }
422: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx)
423: {
425: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
426: Mat M1=matctx->M1,*A,*At,Mk;
427: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
428: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
429: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
430: PetscBLASInt lds_,lda_,k_;
431: MatStructure str;
432: PetscBool flg;
433: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
434: Vec vc,vc2;
437: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
438: STGetMatStructure(pep->st,&str);
439: STGetTransform(pep->st,&flg);
440: if (flg) {
441: PetscMalloc1(pep->nmat,&At);
442: for (i=0;i<pep->nmat;i++) {
443: STGetMatrixTransformed(pep->st,i,&At[i]);
444: }
445: } else At = pep->A;
446: if (matctx->subc) A = matctx->A;
447: else A = At;
448: /* Form the explicit system matrix */
449: DHii = T12;
450: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
451: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
452: for (l=2;l<nmat;l++) {
453: for (j=0;j<k;j++) {
454: for (i=0;i<k;i++) {
455: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
456: }
457: }
458: }
460: /* T11 */
461: if (!matctx->compM1) {
462: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
463: PEPEvaluateBasis(pep,h,0,Ts,NULL);
464: for (j=1;j<nmat;j++) {
465: MatAXPY(M1,Ts[j],A[j],str);
466: }
467: }
469: /* T22 */
470: PetscBLASIntCast(lds,&lds_);
471: PetscBLASIntCast(k,&k_);
472: PetscBLASIntCast(lda,&lda_);
473: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
474: for (i=1;i<deg;i++) {
475: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
476: s = (i==1)?0.0:1.0;
477: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
478: }
480: /* T12 */
481: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
482: for (i=1;i<nmat;i++) {
483: MatDenseGetArray(Mk,&array);
484: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
485: MatDenseRestoreArray(Mk,&array);
486: BVSetActiveColumns(W,0,k);
487: BVMult(W,1.0,0.0,V,Mk);
488: if (i==1) {
489: BVMatMult(W,A[i],M2);
490: } else {
491: BVMatMult(W,A[i],M3); /* using M3 as work space */
492: BVMult(M2,1.0,1.0,M3,NULL);
493: }
494: }
496: /* T21 */
497: MatDenseGetArray(Mk,&array);
498: for (i=1;i<deg;i++) {
499: s = (i==1)?0.0:1.0;
500: ss = PetscConj(fh[i]);
501: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
502: }
503: MatDenseRestoreArray(Mk,&array);
504: BVSetActiveColumns(M3,0,k);
505: BVMult(M3,1.0,0.0,V,Mk);
506: for (i=0;i<k;i++) {
507: BVGetColumn(M3,i,&vc);
508: VecConjugate(vc);
509: BVRestoreColumn(M3,i,&vc);
510: }
512: KSPSetOperators(ksp,M1,M1);
513: KSPSetUp(ksp);
514: MatDestroy(&Mk);
516: /* Set up for BEMW */
517: for (i=0;i<k;i++) {
518: BVGetColumn(M2,i,&vc);
519: BVGetColumn(W,i,&vc2);
520: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
521: BVRestoreColumn(M2,i,&vc);
522: BVGetColumn(M3,i,&vc);
523: VecConjugate(vc);
524: VecDot(vc2,vc,&d[i]);
525: VecConjugate(vc);
526: BVRestoreColumn(M3,i,&vc);
527: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
528: d[i] = M4[i+i*k]-d[i];
529: BVRestoreColumn(W,i,&vc2);
531: BVGetColumn(M3,i,&vc);
532: BVGetColumn(Wt,i,&vc2);
533: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
534: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
535: BVRestoreColumn(M3,i,&vc);
536: BVGetColumn(M2,i,&vc);
537: VecConjugate(vc2);
538: VecDot(vc,vc2,&dt[i]);
539: VecConjugate(vc2);
540: BVRestoreColumn(M2,i,&vc);
541: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
542: dt[i] = M4[i+i*k]-dt[i];
543: BVRestoreColumn(Wt,i,&vc2);
544: }
546: if (flg) {
547: PetscFree(At);
548: }
549: PetscFree3(T12,Tr,Ts);
550: return(0);
551: }
553: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W)
554: {
555: PetscErrorCode ierr;
556: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
557: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
558: Mat M,*E=matctx->E,*A,*At,Mk,Md;
559: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
560: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
561: PetscBLASInt lds_,lda_,k_;
562: const PetscInt *idxmc;
563: const PetscScalar *valsc,*carray;
564: MatStructure str;
565: Vec vc,vc0;
566: PetscBool flg;
569: PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
570: STGetMatStructure(pep->st,&str);
571: KSPGetOperators(ksp,&M,NULL);
572: MatGetOwnershipRange(E[1],&n1,&m1);
573: MatGetOwnershipRange(E[0],&n0,&m0);
574: MatGetOwnershipRange(M,&n,NULL);
575: PetscMalloc1(nmat,&ts);
576: STGetTransform(pep->st,&flg);
577: if (flg) {
578: PetscMalloc1(pep->nmat,&At);
579: for (i=0;i<pep->nmat;i++) {
580: STGetMatrixTransformed(pep->st,i,&At[i]);
581: }
582: } else At = pep->A;
583: if (matctx->subc) A = matctx->A;
584: else A = At;
585: /* Form the explicit system matrix */
586: DHii = T12;
587: PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
588: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
589: for (d=2;d<nmat;d++) {
590: for (j=0;j<k;j++) {
591: for (i=0;i<k;i++) {
592: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
593: }
594: }
595: }
597: /* T11 */
598: if (!matctx->compM1) {
599: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
600: PEPEvaluateBasis(pep,h,0,Ts,NULL);
601: for (j=1;j<nmat;j++) {
602: MatAXPY(E[0],Ts[j],A[j],str);
603: }
604: }
605: for (i=n0;i<m0;i++) {
606: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
607: idx = n+i-n0;
608: for (j=0;j<ncols;j++) {
609: idxg[j] = matctx->map0[idxmc[j]];
610: }
611: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
612: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
613: }
615: /* T22 */
616: PetscBLASIntCast(lds,&lds_);
617: PetscBLASIntCast(k,&k_);
618: PetscBLASIntCast(lda,&lda_);
619: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
620: for (i=1;i<deg;i++) {
621: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
622: s = (i==1)?0.0:1.0;
623: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
624: }
625: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
626: for (i=0;i<m1-n1;i++) {
627: idx = n+m0-n0+i;
628: for (j=0;j<k;j++) {
629: Tr[j] = T22[n1+i+j*k];
630: }
631: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
632: }
634: /* T21 */
635: for (i=1;i<deg;i++) {
636: s = (i==1)?0.0:1.0;
637: ss = PetscConj(fh[i]);
638: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
639: }
640: BVSetActiveColumns(W,0,k);
641: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
642: BVMult(W,1.0,0.0,V,Mk);
643: for (i=0;i<k;i++) {
644: BVGetColumn(W,i,&vc);
645: VecConjugate(vc);
646: VecGetArrayRead(vc,&carray);
647: idx = matctx->map1[i];
648: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
649: VecRestoreArrayRead(vc,&carray);
650: BVRestoreColumn(W,i,&vc);
651: }
653: /* T12 */
654: for (i=1;i<nmat;i++) {
655: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
656: for (j=0;j<k;j++) {
657: PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
658: }
659: }
660: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
661: for (i=0;i<nmat;i++) ts[i] = 1.0;
662: for (j=0;j<k;j++) {
663: MatDenseGetArray(Md,&array);
664: PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
665: MatDenseRestoreArray(Md,&array);
666: BVSetActiveColumns(W,0,nmat-1);
667: BVMult(W,1.0,0.0,V,Md);
668: for (i=nmat-1;i>0;i--) {
669: BVGetColumn(W,i-1,&vc0);
670: BVGetColumn(W,i,&vc);
671: MatMult(A[i],vc0,vc);
672: BVRestoreColumn(W,i-1,&vc0);
673: BVRestoreColumn(W,i,&vc);
674: }
675: BVSetActiveColumns(W,1,nmat);
676: BVGetColumn(W,0,&vc0);
677: BVMultVec(W,1.0,0.0,vc0,ts);
678: VecGetArrayRead(vc0,&carray);
679: idx = matctx->map1[j];
680: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
681: VecRestoreArrayRead(vc0,&carray);
682: BVRestoreColumn(W,0,&vc0);
683: }
684: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
685: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
686: KSPSetOperators(ksp,M,M);
687: KSPSetUp(ksp);
688: PetscFree(ts);
689: MatDestroy(&Mk);
690: MatDestroy(&Md);
691: if (flg) {
692: PetscFree(At);
693: }
694: PetscFree5(T22,T21,T12,Tr,Ts);
695: return(0);
696: }
698: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
699: {
700: PetscErrorCode ierr;
701: PetscInt n0,m0,n1,m1,i;
702: PetscScalar *arrayV;
703: const PetscScalar *array;
706: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
707: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
709: /* Right side */
710: VecGetArrayRead(Rv,&array);
711: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
712: VecRestoreArrayRead(Rv,&array);
713: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
714: VecAssemblyBegin(matctx->tN);
715: VecAssemblyEnd(matctx->tN);
717: /* Solve */
718: KSPSolve(ksp,matctx->tN,matctx->ttN);
720: /* Retrieve solution */
721: VecGetArray(dVi,&arrayV);
722: VecGetArrayRead(matctx->ttN,&array);
723: PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
724: VecRestoreArray(dVi,&arrayV);
725: if (!matctx->subc) {
726: VecGetArray(matctx->t1,&arrayV);
727: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
728: VecRestoreArray(matctx->t1,&arrayV);
729: VecRestoreArrayRead(matctx->ttN,&array);
730: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
731: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
732: VecGetArrayRead(matctx->vseq,&array);
733: for (i=0;i<k;i++) dHi[i] = array[i];
734: VecRestoreArrayRead(matctx->vseq,&array);
735: }
736: return(0);
737: }
739: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W)
740: {
741: PetscErrorCode ierr;
742: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
743: PetscMPIInt root,len;
744: PetscScalar *array2,h;
745: const PetscScalar *array;
746: Vec R,Vi;
747: FSubctx *ctx;
748: Mat M;
751: if (!matctx || !matctx->subc) {
752: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
753: h = H[i+i*ldh];
754: idx = i;
755: R = Rv;
756: Vi = dVi;
757: switch (pep->scheme) {
758: case PEP_REFINE_SCHEME_EXPLICIT:
759: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
760: matctx->compM1 = PETSC_FALSE;
761: break;
762: case PEP_REFINE_SCHEME_MBE:
763: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
764: matctx->compM1 = PETSC_FALSE;
765: break;
766: case PEP_REFINE_SCHEME_SCHUR:
767: KSPGetOperators(ksp,&M,NULL);
768: MatShellGetContext(M,&ctx);
769: NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
770: ctx->compM1 = PETSC_FALSE;
771: break;
772: }
773: } else {
774: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
775: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
776: h = H[idx+idx*ldh];
777: matctx->idx = idx;
778: switch (pep->scheme) {
779: case PEP_REFINE_SCHEME_EXPLICIT:
780: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
781: matctx->compM1 = PETSC_FALSE;
782: break;
783: case PEP_REFINE_SCHEME_MBE:
784: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
785: matctx->compM1 = PETSC_FALSE;
786: break;
787: case PEP_REFINE_SCHEME_SCHUR:
788: break;
789: }
790: } else idx = matctx->idx;
791: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
792: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
793: VecGetArrayRead(matctx->tg,&array);
794: VecPlaceArray(matctx->t,array);
795: VecCopy(matctx->t,matctx->Rv);
796: VecResetArray(matctx->t);
797: VecRestoreArrayRead(matctx->tg,&array);
798: R = matctx->Rv;
799: Vi = matctx->Vi;
800: }
801: if (idx==i && idx<k) {
802: switch (pep->scheme) {
803: case PEP_REFINE_SCHEME_EXPLICIT:
804: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
805: break;
806: case PEP_REFINE_SCHEME_MBE:
807: NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
808: break;
809: case PEP_REFINE_SCHEME_SCHUR:
810: NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
811: break;
812: }
813: }
814: if (matctx && matctx->subc) {
815: VecGetLocalSize(Vi,&m);
816: VecGetArrayRead(Vi,&array);
817: VecGetArray(matctx->tg,&array2);
818: PetscMemcpy(array2,array,m*sizeof(PetscScalar));
819: VecRestoreArray(matctx->tg,&array2);
820: VecRestoreArrayRead(Vi,&array);
821: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
822: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
823: switch (pep->scheme) {
824: case PEP_REFINE_SCHEME_EXPLICIT:
825: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
826: VecGetArrayRead(matctx->ttN,&array);
827: VecPlaceArray(matctx->tp,array+m0-n0);
828: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
829: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
830: VecResetArray(matctx->tp);
831: VecRestoreArrayRead(matctx->ttN,&array);
832: VecGetArrayRead(matctx->tpg,&array);
833: for (j=0;j<k;j++) dHi[j] = array[j];
834: VecRestoreArrayRead(matctx->tpg,&array);
835: break;
836: case PEP_REFINE_SCHEME_MBE:
837: root = 0;
838: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
839: PetscMPIIntCast(k,&len);
840: MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
841: break;
842: case PEP_REFINE_SCHEME_SCHUR:
843: break;
844: }
845: }
846: return(0);
847: }
849: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,MatExplicitCtx *matctx)
850: {
852: PetscInt i,nmat=pep->nmat,lda=nmat*k;
853: PetscScalar *fh,*Rh,*DfH;
854: PetscReal norm;
855: BV W;
856: Vec Rv,t,dvi;
857: FSubctx *ctx;
858: Mat M,*At;
859: PetscBool flg,lindep;
862: PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
863: *rds = 0;
864: BVCreateVec(pep->V,&Rv);
865: switch (pep->scheme) {
866: case PEP_REFINE_SCHEME_EXPLICIT:
867: BVCreateVec(pep->V,&t);
868: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
869: PetscMalloc1(nmat,&fh);
870: break;
871: case PEP_REFINE_SCHEME_MBE:
872: if (matctx->subc) {
873: BVCreateVec(pep->V,&t);
874: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
875: } else {
876: W = matctx->W;
877: PetscObjectReference((PetscObject)W);
878: t = matctx->t;
879: PetscObjectReference((PetscObject)t);
880: }
881: BVScale(matctx->W,0.0);
882: BVScale(matctx->Wt,0.0);
883: BVScale(matctx->M2,0.0);
884: BVScale(matctx->M3,0.0);
885: PetscMalloc1(nmat,&fh);
886: break;
887: case PEP_REFINE_SCHEME_SCHUR:
888: KSPGetOperators(ksp,&M,NULL);
889: MatShellGetContext(M,&ctx);
890: BVCreateVec(pep->V,&t);
891: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
892: fh = ctx->fih;
893: break;
894: }
895: PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
896: PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
897: STGetTransform(pep->st,&flg);
898: if (flg) {
899: PetscMalloc1(pep->nmat,&At);
900: for (i=0;i<pep->nmat;i++) {
901: STGetMatrixTransformed(pep->st,i,&At[i]);
902: }
903: } else At = pep->A;
905: /* Main loop for computing the ith columns of dX and dS */
906: for (i=0;i<k;i++) {
907: /* Compute and update i-th column of the right hand side */
908: PetscMemzero(Rh,k*sizeof(PetscScalar));
909: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);
911: /* Update and solve system */
912: BVGetColumn(dV,i,&dvi);
913: NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
914: /* Orthogonalize computed solution */
915: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
916: BVRestoreColumn(dV,i,&dvi);
917: if (!lindep) {
918: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
919: if (!lindep) {
920: dVS[k+i+i*2*k] = norm;
921: BVScaleColumn(dV,i,1.0/norm);
922: (*rds)++;
923: }
924: }
925: }
926: BVSetActiveColumns(dV,0,*rds);
927: VecDestroy(&t);
928: VecDestroy(&Rv);
929: BVDestroy(&W);
930: if (flg) {
931: PetscFree(At);
932: }
933: PetscFree2(DfH,Rh);
934: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
935: return(0);
936: }
938: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
939: {
940: #if defined(PETSC_MISSING_LAPACK_GEQRF)
942: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable");
943: #else
945: PetscInt j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
946: PetscScalar *G,*tau,sone=1.0,zero=0.0,*work;
947: PetscBLASInt lds_,k_,ldh_,info,ldg_,lda_;
950: PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
951: PetscBLASIntCast(lds,&lds_);
952: PetscBLASIntCast(lda,&lda_);
953: PetscBLASIntCast(k,&k_);
955: /* Form auxiliary matrix for the orthogonalization step */
956: ldg = deg*k;
957: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
958: PetscBLASIntCast(ldg,&ldg_);
959: PetscBLASIntCast(ldh,&ldh_);
960: for (j=0;j<deg;j++) {
961: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
962: }
963: /* Orthogonalize and update S */
964: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
965: SlepcCheckLapackInfo("geqrf",info);
966: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
968: /* Update H */
969: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
970: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
971: PetscFree3(tau,work,G);
972: return(0);
973: #endif
974: }
976: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
977: {
978: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
980: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
981: #else
983: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
984: PetscScalar *tau,*array,*work;
985: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
986: Mat M0;
989: PetscMalloc2(k,&tau,k,&work);
990: PetscBLASIntCast(lds,&lds_);
991: PetscBLASIntCast(lda,&lda_);
992: PetscBLASIntCast(ldh,&ldh_);
993: PetscBLASIntCast(k,&k_);
994: PetscBLASIntCast(2*k,&k2_);
995: PetscBLASIntCast((k+rds),&kdrs_);
996: /* Update H */
997: for (j=0;j<k;j++) {
998: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
999: }
1000: /* Update V */
1001: for (j=0;j<k;j++) {
1002: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
1003: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
1004: }
1005: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
1006: SlepcCheckLapackInfo("geqrf",info);
1007: /* Copy triangular matrix in S */
1008: for (j=0;j<k;j++) {
1009: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
1010: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
1011: }
1012: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
1013: SlepcCheckLapackInfo("orgqr",info);
1014: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1015: MatDenseGetArray(M0,&array);
1016: for (j=0;j<k;j++) {
1017: PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
1018: }
1019: MatDenseRestoreArray(M0,&array);
1020: BVMultInPlace(pep->V,M0,0,k);
1021: if (rds) {
1022: MatDenseGetArray(M0,&array);
1023: for (j=0;j<k;j++) {
1024: PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
1025: }
1026: MatDenseRestoreArray(M0,&array);
1027: BVMultInPlace(dV,M0,0,k);
1028: BVMult(pep->V,1.0,1.0,dV,NULL);
1029: }
1030: MatDestroy(&M0);
1031: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1032: PetscFree2(tau,work);
1033: return(0);
1034: #endif
1035: }
1037: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,MatExplicitCtx *matctx,PetscBool ini)
1038: {
1039: PetscErrorCode ierr;
1040: FSubctx *ctx;
1041: PetscScalar t,*coef;
1042: const PetscScalar *array;
1043: MatStructure str;
1044: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1045: MPI_Comm comm;
1046: PetscMPIInt np;
1047: const PetscInt *rgs0,*rgs1;
1048: Mat B,C,*E,*A,*At;
1049: IS is1,is2;
1050: Vec v;
1051: PetscBool flg;
1052: Mat M,P;
1055: PetscMalloc1(nmat,&coef);
1056: STGetTransform(pep->st,&flg);
1057: if (flg) {
1058: PetscMalloc1(pep->nmat,&At);
1059: for (i=0;i<pep->nmat;i++) {
1060: STGetMatrixTransformed(pep->st,i,&At[i]);
1061: }
1062: } else At = pep->A;
1063: switch (pep->scheme) {
1064: case PEP_REFINE_SCHEME_EXPLICIT:
1065: if (ini) {
1066: if (matctx->subc) {
1067: A = matctx->A;
1068: comm = PetscSubcommChild(matctx->subc);
1069: } else {
1070: A = At;
1071: PetscObjectGetComm((PetscObject)pep,&comm);
1072: }
1073: E = matctx->E;
1074: STGetMatStructure(pep->st,&str);
1075: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1076: j = (matctx->subc)?matctx->subc->color:0;
1077: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1078: for (j=1;j<nmat;j++) {
1079: MatAXPY(E[0],coef[j],A[j],str);
1080: }
1081: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1082: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1083: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1084: MatGetOwnershipRange(E[0],&n0,&m0);
1085: MatGetOwnershipRange(E[1],&n1,&m1);
1086: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1087: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1088: /* T12 and T21 are computed from V and V*, so,
1089: they must have the same column and row ranges */
1090: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1091: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1092: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1093: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1094: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1095: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1096: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1097: MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1098: MatDestroy(&B);
1099: MatDestroy(&C);
1100: matctx->compM1 = PETSC_TRUE;
1101: MatGetSize(E[0],NULL,&N0);
1102: MatGetSize(E[1],NULL,&N1);
1103: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1104: MatGetOwnershipRanges(E[0],&rgs0);
1105: MatGetOwnershipRanges(E[1],&rgs1);
1106: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1107: /* Create column (and row) mapping */
1108: for (p=0;p<np;p++) {
1109: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1110: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1111: }
1112: MatCreateVecs(M,NULL,&matctx->tN);
1113: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1114: VecDuplicate(matctx->tN,&matctx->ttN);
1115: if (matctx->subc) {
1116: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1117: count = np*k;
1118: PetscMalloc2(count,&idx1,count,&idx2);
1119: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1120: VecGetOwnershipRange(matctx->tp,&l0,NULL);
1121: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1122: for (si=0;si<matctx->subc->n;si++) {
1123: if (matctx->subc->color==si) {
1124: j=0;
1125: if (matctx->subc->color==si) {
1126: for (p=0;p<np;p++) {
1127: for (i=n1;i<m1;i++) {
1128: idx1[j] = l0+i-n1;
1129: idx2[j++] =p*k+i;
1130: }
1131: }
1132: }
1133: count = np*(m1-n1);
1134: } else count =0;
1135: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1136: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1137: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1138: ISDestroy(&is1);
1139: ISDestroy(&is2);
1140: }
1141: PetscFree2(idx1,idx2);
1142: } else {
1143: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1144: }
1145: P = M;
1146: } else {
1147: if (matctx->subc) {
1148: /* Scatter vectors pep->V */
1149: for (i=0;i<k;i++) {
1150: BVGetColumn(pep->V,i,&v);
1151: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1152: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1153: BVRestoreColumn(pep->V,i,&v);
1154: VecGetArrayRead(matctx->tg,&array);
1155: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1156: BVInsertVec(matctx->V,i,matctx->t);
1157: VecResetArray(matctx->t);
1158: VecRestoreArrayRead(matctx->tg,&array);
1159: }
1160: }
1161: }
1162: break;
1163: case PEP_REFINE_SCHEME_MBE:
1164: if (ini) {
1165: if (matctx->subc) {
1166: A = matctx->A;
1167: comm = PetscSubcommChild(matctx->subc);
1168: } else {
1169: matctx->V = pep->V;
1170: A = At;
1171: PetscObjectGetComm((PetscObject)pep,&comm);
1172: MatCreateVecs(pep->A[0],&matctx->t,NULL);
1173: }
1174: STGetMatStructure(pep->st,&str);
1175: MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1176: j = (matctx->subc)?matctx->subc->color:0;
1177: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1178: for (j=1;j<nmat;j++) {
1179: MatAXPY(matctx->M1,coef[j],A[j],str);
1180: }
1181: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1182: BVDuplicateResize(matctx->V,k,&matctx->M2);
1183: BVDuplicate(matctx->M2,&matctx->M3);
1184: BVDuplicate(matctx->M2,&matctx->Wt);
1185: PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1186: matctx->compM1 = PETSC_TRUE;
1187: M = matctx->M1;
1188: P = M;
1189: }
1190: break;
1191: case PEP_REFINE_SCHEME_SCHUR:
1192: if (ini) {
1193: PetscObjectGetComm((PetscObject)pep,&comm);
1194: MatGetSize(At[0],&m0,&n0);
1195: PetscMalloc1(1,&ctx);
1196: STGetMatStructure(pep->st,&str);
1197: /* Create a shell matrix to solve the linear system */
1198: ctx->V = pep->V;
1199: ctx->k = k; ctx->nmat = nmat;
1200: PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1201: for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1202: PetscMemzero(ctx->M4,k*k*sizeof(PetscScalar));
1203: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1204: MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatFSMult);
1205: BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1206: BVDuplicateResize(ctx->V,k,&ctx->M2);
1207: BVDuplicate(ctx->M2,&ctx->M3);
1208: BVCreateVec(pep->V,&ctx->t);
1209: MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1210: PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1211: for (j=1;j<nmat;j++) {
1212: MatAXPY(ctx->M1,coef[j],At[j],str);
1213: }
1214: MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1215: /* Compute a precond matrix for the system */
1216: t = H[0];
1217: PEPEvaluateBasis(pep,t,0,coef,NULL);
1218: for (j=1;j<nmat;j++) {
1219: MatAXPY(P,coef[j],At[j],str);
1220: }
1221: ctx->compM1 = PETSC_TRUE;
1222: }
1223: break;
1224: }
1225: if (ini) {
1226: PEPRefineGetKSP(pep,&pep->refineksp);
1227: KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1228: KSPSetOperators(pep->refineksp,M,P);
1229: KSPSetFromOptions(pep->refineksp);
1230: }
1232: if (!ini && matctx && matctx->subc) {
1233: /* Scatter vectors pep->V */
1234: for (i=0;i<k;i++) {
1235: BVGetColumn(pep->V,i,&v);
1236: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1237: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1238: BVRestoreColumn(pep->V,i,&v);
1239: VecGetArrayRead(matctx->tg,&array);
1240: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1241: BVInsertVec(matctx->V,i,matctx->t);
1242: VecResetArray(matctx->t);
1243: VecRestoreArrayRead(matctx->tg,&array);
1244: }
1245: }
1246: PetscFree(coef);
1247: if (flg) {
1248: PetscFree(At);
1249: }
1250: return(0);
1251: }
1253: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
1254: {
1255: PetscErrorCode ierr;
1256: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1257: IS is1,is2;
1258: BVType type;
1259: Vec v;
1260: const PetscScalar *array;
1261: Mat *A;
1262: PetscBool flg;
1265: STGetTransform(pep->st,&flg);
1266: if (flg) {
1267: PetscMalloc1(pep->nmat,&A);
1268: for (i=0;i<pep->nmat;i++) {
1269: STGetMatrixTransformed(pep->st,i,&A[i]);
1270: }
1271: } else A = pep->A;
1273: /* Duplicate pep matrices */
1274: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1275: for (i=0;i<pep->nmat;i++) {
1276: MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1277: }
1279: /* Create Scatter */
1280: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1281: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1282: VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1283: BVGetColumn(pep->V,0,&v);
1284: VecGetOwnershipRange(v,&n0,&m0);
1285: nloc0 = m0-n0;
1286: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1287: j = 0;
1288: for (si=0;si<matctx->subc->n;si++) {
1289: for (i=n0;i<m0;i++) {
1290: idx1[j] = i;
1291: idx2[j++] = i+pep->n*si;
1292: }
1293: }
1294: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1295: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1296: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1297: ISDestroy(&is1);
1298: ISDestroy(&is2);
1299: for (si=0;si<matctx->subc->n;si++) {
1300: j=0;
1301: for (i=n0;i<m0;i++) {
1302: idx1[j] = i;
1303: idx2[j++] = i+pep->n*si;
1304: }
1305: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1306: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1307: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1308: ISDestroy(&is1);
1309: ISDestroy(&is2);
1310: }
1311: BVRestoreColumn(pep->V,0,&v);
1312: PetscFree2(idx1,idx2);
1314: /* Duplicate pep->V vecs */
1315: BVGetType(pep->V,&type);
1316: BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1317: BVSetType(matctx->V,type);
1318: BVSetSizesFromVec(matctx->V,matctx->t,k);
1319: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1320: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1321: }
1322: for (i=0;i<k;i++) {
1323: BVGetColumn(pep->V,i,&v);
1324: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1325: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1326: BVRestoreColumn(pep->V,i,&v);
1327: VecGetArrayRead(matctx->tg,&array);
1328: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1329: BVInsertVec(matctx->V,i,matctx->t);
1330: VecResetArray(matctx->t);
1331: VecRestoreArrayRead(matctx->tg,&array);
1332: }
1334: VecDuplicate(matctx->t,&matctx->Rv);
1335: VecDuplicate(matctx->t,&matctx->Vi);
1336: if (flg) {
1337: PetscFree(A);
1338: }
1339: return(0);
1340: }
1342: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1343: {
1345: PetscInt i;
1348: VecScatterDestroy(&matctx->scatter_sub);
1349: for (i=0;i<matctx->subc->n;i++) {
1350: VecScatterDestroy(&matctx->scatter_id[i]);
1351: }
1352: for (i=0;i<pep->nmat;i++) {
1353: MatDestroy(&matctx->A[i]);
1354: }
1355: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1356: for (i=0;i<matctx->subc->n;i++) {
1357: VecScatterDestroy(&matctx->scatterp_id[i]);
1358: }
1359: VecDestroy(&matctx->tp);
1360: VecDestroy(&matctx->tpg);
1361: BVDestroy(&matctx->W);
1362: }
1363: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1364: BVDestroy(&matctx->V);
1365: VecDestroy(&matctx->t);
1366: VecDestroy(&matctx->tg);
1367: VecDestroy(&matctx->Rv);
1368: VecDestroy(&matctx->Vi);
1369: return(0);
1370: }
1372: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1373: {
1374: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI)
1376: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI - Lapack routine is unavailable");
1377: #else
1379: PetscScalar *H,*work,*dH,*fH,*dVS;
1380: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1381: PetscBLASInt k_,ld_,*p,info;
1382: BV dV;
1383: PetscBool sinvert,flg;
1384: MatExplicitCtx *matctx=NULL;
1385: Vec v;
1386: Mat M,P;
1387: FSubctx *ctx;
1390: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1391: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1392: /* the input tolerance is not being taken into account (by the moment) */
1393: its = *maxits;
1394: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1395: DSGetLeadingDimension(pep->ds,&ldh);
1396: DSGetArray(pep->ds,DS_MAT_A,&H);
1397: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1398: PetscMalloc1(2*k*k,&dVS);
1399: STGetTransform(pep->st,&flg);
1400: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1401: PetscBLASIntCast(k,&k_);
1402: PetscBLASIntCast(ldh,&ld_);
1403: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1404: if (sinvert) {
1405: DSGetArray(pep->ds,DS_MAT_A,&H);
1406: PetscMalloc1(k,&p);
1407: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1408: SlepcCheckLapackInfo("getrf",info);
1409: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1410: SlepcCheckLapackInfo("getri",info);
1411: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1412: pep->ops->backtransform = NULL;
1413: }
1414: if (sigma!=0.0) {
1415: DSGetArray(pep->ds,DS_MAT_A,&H);
1416: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1417: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1418: pep->ops->backtransform = NULL;
1419: }
1420: }
1421: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1422: DSGetArray(pep->ds,DS_MAT_A,&H);
1423: for (j=0;j<k;j++) {
1424: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1425: }
1426: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1427: if (!flg) {
1428: /* Restore original values */
1429: for (i=0;i<pep->nmat;i++){
1430: pep->pbc[pep->nmat+i] *= pep->sfactor;
1431: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1432: }
1433: }
1434: }
1435: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1436: for (i=0;i<k;i++) {
1437: BVGetColumn(pep->V,i,&v);
1438: VecPointwiseMult(v,v,pep->Dr);
1439: BVRestoreColumn(pep->V,i,&v);
1440: }
1441: }
1442: DSGetArray(pep->ds,DS_MAT_A,&H);
1444: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1445: /* check if H is in Schur form */
1446: for (i=0;i<k-1;i++) {
1447: if (H[i+1+i*ldh]!=0.0) {
1448: #if !defined(PETSC_USE_COMPLEX)
1449: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1450: #else
1451: SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1452: #endif
1453: }
1454: }
1455: if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1456: BVSetActiveColumns(pep->V,0,k);
1457: BVDuplicateResize(pep->V,k,&dV);
1458: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1459: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1460: PetscMalloc1(1,&matctx);
1461: if (nsubc>1) { /* spliting in subcommunicators */
1462: matctx->subc = pep->refinesubc;
1463: NRefSubcommSetup(pep,k,matctx,nsubc);
1464: } else matctx->subc=NULL;
1465: }
1467: /* Loop performing iterative refinements */
1468: for (i=0;i<its;i++) {
1469: /* Pre-compute the polynomial basis evaluated in H */
1470: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1471: PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1472: /* Solve the linear system */
1473: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1474: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1475: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1476: }
1477: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1478: if (!flg && sinvert) {
1479: PetscFree(p);
1480: }
1481: PetscFree3(dH,fH,work);
1482: PetscFree(dVS);
1483: BVDestroy(&dV);
1484: switch (pep->scheme) {
1485: case PEP_REFINE_SCHEME_EXPLICIT:
1486: for (i=0;i<2;i++) {
1487: MatDestroy(&matctx->E[i]);
1488: }
1489: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1490: VecDestroy(&matctx->tN);
1491: VecDestroy(&matctx->ttN);
1492: VecDestroy(&matctx->t1);
1493: if (nsubc>1) {
1494: NRefSubcommDestroy(pep,matctx);
1495: } else {
1496: VecDestroy(&matctx->vseq);
1497: VecScatterDestroy(&matctx->scatterctx);
1498: }
1499: PetscFree(matctx);
1500: KSPGetOperators(pep->refineksp,&M,NULL);
1501: MatDestroy(&M);
1502: break;
1503: case PEP_REFINE_SCHEME_MBE:
1504: BVDestroy(&matctx->W);
1505: BVDestroy(&matctx->Wt);
1506: BVDestroy(&matctx->M2);
1507: BVDestroy(&matctx->M3);
1508: MatDestroy(&matctx->M1);
1509: VecDestroy(&matctx->t);
1510: PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1511: if (nsubc>1) {
1512: NRefSubcommDestroy(pep,matctx);
1513: }
1514: PetscFree(matctx);
1515: break;
1516: case PEP_REFINE_SCHEME_SCHUR:
1517: KSPGetOperators(pep->refineksp,&M,&P);
1518: MatShellGetContext(M,&ctx);
1519: PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1520: MatDestroy(&ctx->M1);
1521: BVDestroy(&ctx->M2);
1522: BVDestroy(&ctx->M3);
1523: BVDestroy(&ctx->W);
1524: VecDestroy(&ctx->t);
1525: PetscFree(ctx);
1526: MatDestroy(&M);
1527: MatDestroy(&P);
1528: break;
1529: }
1530: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1531: return(0);
1532: #endif
1533: }