Actual source code: nrefine.c
slepc-3.16.2 2022-02-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Newton refinement for polynomial eigenproblems.
13: References:
15: [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
16: of invariant pairs for matrix polynomials", Linear Algebra Appl.
17: 435(3):514-536, 2011.
19: [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
20: polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
21: 730-745, 2016.
22: */
24: #include <slepc/private/pepimpl.h>
25: #include <slepcblaslapack.h>
27: typedef struct {
28: Mat *A,M1;
29: BV V,M2,M3,W;
30: PetscInt k,nmat;
31: PetscScalar *fih,*work,*M4;
32: PetscBLASInt *pM4;
33: PetscBool compM1;
34: Vec t;
35: } PEP_REFINE_MATSHELL;
37: typedef struct {
38: Mat E[2],M1;
39: Vec tN,ttN,t1,vseq;
40: VecScatter scatterctx;
41: PetscBool compM1;
42: PetscInt *map0,*map1,*idxg,*idxp;
43: PetscSubcomm subc;
44: VecScatter scatter_sub;
45: VecScatter *scatter_id,*scatterp_id;
46: Mat *A;
47: BV V,W,M2,M3,Wt;
48: PetscScalar *M4,*w,*wt,*d,*dt;
49: Vec t,tg,Rv,Vi,tp,tpg;
50: PetscInt idx,*cols;
51: } PEP_REFINE_EXPLICIT;
53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
54: {
55: PetscErrorCode ierr;
56: PEP_REFINE_MATSHELL *ctx;
57: PetscInt k,i;
58: PetscScalar *c;
59: PetscBLASInt k_,one=1,info;
62: MatShellGetContext(M,&ctx);
63: VecCopy(x,ctx->t);
64: k = ctx->k;
65: c = ctx->work;
66: PetscBLASIntCast(k,&k_);
67: MatMult(ctx->M1,x,y);
68: VecConjugate(ctx->t);
69: BVDotVec(ctx->M3,ctx->t,c);
70: for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
71: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
72: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
73: PetscFPTrapPop();
74: SlepcCheckLapackInfo("getrs",info);
75: BVMultVec(ctx->M2,-1.0,1.0,y,c);
76: return(0);
77: }
79: /*
80: Evaluates the first d elements of the polynomial basis
81: on a given matrix H which is considered to be triangular
82: */
83: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
84: {
86: PetscInt i,j,ldfh=nm*k,off,nmat=pep->nmat;
87: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
88: PetscScalar corr=0.0,alpha,beta;
89: PetscBLASInt k_,ldh_,ldfh_;
92: PetscBLASIntCast(ldh,&ldh_);
93: PetscBLASIntCast(k,&k_);
94: PetscBLASIntCast(ldfh,&ldfh_);
95: PetscArrayzero(fH,nm*k*k);
96: if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
97: if (nm>1) {
98: t = b[0]/a[0];
99: off = k;
100: for (j=0;j<k;j++) {
101: for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
102: fH[j+j*ldfh] -= t;
103: }
104: }
105: for (i=2;i<nm;i++) {
106: off = i*k;
107: if (i==2) {
108: for (j=0;j<k;j++) {
109: fH[off+j+j*ldfh] = 1.0;
110: H[j+j*ldh] -= b[1];
111: }
112: } else {
113: for (j=0;j<k;j++) {
114: PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
115: H[j+j*ldh] += corr-b[i-1];
116: }
117: }
118: corr = b[i-1];
119: beta = -g[i-1]/a[i-1];
120: alpha = 1/a[i-1];
121: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
122: }
123: for (j=0;j<k;j++) H[j+j*ldh] += corr;
124: return(0);
125: }
127: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
128: {
129: PetscErrorCode ierr;
130: PetscScalar *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
131: const PetscScalar *m3,*m2;
132: PetscInt i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
133: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
134: PetscBLASInt k_,lda_,lds_,nloc_,one=1,info;
135: Mat *A=ctx->A,Mk,M1=ctx->M1,P;
136: BV V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
137: MatStructure str;
138: Vec vc;
141: STGetMatStructure(pep->st,&str);
142: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
143: DHii = T12;
144: PetscArrayzero(DHii,k*k*nmat);
145: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
146: for (d=2;d<nmat;d++) {
147: for (j=0;j<k;j++) {
148: for (i=0;i<k;i++) {
149: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
150: }
151: }
152: }
153: /* T11 */
154: if (!ctx->compM1) {
155: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
156: PEPEvaluateBasis(pep,h,0,Ts,NULL);
157: for (j=1;j<nmat;j++) {
158: MatAXPY(M1,Ts[j],A[j],str);
159: }
160: }
162: /* T22 */
163: PetscBLASIntCast(lds,&lds_);
164: PetscBLASIntCast(k,&k_);
165: PetscBLASIntCast(lda,&lda_);
166: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
167: for (i=1;i<deg;i++) {
168: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
169: s = (i==1)?0.0:1.0;
170: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
171: }
172: for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }
174: /* T12 */
175: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
176: for (i=1;i<nmat;i++) {
177: MatDenseGetArrayWrite(Mk,&array);
178: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
179: MatDenseRestoreArrayWrite(Mk,&array);
180: BVSetActiveColumns(W,0,k);
181: BVMult(W,1.0,0.0,V,Mk);
182: if (i==1) {
183: BVMatMult(W,A[i],M2);
184: } else {
185: BVMatMult(W,A[i],M3); /* using M3 as work space */
186: BVMult(M2,1.0,1.0,M3,NULL);
187: }
188: }
190: /* T21 */
191: MatDenseGetArrayWrite(Mk,&array);
192: for (i=1;i<deg;i++) {
193: s = (i==1)?0.0:1.0;
194: ss = PetscConj(fh[i]);
195: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
196: }
197: MatDenseRestoreArrayWrite(Mk,&array);
198: BVSetActiveColumns(M3,0,k);
199: BVMult(M3,1.0,0.0,V,Mk);
200: for (i=0;i<k;i++) {
201: BVGetColumn(M3,i,&vc);
202: VecConjugate(vc);
203: BVRestoreColumn(M3,i,&vc);
204: }
205: MatDestroy(&Mk);
206: PetscFree3(T12,Tr,Ts);
208: VecGetLocalSize(ctx->t,&nloc);
209: PetscBLASIntCast(nloc,&nloc_);
210: PetscMalloc1(nloc*k,&T);
211: KSPGetOperators(pep->refineksp,NULL,&P);
212: if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
213: BVGetArrayRead(ctx->M2,&m2);
214: BVGetArrayRead(ctx->M3,&m3);
215: VecGetArray(ctx->t,&v);
216: for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
217: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
218: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
219: PetscFPTrapPop();
220: SlepcCheckLapackInfo("gesv",info);
221: for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
222: VecRestoreArray(ctx->t,&v);
223: BVRestoreArrayRead(ctx->M2,&m2);
224: BVRestoreArrayRead(ctx->M3,&m3);
225: MatDiagonalSet(P,ctx->t,ADD_VALUES);
226: PetscFree(T);
227: KSPSetUp(pep->refineksp);
228: return(0);
229: }
231: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
232: {
233: PetscErrorCode ierr;
234: PetscScalar *t0;
235: PetscBLASInt k_,one=1,info,lda_;
236: PetscInt i,lda=nmat*k;
237: Mat M;
238: PEP_REFINE_MATSHELL *ctx;
241: KSPGetOperators(ksp,&M,NULL);
242: MatShellGetContext(M,&ctx);
243: PetscCalloc1(k,&t0);
244: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
245: PetscBLASIntCast(lda,&lda_);
246: PetscBLASIntCast(k,&k_);
247: for (i=0;i<k;i++) t0[i] = Rh[i];
248: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
249: SlepcCheckLapackInfo("getrs",info);
250: BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
251: KSPSolve(ksp,Rv,dVi);
252: VecConjugate(dVi);
253: BVDotVec(ctx->M3,dVi,dHi);
254: VecConjugate(dVi);
255: for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
256: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
257: SlepcCheckLapackInfo("getrs",info);
258: PetscFPTrapPop();
259: PetscFree(t0);
260: return(0);
261: }
263: /*
264: Computes the residual P(H,V*S)*e_j for the polynomial
265: */
266: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
267: {
269: PetscScalar *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
270: PetscReal *a=pcf,*b=pcf+nmat,*g=b+nmat;
271: PetscInt i,ii,jj,lda;
272: PetscBLASInt lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
273: Mat M0;
274: Vec w;
277: PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
278: lda = k*nmat;
279: PetscBLASIntCast(k,&k_);
280: PetscBLASIntCast(lds,&lds_);
281: PetscBLASIntCast(lda,&lda_);
282: PetscBLASIntCast(nmat,&nmat_);
283: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
284: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
285: BVSetActiveColumns(W,0,nmat);
286: BVMult(W,1.0,0.0,V,M0);
287: MatDestroy(&M0);
289: BVGetColumn(W,0,&w);
290: MatMult(A[0],w,Rv);
291: BVRestoreColumn(W,0,&w);
292: for (i=1;i<nmat;i++) {
293: BVGetColumn(W,i,&w);
294: MatMult(A[i],w,t);
295: BVRestoreColumn(W,i,&w);
296: VecAXPY(Rv,1.0,t);
297: }
298: /* Update right-hand side */
299: if (j) {
300: PetscBLASIntCast(ldh,&ldh_);
301: PetscArrayzero(Z,k*k);
302: PetscArrayzero(DS0,k*k);
303: PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
304: /* Update DfH */
305: for (i=1;i<nmat;i++) {
306: if (i>1) {
307: beta = -g[i-1];
308: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
309: tt += -b[i-1];
310: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
311: tt = b[i-1];
312: beta = 1.0/a[i-1];
313: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
314: F = DS0; DS0 = DS1; DS1 = F;
315: } else {
316: PetscArrayzero(DS1,k*k);
317: for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
318: }
319: for (jj=j;jj<k;jj++) {
320: for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
321: }
322: }
323: for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
324: /* Update right-hand side */
325: PetscBLASIntCast(2*k,&k2_);
326: PetscBLASIntCast(j,&j_);
327: PetscBLASIntCast(k+rds,&krds_);
328: c0 = DS0;
329: PetscArrayzero(Rh,k);
330: for (i=0;i<nmat;i++) {
331: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
332: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
333: BVMultVec(V,1.0,0.0,t,h);
334: BVSetActiveColumns(dV,0,rds);
335: BVMultVec(dV,1.0,1.0,t,h+k);
336: BVGetColumn(W,i,&w);
337: MatMult(A[i],t,w);
338: BVRestoreColumn(W,i,&w);
339: if (i>0 && i<nmat-1) {
340: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
341: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
342: }
343: }
345: for (i=0;i<nmat;i++) h[i] = -1.0;
346: BVMultVec(W,1.0,1.0,Rv,h);
347: }
348: PetscFree4(h,DS0,DS1,Z);
349: return(0);
350: }
352: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
353: {
355: PetscInt i,j,incf,incc;
356: PetscScalar *y,*g,*xx2,*ww,y2,*dd;
357: Vec v,t,xx1;
358: BV WW,T;
361: PetscMalloc3(sz,&y,sz,&g,k,&xx2);
362: if (trans) {
363: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
364: } else {
365: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
366: }
367: xx1 = vw;
368: VecCopy(x1,xx1);
369: PetscArraycpy(xx2,x2,sz);
370: PetscArrayzero(sol2,k);
371: for (i=sz-1;i>=0;i--) {
372: BVGetColumn(WW,i,&v);
373: VecConjugate(v);
374: VecDot(xx1,v,y+i);
375: VecConjugate(v);
376: BVRestoreColumn(WW,i,&v);
377: for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
378: y[i] = -(y[i]-xx2[i])/dd[i];
379: BVGetColumn(T,i,&t);
380: VecAXPY(xx1,-y[i],t);
381: BVRestoreColumn(T,i,&t);
382: for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
383: g[i] = xx2[i];
384: }
385: if (trans) {
386: KSPSolveTranspose(ksp,xx1,sol1);
387: } else {
388: KSPSolve(ksp,xx1,sol1);
389: }
390: if (trans) {
391: WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
392: } else {
393: WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
394: }
395: for (i=0;i<sz;i++) {
396: BVGetColumn(T,i,&t);
397: VecConjugate(t);
398: VecDot(sol1,t,&y2);
399: VecConjugate(t);
400: BVRestoreColumn(T,i,&t);
401: for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
402: y2 = (g[i]-y2)/dd[i];
403: BVGetColumn(WW,i,&v);
404: VecAXPY(sol1,-y2,v);
405: for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
406: sol2[i] = y[i]+y2;
407: BVRestoreColumn(WW,i,&v);
408: }
409: PetscFree3(y,g,xx2);
410: return(0);
411: }
413: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
414: {
416: PetscInt i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
417: Mat M1=matctx->M1,*A,*At,Mk;
418: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
419: PetscScalar s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
420: PetscScalar *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
421: PetscBLASInt lds_,lda_,k_;
422: MatStructure str;
423: PetscBool flg;
424: BV M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
425: Vec vc,vc2;
428: PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
429: STGetMatStructure(pep->st,&str);
430: STGetTransform(pep->st,&flg);
431: if (flg) {
432: PetscMalloc1(pep->nmat,&At);
433: for (i=0;i<pep->nmat;i++) {
434: STGetMatrixTransformed(pep->st,i,&At[i]);
435: }
436: } else At = pep->A;
437: if (matctx->subc) A = matctx->A;
438: else A = At;
439: /* Form the explicit system matrix */
440: DHii = T12;
441: PetscArrayzero(DHii,k*k*nmat);
442: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
443: for (l=2;l<nmat;l++) {
444: for (j=0;j<k;j++) {
445: for (i=0;i<k;i++) {
446: DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
447: }
448: }
449: }
451: /* T11 */
452: if (!matctx->compM1) {
453: MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
454: PEPEvaluateBasis(pep,h,0,Ts,NULL);
455: for (j=1;j<nmat;j++) {
456: MatAXPY(M1,Ts[j],A[j],str);
457: }
458: }
460: /* T22 */
461: PetscBLASIntCast(lds,&lds_);
462: PetscBLASIntCast(k,&k_);
463: PetscBLASIntCast(lda,&lda_);
464: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
465: for (i=1;i<deg;i++) {
466: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
467: s = (i==1)?0.0:1.0;
468: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
469: }
471: /* T12 */
472: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
473: for (i=1;i<nmat;i++) {
474: MatDenseGetArrayWrite(Mk,&array);
475: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
476: MatDenseRestoreArrayWrite(Mk,&array);
477: BVSetActiveColumns(W,0,k);
478: BVMult(W,1.0,0.0,V,Mk);
479: if (i==1) {
480: BVMatMult(W,A[i],M2);
481: } else {
482: BVMatMult(W,A[i],M3); /* using M3 as work space */
483: BVMult(M2,1.0,1.0,M3,NULL);
484: }
485: }
487: /* T21 */
488: MatDenseGetArrayWrite(Mk,&array);
489: for (i=1;i<deg;i++) {
490: s = (i==1)?0.0:1.0;
491: ss = PetscConj(fh[i]);
492: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
493: }
494: MatDenseRestoreArrayWrite(Mk,&array);
495: BVSetActiveColumns(M3,0,k);
496: BVMult(M3,1.0,0.0,V,Mk);
497: for (i=0;i<k;i++) {
498: BVGetColumn(M3,i,&vc);
499: VecConjugate(vc);
500: BVRestoreColumn(M3,i,&vc);
501: }
503: KSPSetOperators(ksp,M1,M1);
504: KSPSetUp(ksp);
505: MatDestroy(&Mk);
507: /* Set up for BEMW */
508: for (i=0;i<k;i++) {
509: BVGetColumn(M2,i,&vc);
510: BVGetColumn(W,i,&vc2);
511: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
512: BVRestoreColumn(M2,i,&vc);
513: BVGetColumn(M3,i,&vc);
514: VecConjugate(vc);
515: VecDot(vc2,vc,&d[i]);
516: VecConjugate(vc);
517: BVRestoreColumn(M3,i,&vc);
518: for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
519: d[i] = M4[i+i*k]-d[i];
520: BVRestoreColumn(W,i,&vc2);
522: BVGetColumn(M3,i,&vc);
523: BVGetColumn(Wt,i,&vc2);
524: for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
525: NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
526: BVRestoreColumn(M3,i,&vc);
527: BVGetColumn(M2,i,&vc);
528: VecConjugate(vc2);
529: VecDot(vc,vc2,&dt[i]);
530: VecConjugate(vc2);
531: BVRestoreColumn(M2,i,&vc);
532: for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
533: dt[i] = M4[i+i*k]-dt[i];
534: BVRestoreColumn(Wt,i,&vc2);
535: }
537: if (flg) {
538: PetscFree(At);
539: }
540: PetscFree3(T12,Tr,Ts);
541: return(0);
542: }
544: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
545: {
546: PetscErrorCode ierr;
547: PetscInt i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
548: PetscInt *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
549: Mat M,*E=matctx->E,*A,*At,Mk,Md;
550: PetscReal *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
551: PetscScalar s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
552: PetscBLASInt lds_,lda_,k_;
553: const PetscInt *idxmc;
554: const PetscScalar *valsc,*carray;
555: MatStructure str;
556: Vec vc,vc0;
557: PetscBool flg;
560: PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
561: STGetMatStructure(pep->st,&str);
562: KSPGetOperators(ksp,&M,NULL);
563: MatGetOwnershipRange(E[1],&n1,&m1);
564: MatGetOwnershipRange(E[0],&n0,&m0);
565: MatGetOwnershipRange(M,&n,NULL);
566: PetscMalloc1(nmat,&ts);
567: STGetTransform(pep->st,&flg);
568: if (flg) {
569: PetscMalloc1(pep->nmat,&At);
570: for (i=0;i<pep->nmat;i++) {
571: STGetMatrixTransformed(pep->st,i,&At[i]);
572: }
573: } else At = pep->A;
574: if (matctx->subc) A = matctx->A;
575: else A = At;
576: /* Form the explicit system matrix */
577: DHii = T12;
578: PetscArrayzero(DHii,k*k*nmat);
579: for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
580: for (d=2;d<nmat;d++) {
581: for (j=0;j<k;j++) {
582: for (i=0;i<k;i++) {
583: DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
584: }
585: }
586: }
588: /* T11 */
589: if (!matctx->compM1) {
590: MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
591: PEPEvaluateBasis(pep,h,0,Ts,NULL);
592: for (j=1;j<nmat;j++) {
593: MatAXPY(E[0],Ts[j],A[j],str);
594: }
595: }
596: for (i=n0;i<m0;i++) {
597: MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
598: idx = n+i-n0;
599: for (j=0;j<ncols;j++) {
600: idxg[j] = matctx->map0[idxmc[j]];
601: }
602: MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
603: MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
604: }
606: /* T22 */
607: PetscBLASIntCast(lds,&lds_);
608: PetscBLASIntCast(k,&k_);
609: PetscBLASIntCast(lda,&lda_);
610: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
611: for (i=1;i<deg;i++) {
612: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
613: s = (i==1)?0.0:1.0;
614: PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
615: }
616: for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
617: for (i=0;i<m1-n1;i++) {
618: idx = n+m0-n0+i;
619: for (j=0;j<k;j++) {
620: Tr[j] = T22[n1+i+j*k];
621: }
622: MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
623: }
625: /* T21 */
626: for (i=1;i<deg;i++) {
627: s = (i==1)?0.0:1.0;
628: ss = PetscConj(fh[i]);
629: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
630: }
631: BVSetActiveColumns(W,0,k);
632: MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
633: BVMult(W,1.0,0.0,V,Mk);
634: for (i=0;i<k;i++) {
635: BVGetColumn(W,i,&vc);
636: VecConjugate(vc);
637: VecGetArrayRead(vc,&carray);
638: idx = matctx->map1[i];
639: MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
640: VecRestoreArrayRead(vc,&carray);
641: BVRestoreColumn(W,i,&vc);
642: }
644: /* T12 */
645: for (i=1;i<nmat;i++) {
646: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
647: for (j=0;j<k;j++) {
648: PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
649: }
650: }
651: MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
652: for (i=0;i<nmat;i++) ts[i] = 1.0;
653: for (j=0;j<k;j++) {
654: MatDenseGetArrayWrite(Md,&array);
655: PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
656: MatDenseRestoreArrayWrite(Md,&array);
657: BVSetActiveColumns(W,0,nmat-1);
658: BVMult(W,1.0,0.0,V,Md);
659: for (i=nmat-1;i>0;i--) {
660: BVGetColumn(W,i-1,&vc0);
661: BVGetColumn(W,i,&vc);
662: MatMult(A[i],vc0,vc);
663: BVRestoreColumn(W,i-1,&vc0);
664: BVRestoreColumn(W,i,&vc);
665: }
666: BVSetActiveColumns(W,1,nmat);
667: BVGetColumn(W,0,&vc0);
668: BVMultVec(W,1.0,0.0,vc0,ts);
669: VecGetArrayRead(vc0,&carray);
670: idx = matctx->map1[j];
671: MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
672: VecRestoreArrayRead(vc0,&carray);
673: BVRestoreColumn(W,0,&vc0);
674: }
675: MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
676: MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
677: KSPSetOperators(ksp,M,M);
678: KSPSetUp(ksp);
679: PetscFree(ts);
680: MatDestroy(&Mk);
681: MatDestroy(&Md);
682: if (flg) {
683: PetscFree(At);
684: }
685: PetscFree5(T22,T21,T12,Tr,Ts);
686: return(0);
687: }
689: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
690: {
691: PetscErrorCode ierr;
692: PetscInt n0,m0,n1,m1,i;
693: PetscScalar *arrayV;
694: const PetscScalar *array;
697: MatGetOwnershipRange(matctx->E[1],&n1,&m1);
698: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
700: /* Right side */
701: VecGetArrayRead(Rv,&array);
702: VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
703: VecRestoreArrayRead(Rv,&array);
704: VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
705: VecAssemblyBegin(matctx->tN);
706: VecAssemblyEnd(matctx->tN);
708: /* Solve */
709: KSPSolve(ksp,matctx->tN,matctx->ttN);
711: /* Retrieve solution */
712: VecGetArray(dVi,&arrayV);
713: VecGetArrayRead(matctx->ttN,&array);
714: PetscArraycpy(arrayV,array,m0-n0);
715: VecRestoreArray(dVi,&arrayV);
716: if (!matctx->subc) {
717: VecGetArray(matctx->t1,&arrayV);
718: for (i=0;i<m1-n1;i++) arrayV[i] = array[m0-n0+i];
719: VecRestoreArray(matctx->t1,&arrayV);
720: VecRestoreArrayRead(matctx->ttN,&array);
721: VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
722: VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
723: VecGetArrayRead(matctx->vseq,&array);
724: for (i=0;i<k;i++) dHi[i] = array[i];
725: VecRestoreArrayRead(matctx->vseq,&array);
726: }
727: return(0);
728: }
730: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
731: {
732: PetscErrorCode ierr;
733: PetscInt j,m,lda=pep->nmat*k,n0,m0,idx;
734: PetscMPIInt root,len;
735: PetscScalar *array2,h;
736: const PetscScalar *array;
737: Vec R,Vi;
738: PEP_REFINE_MATSHELL *ctx;
739: Mat M;
742: if (!matctx || !matctx->subc) {
743: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
744: h = H[i+i*ldh];
745: idx = i;
746: R = Rv;
747: Vi = dVi;
748: switch (pep->scheme) {
749: case PEP_REFINE_SCHEME_EXPLICIT:
750: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
751: matctx->compM1 = PETSC_FALSE;
752: break;
753: case PEP_REFINE_SCHEME_MBE:
754: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
755: matctx->compM1 = PETSC_FALSE;
756: break;
757: case PEP_REFINE_SCHEME_SCHUR:
758: KSPGetOperators(ksp,&M,NULL);
759: MatShellGetContext(M,&ctx);
760: NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
761: ctx->compM1 = PETSC_FALSE;
762: break;
763: }
764: } else {
765: if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
766: for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
767: h = H[idx+idx*ldh];
768: matctx->idx = idx;
769: switch (pep->scheme) {
770: case PEP_REFINE_SCHEME_EXPLICIT:
771: NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
772: matctx->compM1 = PETSC_FALSE;
773: break;
774: case PEP_REFINE_SCHEME_MBE:
775: NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
776: matctx->compM1 = PETSC_FALSE;
777: break;
778: case PEP_REFINE_SCHEME_SCHUR:
779: break;
780: }
781: } else idx = matctx->idx;
782: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
783: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
784: VecGetArrayRead(matctx->tg,&array);
785: VecPlaceArray(matctx->t,array);
786: VecCopy(matctx->t,matctx->Rv);
787: VecResetArray(matctx->t);
788: VecRestoreArrayRead(matctx->tg,&array);
789: R = matctx->Rv;
790: Vi = matctx->Vi;
791: }
792: if (idx==i && idx<k) {
793: switch (pep->scheme) {
794: case PEP_REFINE_SCHEME_EXPLICIT:
795: NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
796: break;
797: case PEP_REFINE_SCHEME_MBE:
798: NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
799: break;
800: case PEP_REFINE_SCHEME_SCHUR:
801: NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
802: break;
803: }
804: }
805: if (matctx && matctx->subc) {
806: VecGetLocalSize(Vi,&m);
807: VecGetArrayRead(Vi,&array);
808: VecGetArray(matctx->tg,&array2);
809: PetscArraycpy(array2,array,m);
810: VecRestoreArray(matctx->tg,&array2);
811: VecRestoreArrayRead(Vi,&array);
812: VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
813: VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
814: switch (pep->scheme) {
815: case PEP_REFINE_SCHEME_EXPLICIT:
816: MatGetOwnershipRange(matctx->E[0],&n0,&m0);
817: VecGetArrayRead(matctx->ttN,&array);
818: VecPlaceArray(matctx->tp,array+m0-n0);
819: VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
820: VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
821: VecResetArray(matctx->tp);
822: VecRestoreArrayRead(matctx->ttN,&array);
823: VecGetArrayRead(matctx->tpg,&array);
824: for (j=0;j<k;j++) dHi[j] = array[j];
825: VecRestoreArrayRead(matctx->tpg,&array);
826: break;
827: case PEP_REFINE_SCHEME_MBE:
828: root = 0;
829: for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
830: PetscMPIIntCast(k,&len);
831: MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
832: break;
833: case PEP_REFINE_SCHEME_SCHUR:
834: break;
835: }
836: }
837: return(0);
838: }
840: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
841: {
842: PetscErrorCode ierr;
843: PetscInt i,nmat=pep->nmat,lda=nmat*k;
844: PetscScalar *fh,*Rh,*DfH;
845: PetscReal norm;
846: BV W;
847: Vec Rv,t,dvi;
848: PEP_REFINE_MATSHELL *ctx;
849: Mat M,*At;
850: PetscBool flg,lindep;
853: PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
854: *rds = 0;
855: BVCreateVec(pep->V,&Rv);
856: switch (pep->scheme) {
857: case PEP_REFINE_SCHEME_EXPLICIT:
858: BVCreateVec(pep->V,&t);
859: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
860: PetscMalloc1(nmat,&fh);
861: break;
862: case PEP_REFINE_SCHEME_MBE:
863: if (matctx->subc) {
864: BVCreateVec(pep->V,&t);
865: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
866: } else {
867: W = matctx->W;
868: PetscObjectReference((PetscObject)W);
869: t = matctx->t;
870: PetscObjectReference((PetscObject)t);
871: }
872: BVScale(matctx->W,0.0);
873: BVScale(matctx->Wt,0.0);
874: BVScale(matctx->M2,0.0);
875: BVScale(matctx->M3,0.0);
876: PetscMalloc1(nmat,&fh);
877: break;
878: case PEP_REFINE_SCHEME_SCHUR:
879: KSPGetOperators(ksp,&M,NULL);
880: MatShellGetContext(M,&ctx);
881: BVCreateVec(pep->V,&t);
882: BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
883: fh = ctx->fih;
884: break;
885: }
886: PetscArrayzero(dVS,2*k*k);
887: PetscArrayzero(DfH,lda*k);
888: STGetTransform(pep->st,&flg);
889: if (flg) {
890: PetscMalloc1(pep->nmat,&At);
891: for (i=0;i<pep->nmat;i++) {
892: STGetMatrixTransformed(pep->st,i,&At[i]);
893: }
894: } else At = pep->A;
896: /* Main loop for computing the i-th columns of dX and dS */
897: for (i=0;i<k;i++) {
898: /* Compute and update i-th column of the right hand side */
899: PetscArrayzero(Rh,k);
900: NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);
902: /* Update and solve system */
903: BVGetColumn(dV,i,&dvi);
904: NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
905: /* Orthogonalize computed solution */
906: BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
907: BVRestoreColumn(dV,i,&dvi);
908: if (!lindep) {
909: BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
910: if (!lindep) {
911: dVS[k+i+i*2*k] = norm;
912: BVScaleColumn(dV,i,1.0/norm);
913: (*rds)++;
914: }
915: }
916: }
917: BVSetActiveColumns(dV,0,*rds);
918: VecDestroy(&t);
919: VecDestroy(&Rv);
920: BVDestroy(&W);
921: if (flg) {
922: PetscFree(At);
923: }
924: PetscFree2(DfH,Rh);
925: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
926: return(0);
927: }
929: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
930: {
932: PetscInt j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
933: PetscScalar *G,*tau,sone=1.0,zero=0.0,*work;
934: PetscBLASInt lds_,k_,ldh_,info,ldg_,lda_;
937: PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
938: PetscBLASIntCast(lds,&lds_);
939: PetscBLASIntCast(lda,&lda_);
940: PetscBLASIntCast(k,&k_);
942: /* Form auxiliary matrix for the orthogonalization step */
943: ldg = deg*k;
944: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
945: PetscBLASIntCast(ldg,&ldg_);
946: PetscBLASIntCast(ldh,&ldh_);
947: for (j=0;j<deg;j++) {
948: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
949: }
950: /* Orthogonalize and update S */
951: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
952: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
953: PetscFPTrapPop();
954: SlepcCheckLapackInfo("geqrf",info);
955: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));
957: /* Update H */
958: PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
959: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
960: PetscFree3(tau,work,G);
961: return(0);
962: }
964: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
965: {
967: PetscInt i,j,nmat=pep->nmat,lda=nmat*k;
968: PetscScalar *tau,*array,*work;
969: PetscBLASInt lds_,k_,lda_,ldh_,kdrs_,info,k2_;
970: Mat M0;
973: PetscMalloc2(k,&tau,k,&work);
974: PetscBLASIntCast(lds,&lds_);
975: PetscBLASIntCast(lda,&lda_);
976: PetscBLASIntCast(ldh,&ldh_);
977: PetscBLASIntCast(k,&k_);
978: PetscBLASIntCast(2*k,&k2_);
979: PetscBLASIntCast((k+rds),&kdrs_);
980: /* Update H */
981: for (j=0;j<k;j++) {
982: for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
983: }
984: /* Update V */
985: for (j=0;j<k;j++) {
986: for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
987: for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
988: }
989: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
990: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
991: SlepcCheckLapackInfo("geqrf",info);
992: /* Copy triangular matrix in S */
993: for (j=0;j<k;j++) {
994: for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
995: for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
996: }
997: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
998: SlepcCheckLapackInfo("orgqr",info);
999: PetscFPTrapPop();
1000: MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1001: MatDenseGetArrayWrite(M0,&array);
1002: for (j=0;j<k;j++) {
1003: PetscArraycpy(array+j*k,dVS+j*2*k,k);
1004: }
1005: MatDenseRestoreArrayWrite(M0,&array);
1006: BVMultInPlace(pep->V,M0,0,k);
1007: if (rds) {
1008: MatDenseGetArrayWrite(M0,&array);
1009: for (j=0;j<k;j++) {
1010: PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
1011: }
1012: MatDenseRestoreArrayWrite(M0,&array);
1013: BVMultInPlace(dV,M0,0,k);
1014: BVMult(pep->V,1.0,1.0,dV,NULL);
1015: }
1016: MatDestroy(&M0);
1017: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1018: PetscFree2(tau,work);
1019: return(0);
1020: }
1022: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
1023: {
1024: PetscErrorCode ierr;
1025: PEP_REFINE_MATSHELL *ctx;
1026: PetscScalar t,*coef;
1027: const PetscScalar *array;
1028: MatStructure str;
1029: PetscInt j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1030: MPI_Comm comm;
1031: PetscMPIInt np;
1032: const PetscInt *rgs0,*rgs1;
1033: Mat B,C,*E,*A,*At;
1034: IS is1,is2;
1035: Vec v;
1036: PetscBool flg;
1037: Mat M,P;
1040: PetscMalloc1(nmat,&coef);
1041: STGetTransform(pep->st,&flg);
1042: if (flg) {
1043: PetscMalloc1(pep->nmat,&At);
1044: for (i=0;i<pep->nmat;i++) {
1045: STGetMatrixTransformed(pep->st,i,&At[i]);
1046: }
1047: } else At = pep->A;
1048: switch (pep->scheme) {
1049: case PEP_REFINE_SCHEME_EXPLICIT:
1050: if (ini) {
1051: if (matctx->subc) {
1052: A = matctx->A;
1053: comm = PetscSubcommChild(matctx->subc);
1054: } else {
1055: A = At;
1056: PetscObjectGetComm((PetscObject)pep,&comm);
1057: }
1058: E = matctx->E;
1059: STGetMatStructure(pep->st,&str);
1060: MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1061: j = (matctx->subc)?matctx->subc->color:0;
1062: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1063: for (j=1;j<nmat;j++) {
1064: MatAXPY(E[0],coef[j],A[j],str);
1065: }
1066: MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1067: MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1068: MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1069: MatGetOwnershipRange(E[0],&n0,&m0);
1070: MatGetOwnershipRange(E[1],&n1,&m1);
1071: MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1072: MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1073: /* T12 and T21 are computed from V and V*, so,
1074: they must have the same column and row ranges */
1075: if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inconsistent dimensions");
1076: MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1077: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1078: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1079: MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1080: MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1081: MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1082: MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1083: MatDestroy(&B);
1084: MatDestroy(&C);
1085: matctx->compM1 = PETSC_TRUE;
1086: MatGetSize(E[0],NULL,&N0);
1087: MatGetSize(E[1],NULL,&N1);
1088: MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1089: MatGetOwnershipRanges(E[0],&rgs0);
1090: MatGetOwnershipRanges(E[1],&rgs1);
1091: PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1092: /* Create column (and row) mapping */
1093: for (p=0;p<np;p++) {
1094: for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1095: for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1096: }
1097: MatCreateVecs(M,NULL,&matctx->tN);
1098: MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1099: VecDuplicate(matctx->tN,&matctx->ttN);
1100: if (matctx->subc) {
1101: MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1102: count = np*k;
1103: PetscMalloc2(count,&idx1,count,&idx2);
1104: VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1105: VecGetOwnershipRange(matctx->tp,&l0,NULL);
1106: VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1107: for (si=0;si<matctx->subc->n;si++) {
1108: if (matctx->subc->color==si) {
1109: j=0;
1110: if (matctx->subc->color==si) {
1111: for (p=0;p<np;p++) {
1112: for (i=n1;i<m1;i++) {
1113: idx1[j] = l0+i-n1;
1114: idx2[j++] =p*k+i;
1115: }
1116: }
1117: }
1118: count = np*(m1-n1);
1119: } else count =0;
1120: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1121: ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1122: VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1123: ISDestroy(&is1);
1124: ISDestroy(&is2);
1125: }
1126: PetscFree2(idx1,idx2);
1127: } else {
1128: VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1129: }
1130: P = M;
1131: } else {
1132: if (matctx->subc) {
1133: /* Scatter vectors pep->V */
1134: for (i=0;i<k;i++) {
1135: BVGetColumn(pep->V,i,&v);
1136: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1137: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1138: BVRestoreColumn(pep->V,i,&v);
1139: VecGetArrayRead(matctx->tg,&array);
1140: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1141: BVInsertVec(matctx->V,i,matctx->t);
1142: VecResetArray(matctx->t);
1143: VecRestoreArrayRead(matctx->tg,&array);
1144: }
1145: }
1146: }
1147: break;
1148: case PEP_REFINE_SCHEME_MBE:
1149: if (ini) {
1150: if (matctx->subc) {
1151: A = matctx->A;
1152: comm = PetscSubcommChild(matctx->subc);
1153: } else {
1154: matctx->V = pep->V;
1155: A = At;
1156: PetscObjectGetComm((PetscObject)pep,&comm);
1157: MatCreateVecs(pep->A[0],&matctx->t,NULL);
1158: }
1159: STGetMatStructure(pep->st,&str);
1160: MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1161: j = (matctx->subc)?matctx->subc->color:0;
1162: PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1163: for (j=1;j<nmat;j++) {
1164: MatAXPY(matctx->M1,coef[j],A[j],str);
1165: }
1166: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1167: BVDuplicateResize(matctx->V,k,&matctx->M2);
1168: BVDuplicate(matctx->M2,&matctx->M3);
1169: BVDuplicate(matctx->M2,&matctx->Wt);
1170: PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1171: matctx->compM1 = PETSC_TRUE;
1172: M = matctx->M1;
1173: P = M;
1174: }
1175: break;
1176: case PEP_REFINE_SCHEME_SCHUR:
1177: if (ini) {
1178: PetscObjectGetComm((PetscObject)pep,&comm);
1179: MatGetSize(At[0],&m0,&n0);
1180: PetscMalloc1(1,&ctx);
1181: STGetMatStructure(pep->st,&str);
1182: /* Create a shell matrix to solve the linear system */
1183: ctx->V = pep->V;
1184: ctx->k = k; ctx->nmat = nmat;
1185: PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1186: for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1187: PetscArrayzero(ctx->M4,k*k);
1188: MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1189: MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1190: BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1191: BVDuplicateResize(ctx->V,k,&ctx->M2);
1192: BVDuplicate(ctx->M2,&ctx->M3);
1193: BVCreateVec(pep->V,&ctx->t);
1194: MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1195: PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1196: for (j=1;j<nmat;j++) {
1197: MatAXPY(ctx->M1,coef[j],At[j],str);
1198: }
1199: MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1200: /* Compute a precond matrix for the system */
1201: t = H[0];
1202: PEPEvaluateBasis(pep,t,0,coef,NULL);
1203: for (j=1;j<nmat;j++) {
1204: MatAXPY(P,coef[j],At[j],str);
1205: }
1206: ctx->compM1 = PETSC_TRUE;
1207: }
1208: break;
1209: }
1210: if (ini) {
1211: PEPRefineGetKSP(pep,&pep->refineksp);
1212: KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1213: KSPSetOperators(pep->refineksp,M,P);
1214: KSPSetFromOptions(pep->refineksp);
1215: }
1217: if (!ini && matctx && matctx->subc) {
1218: /* Scatter vectors pep->V */
1219: for (i=0;i<k;i++) {
1220: BVGetColumn(pep->V,i,&v);
1221: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1222: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1223: BVRestoreColumn(pep->V,i,&v);
1224: VecGetArrayRead(matctx->tg,&array);
1225: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1226: BVInsertVec(matctx->V,i,matctx->t);
1227: VecResetArray(matctx->t);
1228: VecRestoreArrayRead(matctx->tg,&array);
1229: }
1230: }
1231: PetscFree(coef);
1232: if (flg) {
1233: PetscFree(At);
1234: }
1235: return(0);
1236: }
1238: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1239: {
1240: PetscErrorCode ierr;
1241: PetscInt i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1242: IS is1,is2;
1243: BVType type;
1244: Vec v;
1245: const PetscScalar *array;
1246: Mat *A;
1247: PetscBool flg;
1250: STGetTransform(pep->st,&flg);
1251: if (flg) {
1252: PetscMalloc1(pep->nmat,&A);
1253: for (i=0;i<pep->nmat;i++) {
1254: STGetMatrixTransformed(pep->st,i,&A[i]);
1255: }
1256: } else A = pep->A;
1258: /* Duplicate pep matrices */
1259: PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1260: for (i=0;i<pep->nmat;i++) {
1261: MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1262: PetscLogObjectParent((PetscObject)pep,(PetscObject)matctx->A[i]);
1263: }
1265: /* Create Scatter */
1266: MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1267: MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1268: VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1269: BVGetColumn(pep->V,0,&v);
1270: VecGetOwnershipRange(v,&n0,&m0);
1271: nloc0 = m0-n0;
1272: PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1273: j = 0;
1274: for (si=0;si<matctx->subc->n;si++) {
1275: for (i=n0;i<m0;i++) {
1276: idx1[j] = i;
1277: idx2[j++] = i+pep->n*si;
1278: }
1279: }
1280: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1281: ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1282: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1283: ISDestroy(&is1);
1284: ISDestroy(&is2);
1285: for (si=0;si<matctx->subc->n;si++) {
1286: j=0;
1287: for (i=n0;i<m0;i++) {
1288: idx1[j] = i;
1289: idx2[j++] = i+pep->n*si;
1290: }
1291: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1292: ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1293: VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1294: ISDestroy(&is1);
1295: ISDestroy(&is2);
1296: }
1297: BVRestoreColumn(pep->V,0,&v);
1298: PetscFree2(idx1,idx2);
1300: /* Duplicate pep->V vecs */
1301: BVGetType(pep->V,&type);
1302: BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1303: BVSetType(matctx->V,type);
1304: BVSetSizesFromVec(matctx->V,matctx->t,k);
1305: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1306: BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1307: }
1308: for (i=0;i<k;i++) {
1309: BVGetColumn(pep->V,i,&v);
1310: VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1311: VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1312: BVRestoreColumn(pep->V,i,&v);
1313: VecGetArrayRead(matctx->tg,&array);
1314: VecPlaceArray(matctx->t,(const PetscScalar*)array);
1315: BVInsertVec(matctx->V,i,matctx->t);
1316: VecResetArray(matctx->t);
1317: VecRestoreArrayRead(matctx->tg,&array);
1318: }
1320: VecDuplicate(matctx->t,&matctx->Rv);
1321: VecDuplicate(matctx->t,&matctx->Vi);
1322: if (flg) {
1323: PetscFree(A);
1324: }
1325: return(0);
1326: }
1328: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1329: {
1331: PetscInt i;
1334: VecScatterDestroy(&matctx->scatter_sub);
1335: for (i=0;i<matctx->subc->n;i++) {
1336: VecScatterDestroy(&matctx->scatter_id[i]);
1337: }
1338: for (i=0;i<pep->nmat;i++) {
1339: MatDestroy(&matctx->A[i]);
1340: }
1341: if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1342: for (i=0;i<matctx->subc->n;i++) {
1343: VecScatterDestroy(&matctx->scatterp_id[i]);
1344: }
1345: VecDestroy(&matctx->tp);
1346: VecDestroy(&matctx->tpg);
1347: BVDestroy(&matctx->W);
1348: }
1349: PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1350: BVDestroy(&matctx->V);
1351: VecDestroy(&matctx->t);
1352: VecDestroy(&matctx->tg);
1353: VecDestroy(&matctx->Rv);
1354: VecDestroy(&matctx->Vi);
1355: return(0);
1356: }
1358: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1359: {
1360: PetscErrorCode ierr;
1361: PetscScalar *H,*work,*dH,*fH,*dVS;
1362: PetscInt ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1363: PetscBLASInt k_,ld_,*p,info;
1364: BV dV;
1365: PetscBool sinvert,flg;
1366: PEP_REFINE_EXPLICIT *matctx=NULL;
1367: Vec v;
1368: Mat M,P;
1369: PEP_REFINE_MATSHELL *ctx;
1372: PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1373: if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1374: /* the input tolerance is not being taken into account (by the moment) */
1375: its = *maxits;
1376: PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1377: DSGetLeadingDimension(pep->ds,&ldh);
1378: DSGetArray(pep->ds,DS_MAT_A,&H);
1379: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1380: PetscMalloc1(2*k*k,&dVS);
1381: STGetTransform(pep->st,&flg);
1382: if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1383: PetscBLASIntCast(k,&k_);
1384: PetscBLASIntCast(ldh,&ld_);
1385: PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1386: if (sinvert) {
1387: DSGetArray(pep->ds,DS_MAT_A,&H);
1388: PetscMalloc1(k,&p);
1389: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1390: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1391: SlepcCheckLapackInfo("getrf",info);
1392: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1393: SlepcCheckLapackInfo("getri",info);
1394: PetscFPTrapPop();
1395: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1396: pep->ops->backtransform = NULL;
1397: }
1398: if (sigma!=0.0) {
1399: DSGetArray(pep->ds,DS_MAT_A,&H);
1400: for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1401: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1402: pep->ops->backtransform = NULL;
1403: }
1404: }
1405: if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1406: DSGetArray(pep->ds,DS_MAT_A,&H);
1407: for (j=0;j<k;j++) {
1408: for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1409: }
1410: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1411: if (!flg) {
1412: /* Restore original values */
1413: for (i=0;i<pep->nmat;i++) {
1414: pep->pbc[pep->nmat+i] *= pep->sfactor;
1415: pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1416: }
1417: }
1418: }
1419: if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1420: for (i=0;i<k;i++) {
1421: BVGetColumn(pep->V,i,&v);
1422: VecPointwiseMult(v,v,pep->Dr);
1423: BVRestoreColumn(pep->V,i,&v);
1424: }
1425: }
1426: DSGetArray(pep->ds,DS_MAT_A,&H);
1428: NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1429: /* check if H is in Schur form */
1430: for (i=0;i<k-1;i++) {
1431: if (H[i+1+i*ldh]!=0.0) {
1432: #if !defined(PETSC_USE_COMPLEX)
1433: SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires the complex Schur form of the projected matrix");
1434: #else
1435: SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires an upper triangular projected matrix");
1436: #endif
1437: }
1438: }
1439: if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1440: BVSetActiveColumns(pep->V,0,k);
1441: BVDuplicateResize(pep->V,k,&dV);
1442: PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1443: if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1444: PetscMalloc1(1,&matctx);
1445: if (nsubc>1) { /* splitting in subcommunicators */
1446: matctx->subc = pep->refinesubc;
1447: NRefSubcommSetup(pep,k,matctx,nsubc);
1448: } else matctx->subc=NULL;
1449: }
1451: /* Loop performing iterative refinements */
1452: for (i=0;i<its;i++) {
1453: /* Pre-compute the polynomial basis evaluated in H */
1454: PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1455: PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1456: /* Solve the linear system */
1457: PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1458: /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1459: PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1460: }
1461: DSRestoreArray(pep->ds,DS_MAT_A,&H);
1462: if (!flg && sinvert) {
1463: PetscFree(p);
1464: }
1465: PetscFree3(dH,fH,work);
1466: PetscFree(dVS);
1467: BVDestroy(&dV);
1468: switch (pep->scheme) {
1469: case PEP_REFINE_SCHEME_EXPLICIT:
1470: for (i=0;i<2;i++) {
1471: MatDestroy(&matctx->E[i]);
1472: }
1473: PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1474: VecDestroy(&matctx->tN);
1475: VecDestroy(&matctx->ttN);
1476: VecDestroy(&matctx->t1);
1477: if (nsubc>1) {
1478: NRefSubcommDestroy(pep,matctx);
1479: } else {
1480: VecDestroy(&matctx->vseq);
1481: VecScatterDestroy(&matctx->scatterctx);
1482: }
1483: PetscFree(matctx);
1484: KSPGetOperators(pep->refineksp,&M,NULL);
1485: MatDestroy(&M);
1486: break;
1487: case PEP_REFINE_SCHEME_MBE:
1488: BVDestroy(&matctx->W);
1489: BVDestroy(&matctx->Wt);
1490: BVDestroy(&matctx->M2);
1491: BVDestroy(&matctx->M3);
1492: MatDestroy(&matctx->M1);
1493: VecDestroy(&matctx->t);
1494: PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1495: if (nsubc>1) {
1496: NRefSubcommDestroy(pep,matctx);
1497: }
1498: PetscFree(matctx);
1499: break;
1500: case PEP_REFINE_SCHEME_SCHUR:
1501: KSPGetOperators(pep->refineksp,&M,&P);
1502: MatShellGetContext(M,&ctx);
1503: PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1504: MatDestroy(&ctx->M1);
1505: BVDestroy(&ctx->M2);
1506: BVDestroy(&ctx->M3);
1507: BVDestroy(&ctx->W);
1508: VecDestroy(&ctx->t);
1509: PetscFree(ctx);
1510: MatDestroy(&M);
1511: MatDestroy(&P);
1512: break;
1513: }
1514: PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1515: return(0);
1516: }