Actual source code: nrefine.c

slepc-3.16.2 2022-02-01
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Newton refinement for polynomial eigenproblems.

 13:    References:

 15:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
 16:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
 17:            435(3):514-536, 2011.

 19:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 20:            polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
 21:            730-745, 2016.
 22: */

 24: #include <slepc/private/pepimpl.h>
 25: #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A,M1;
 29:   BV           V,M2,M3,W;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *fih,*work,*M4;
 32:   PetscBLASInt *pM4;
 33:   PetscBool    compM1;
 34:   Vec          t;
 35: } PEP_REFINE_MATSHELL;

 37: typedef struct {
 38:   Mat          E[2],M1;
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    compM1;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W,M2,M3,Wt;
 48:   PetscScalar  *M4,*w,*wt,*d,*dt;
 49:   Vec          t,tg,Rv,Vi,tp,tpg;
 50:   PetscInt     idx,*cols;
 51: } PEP_REFINE_EXPLICIT;

 53: static PetscErrorCode MatMult_FS(Mat M ,Vec x,Vec y)
 54: {
 55:   PetscErrorCode      ierr;
 56:   PEP_REFINE_MATSHELL *ctx;
 57:   PetscInt            k,i;
 58:   PetscScalar         *c;
 59:   PetscBLASInt        k_,one=1,info;

 62:   MatShellGetContext(M,&ctx);
 63:   VecCopy(x,ctx->t);
 64:   k    = ctx->k;
 65:   c    = ctx->work;
 66:   PetscBLASIntCast(k,&k_);
 67:   MatMult(ctx->M1,x,y);
 68:   VecConjugate(ctx->t);
 69:   BVDotVec(ctx->M3,ctx->t,c);
 70:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 71:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 72:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 73:   PetscFPTrapPop();
 74:   SlepcCheckLapackInfo("getrs",info);
 75:   BVMultVec(ctx->M2,-1.0,1.0,y,c);
 76:   return(0);
 77: }

 79: /*
 80:   Evaluates the first d elements of the polynomial basis
 81:   on a given matrix H which is considered to be triangular
 82: */
 83: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 84: {
 86:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 87:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 88:   PetscScalar    corr=0.0,alpha,beta;
 89:   PetscBLASInt   k_,ldh_,ldfh_;

 92:   PetscBLASIntCast(ldh,&ldh_);
 93:   PetscBLASIntCast(k,&k_);
 94:   PetscBLASIntCast(ldfh,&ldfh_);
 95:   PetscArrayzero(fH,nm*k*k);
 96:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
 97:   if (nm>1) {
 98:     t = b[0]/a[0];
 99:     off = k;
100:     for (j=0;j<k;j++) {
101:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
102:       fH[j+j*ldfh] -= t;
103:     }
104:   }
105:   for (i=2;i<nm;i++) {
106:     off = i*k;
107:     if (i==2) {
108:       for (j=0;j<k;j++) {
109:         fH[off+j+j*ldfh] = 1.0;
110:         H[j+j*ldh] -= b[1];
111:       }
112:     } else {
113:       for (j=0;j<k;j++) {
114:         PetscArraycpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k);
115:         H[j+j*ldh] += corr-b[i-1];
116:       }
117:     }
118:     corr  = b[i-1];
119:     beta  = -g[i-1]/a[i-1];
120:     alpha = 1/a[i-1];
121:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
122:   }
123:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
124:   return(0);
125: }

127: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,PEP_REFINE_MATSHELL *ctx)
128: {
129:   PetscErrorCode    ierr;
130:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
131:   const PetscScalar *m3,*m2;
132:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
133:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
134:   PetscBLASInt      k_,lda_,lds_,nloc_,one=1,info;
135:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
136:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
137:   MatStructure      str;
138:   Vec               vc;

141:   STGetMatStructure(pep->st,&str);
142:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
143:   DHii = T12;
144:   PetscArrayzero(DHii,k*k*nmat);
145:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
146:   for (d=2;d<nmat;d++) {
147:     for (j=0;j<k;j++) {
148:       for (i=0;i<k;i++) {
149:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
150:       }
151:     }
152:   }
153:   /* T11 */
154:   if (!ctx->compM1) {
155:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
156:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
157:     for (j=1;j<nmat;j++) {
158:       MatAXPY(M1,Ts[j],A[j],str);
159:     }
160:   }

162:   /* T22 */
163:   PetscBLASIntCast(lds,&lds_);
164:   PetscBLASIntCast(k,&k_);
165:   PetscBLASIntCast(lda,&lda_);
166:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
167:   for (i=1;i<deg;i++) {
168:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
169:     s = (i==1)?0.0:1.0;
170:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
171:   }
172:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

174:   /* T12 */
175:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
176:   for (i=1;i<nmat;i++) {
177:     MatDenseGetArrayWrite(Mk,&array);
178:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
179:     MatDenseRestoreArrayWrite(Mk,&array);
180:     BVSetActiveColumns(W,0,k);
181:     BVMult(W,1.0,0.0,V,Mk);
182:     if (i==1) {
183:       BVMatMult(W,A[i],M2);
184:     } else {
185:       BVMatMult(W,A[i],M3); /* using M3 as work space */
186:       BVMult(M2,1.0,1.0,M3,NULL);
187:     }
188:   }

190:   /* T21 */
191:   MatDenseGetArrayWrite(Mk,&array);
192:   for (i=1;i<deg;i++) {
193:     s = (i==1)?0.0:1.0;
194:     ss = PetscConj(fh[i]);
195:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
196:   }
197:   MatDenseRestoreArrayWrite(Mk,&array);
198:   BVSetActiveColumns(M3,0,k);
199:   BVMult(M3,1.0,0.0,V,Mk);
200:   for (i=0;i<k;i++) {
201:     BVGetColumn(M3,i,&vc);
202:     VecConjugate(vc);
203:     BVRestoreColumn(M3,i,&vc);
204:   }
205:   MatDestroy(&Mk);
206:   PetscFree3(T12,Tr,Ts);

208:   VecGetLocalSize(ctx->t,&nloc);
209:   PetscBLASIntCast(nloc,&nloc_);
210:   PetscMalloc1(nloc*k,&T);
211:   KSPGetOperators(pep->refineksp,NULL,&P);
212:   if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
213:   BVGetArrayRead(ctx->M2,&m2);
214:   BVGetArrayRead(ctx->M3,&m3);
215:   VecGetArray(ctx->t,&v);
216:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
217:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
218:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
219:   PetscFPTrapPop();
220:   SlepcCheckLapackInfo("gesv",info);
221:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
222:   VecRestoreArray(ctx->t,&v);
223:   BVRestoreArrayRead(ctx->M2,&m2);
224:   BVRestoreArrayRead(ctx->M3,&m3);
225:   MatDiagonalSet(P,ctx->t,ADD_VALUES);
226:   PetscFree(T);
227:   KSPSetUp(pep->refineksp);
228:   return(0);
229: }

231: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
232: {
233:   PetscErrorCode      ierr;
234:   PetscScalar         *t0;
235:   PetscBLASInt        k_,one=1,info,lda_;
236:   PetscInt            i,lda=nmat*k;
237:   Mat                 M;
238:   PEP_REFINE_MATSHELL *ctx;

241:   KSPGetOperators(ksp,&M,NULL);
242:   MatShellGetContext(M,&ctx);
243:   PetscCalloc1(k,&t0);
244:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
245:   PetscBLASIntCast(lda,&lda_);
246:   PetscBLASIntCast(k,&k_);
247:   for (i=0;i<k;i++) t0[i] = Rh[i];
248:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
249:   SlepcCheckLapackInfo("getrs",info);
250:   BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
251:   KSPSolve(ksp,Rv,dVi);
252:   VecConjugate(dVi);
253:   BVDotVec(ctx->M3,dVi,dHi);
254:   VecConjugate(dVi);
255:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
256:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
257:   SlepcCheckLapackInfo("getrs",info);
258:   PetscFPTrapPop();
259:   PetscFree(t0);
260:   return(0);
261: }

263: /*
264:    Computes the residual P(H,V*S)*e_j for the polynomial
265: */
266: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
267: {
269:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
270:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
271:   PetscInt       i,ii,jj,lda;
272:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
273:   Mat            M0;
274:   Vec            w;

277:   PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
278:   lda = k*nmat;
279:   PetscBLASIntCast(k,&k_);
280:   PetscBLASIntCast(lds,&lds_);
281:   PetscBLASIntCast(lda,&lda_);
282:   PetscBLASIntCast(nmat,&nmat_);
283:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
284:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
285:   BVSetActiveColumns(W,0,nmat);
286:   BVMult(W,1.0,0.0,V,M0);
287:   MatDestroy(&M0);

289:   BVGetColumn(W,0,&w);
290:   MatMult(A[0],w,Rv);
291:   BVRestoreColumn(W,0,&w);
292:   for (i=1;i<nmat;i++) {
293:     BVGetColumn(W,i,&w);
294:     MatMult(A[i],w,t);
295:     BVRestoreColumn(W,i,&w);
296:     VecAXPY(Rv,1.0,t);
297:   }
298:   /* Update right-hand side */
299:   if (j) {
300:     PetscBLASIntCast(ldh,&ldh_);
301:     PetscArrayzero(Z,k*k);
302:     PetscArrayzero(DS0,k*k);
303:     PetscArraycpy(Z+(j-1)*k,dH+(j-1)*k,k);
304:     /* Update DfH */
305:     for (i=1;i<nmat;i++) {
306:       if (i>1) {
307:         beta = -g[i-1];
308:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
309:         tt += -b[i-1];
310:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
311:         tt = b[i-1];
312:         beta = 1.0/a[i-1];
313:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
314:         F = DS0; DS0 = DS1; DS1 = F;
315:       } else {
316:         PetscArrayzero(DS1,k*k);
317:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
318:       }
319:       for (jj=j;jj<k;jj++) {
320:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
321:       }
322:     }
323:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
324:     /* Update right-hand side */
325:     PetscBLASIntCast(2*k,&k2_);
326:     PetscBLASIntCast(j,&j_);
327:     PetscBLASIntCast(k+rds,&krds_);
328:     c0 = DS0;
329:     PetscArrayzero(Rh,k);
330:     for (i=0;i<nmat;i++) {
331:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
332:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
333:       BVMultVec(V,1.0,0.0,t,h);
334:       BVSetActiveColumns(dV,0,rds);
335:       BVMultVec(dV,1.0,1.0,t,h+k);
336:       BVGetColumn(W,i,&w);
337:       MatMult(A[i],t,w);
338:       BVRestoreColumn(W,i,&w);
339:       if (i>0 && i<nmat-1) {
340:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
341:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
342:       }
343:     }

345:     for (i=0;i<nmat;i++) h[i] = -1.0;
346:     BVMultVec(W,1.0,1.0,Rv,h);
347:   }
348:   PetscFree4(h,DS0,DS1,Z);
349:   return(0);
350: }

352: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
353: {
355:   PetscInt       i,j,incf,incc;
356:   PetscScalar    *y,*g,*xx2,*ww,y2,*dd;
357:   Vec            v,t,xx1;
358:   BV             WW,T;

361:   PetscMalloc3(sz,&y,sz,&g,k,&xx2);
362:   if (trans) {
363:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
364:   } else {
365:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
366:   }
367:   xx1 = vw;
368:   VecCopy(x1,xx1);
369:   PetscArraycpy(xx2,x2,sz);
370:   PetscArrayzero(sol2,k);
371:   for (i=sz-1;i>=0;i--) {
372:     BVGetColumn(WW,i,&v);
373:     VecConjugate(v);
374:     VecDot(xx1,v,y+i);
375:     VecConjugate(v);
376:     BVRestoreColumn(WW,i,&v);
377:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
378:     y[i] = -(y[i]-xx2[i])/dd[i];
379:     BVGetColumn(T,i,&t);
380:     VecAXPY(xx1,-y[i],t);
381:     BVRestoreColumn(T,i,&t);
382:     for (j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
383:     g[i] = xx2[i];
384:   }
385:   if (trans) {
386:     KSPSolveTranspose(ksp,xx1,sol1);
387:   } else {
388:     KSPSolve(ksp,xx1,sol1);
389:   }
390:   if (trans) {
391:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
392:   } else {
393:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
394:   }
395:   for (i=0;i<sz;i++) {
396:     BVGetColumn(T,i,&t);
397:     VecConjugate(t);
398:     VecDot(sol1,t,&y2);
399:     VecConjugate(t);
400:     BVRestoreColumn(T,i,&t);
401:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
402:     y2 = (g[i]-y2)/dd[i];
403:     BVGetColumn(WW,i,&v);
404:     VecAXPY(sol1,-y2,v);
405:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
406:     sol2[i] = y[i]+y2;
407:     BVRestoreColumn(WW,i,&v);
408:   }
409:   PetscFree3(y,g,xx2);
410:   return(0);
411: }

413: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx)
414: {
416:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
417:   Mat            M1=matctx->M1,*A,*At,Mk;
418:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
419:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
420:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
421:   PetscBLASInt   lds_,lda_,k_;
422:   MatStructure   str;
423:   PetscBool      flg;
424:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
425:   Vec            vc,vc2;

428:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
429:   STGetMatStructure(pep->st,&str);
430:   STGetTransform(pep->st,&flg);
431:   if (flg) {
432:     PetscMalloc1(pep->nmat,&At);
433:     for (i=0;i<pep->nmat;i++) {
434:       STGetMatrixTransformed(pep->st,i,&At[i]);
435:     }
436:   } else At = pep->A;
437:   if (matctx->subc) A = matctx->A;
438:   else A = At;
439:   /* Form the explicit system matrix */
440:   DHii = T12;
441:   PetscArrayzero(DHii,k*k*nmat);
442:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
443:   for (l=2;l<nmat;l++) {
444:     for (j=0;j<k;j++) {
445:       for (i=0;i<k;i++) {
446:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
447:       }
448:     }
449:   }

451:   /* T11 */
452:   if (!matctx->compM1) {
453:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
454:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
455:     for (j=1;j<nmat;j++) {
456:       MatAXPY(M1,Ts[j],A[j],str);
457:     }
458:   }

460:   /* T22 */
461:   PetscBLASIntCast(lds,&lds_);
462:   PetscBLASIntCast(k,&k_);
463:   PetscBLASIntCast(lda,&lda_);
464:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
465:   for (i=1;i<deg;i++) {
466:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
467:     s = (i==1)?0.0:1.0;
468:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
469:   }

471:   /* T12 */
472:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
473:   for (i=1;i<nmat;i++) {
474:     MatDenseGetArrayWrite(Mk,&array);
475:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
476:     MatDenseRestoreArrayWrite(Mk,&array);
477:     BVSetActiveColumns(W,0,k);
478:     BVMult(W,1.0,0.0,V,Mk);
479:     if (i==1) {
480:       BVMatMult(W,A[i],M2);
481:     } else {
482:       BVMatMult(W,A[i],M3); /* using M3 as work space */
483:       BVMult(M2,1.0,1.0,M3,NULL);
484:     }
485:   }

487:   /* T21 */
488:   MatDenseGetArrayWrite(Mk,&array);
489:   for (i=1;i<deg;i++) {
490:     s = (i==1)?0.0:1.0;
491:     ss = PetscConj(fh[i]);
492:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
493:   }
494:   MatDenseRestoreArrayWrite(Mk,&array);
495:   BVSetActiveColumns(M3,0,k);
496:   BVMult(M3,1.0,0.0,V,Mk);
497:   for (i=0;i<k;i++) {
498:     BVGetColumn(M3,i,&vc);
499:     VecConjugate(vc);
500:     BVRestoreColumn(M3,i,&vc);
501:   }

503:   KSPSetOperators(ksp,M1,M1);
504:   KSPSetUp(ksp);
505:   MatDestroy(&Mk);

507:   /* Set up for BEMW */
508:   for (i=0;i<k;i++) {
509:     BVGetColumn(M2,i,&vc);
510:     BVGetColumn(W,i,&vc2);
511:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
512:     BVRestoreColumn(M2,i,&vc);
513:     BVGetColumn(M3,i,&vc);
514:     VecConjugate(vc);
515:     VecDot(vc2,vc,&d[i]);
516:     VecConjugate(vc);
517:     BVRestoreColumn(M3,i,&vc);
518:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
519:     d[i] = M4[i+i*k]-d[i];
520:     BVRestoreColumn(W,i,&vc2);

522:     BVGetColumn(M3,i,&vc);
523:     BVGetColumn(Wt,i,&vc2);
524:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
525:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
526:     BVRestoreColumn(M3,i,&vc);
527:     BVGetColumn(M2,i,&vc);
528:     VecConjugate(vc2);
529:     VecDot(vc,vc2,&dt[i]);
530:     VecConjugate(vc2);
531:     BVRestoreColumn(M2,i,&vc);
532:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
533:     dt[i] = M4[i+i*k]-dt[i];
534:     BVRestoreColumn(Wt,i,&vc2);
535:   }

537:   if (flg) {
538:     PetscFree(At);
539:   }
540:   PetscFree3(T12,Tr,Ts);
541:   return(0);
542: }

544: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,PEP_REFINE_EXPLICIT *matctx,BV W)
545: {
546:   PetscErrorCode    ierr;
547:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
548:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
549:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
550:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
551:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
552:   PetscBLASInt      lds_,lda_,k_;
553:   const PetscInt    *idxmc;
554:   const PetscScalar *valsc,*carray;
555:   MatStructure      str;
556:   Vec               vc,vc0;
557:   PetscBool         flg;

560:   PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
561:   STGetMatStructure(pep->st,&str);
562:   KSPGetOperators(ksp,&M,NULL);
563:   MatGetOwnershipRange(E[1],&n1,&m1);
564:   MatGetOwnershipRange(E[0],&n0,&m0);
565:   MatGetOwnershipRange(M,&n,NULL);
566:   PetscMalloc1(nmat,&ts);
567:   STGetTransform(pep->st,&flg);
568:   if (flg) {
569:     PetscMalloc1(pep->nmat,&At);
570:     for (i=0;i<pep->nmat;i++) {
571:       STGetMatrixTransformed(pep->st,i,&At[i]);
572:     }
573:   } else At = pep->A;
574:   if (matctx->subc) A = matctx->A;
575:   else A = At;
576:   /* Form the explicit system matrix */
577:   DHii = T12;
578:   PetscArrayzero(DHii,k*k*nmat);
579:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
580:   for (d=2;d<nmat;d++) {
581:     for (j=0;j<k;j++) {
582:       for (i=0;i<k;i++) {
583:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
584:       }
585:     }
586:   }

588:   /* T11 */
589:   if (!matctx->compM1) {
590:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
591:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
592:     for (j=1;j<nmat;j++) {
593:       MatAXPY(E[0],Ts[j],A[j],str);
594:     }
595:   }
596:   for (i=n0;i<m0;i++) {
597:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
598:     idx = n+i-n0;
599:     for (j=0;j<ncols;j++) {
600:       idxg[j] = matctx->map0[idxmc[j]];
601:     }
602:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
603:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
604:   }

606:   /* T22 */
607:   PetscBLASIntCast(lds,&lds_);
608:   PetscBLASIntCast(k,&k_);
609:   PetscBLASIntCast(lda,&lda_);
610:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
611:   for (i=1;i<deg;i++) {
612:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
613:     s = (i==1)?0.0:1.0;
614:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
615:   }
616:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
617:   for (i=0;i<m1-n1;i++) {
618:     idx = n+m0-n0+i;
619:     for (j=0;j<k;j++) {
620:       Tr[j] = T22[n1+i+j*k];
621:     }
622:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
623:   }

625:   /* T21 */
626:   for (i=1;i<deg;i++) {
627:     s = (i==1)?0.0:1.0;
628:     ss = PetscConj(fh[i]);
629:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
630:   }
631:   BVSetActiveColumns(W,0,k);
632:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
633:   BVMult(W,1.0,0.0,V,Mk);
634:   for (i=0;i<k;i++) {
635:     BVGetColumn(W,i,&vc);
636:     VecConjugate(vc);
637:     VecGetArrayRead(vc,&carray);
638:     idx = matctx->map1[i];
639:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
640:     VecRestoreArrayRead(vc,&carray);
641:     BVRestoreColumn(W,i,&vc);
642:   }

644:   /* T12 */
645:   for (i=1;i<nmat;i++) {
646:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
647:     for (j=0;j<k;j++) {
648:       PetscArraycpy(T12+i*k+j*lda,Ts+j*k,k);
649:     }
650:   }
651:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
652:   for (i=0;i<nmat;i++) ts[i] = 1.0;
653:   for (j=0;j<k;j++) {
654:     MatDenseGetArrayWrite(Md,&array);
655:     PetscArraycpy(array,T12+k+j*lda,(nmat-1)*k);
656:     MatDenseRestoreArrayWrite(Md,&array);
657:     BVSetActiveColumns(W,0,nmat-1);
658:     BVMult(W,1.0,0.0,V,Md);
659:     for (i=nmat-1;i>0;i--) {
660:       BVGetColumn(W,i-1,&vc0);
661:       BVGetColumn(W,i,&vc);
662:       MatMult(A[i],vc0,vc);
663:       BVRestoreColumn(W,i-1,&vc0);
664:       BVRestoreColumn(W,i,&vc);
665:     }
666:     BVSetActiveColumns(W,1,nmat);
667:     BVGetColumn(W,0,&vc0);
668:     BVMultVec(W,1.0,0.0,vc0,ts);
669:     VecGetArrayRead(vc0,&carray);
670:     idx = matctx->map1[j];
671:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
672:     VecRestoreArrayRead(vc0,&carray);
673:     BVRestoreColumn(W,0,&vc0);
674:   }
675:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
676:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
677:   KSPSetOperators(ksp,M,M);
678:   KSPSetUp(ksp);
679:   PetscFree(ts);
680:   MatDestroy(&Mk);
681:   MatDestroy(&Md);
682:   if (flg) {
683:     PetscFree(At);
684:   }
685:   PetscFree5(T22,T21,T12,Tr,Ts);
686:   return(0);
687: }

689: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx)
690: {
691:   PetscErrorCode    ierr;
692:   PetscInt          n0,m0,n1,m1,i;
693:   PetscScalar       *arrayV;
694:   const PetscScalar *array;

697:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
698:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);

700:   /* Right side */
701:   VecGetArrayRead(Rv,&array);
702:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
703:   VecRestoreArrayRead(Rv,&array);
704:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
705:   VecAssemblyBegin(matctx->tN);
706:   VecAssemblyEnd(matctx->tN);

708:   /* Solve */
709:   KSPSolve(ksp,matctx->tN,matctx->ttN);

711:   /* Retrieve solution */
712:   VecGetArray(dVi,&arrayV);
713:   VecGetArrayRead(matctx->ttN,&array);
714:   PetscArraycpy(arrayV,array,m0-n0);
715:   VecRestoreArray(dVi,&arrayV);
716:   if (!matctx->subc) {
717:     VecGetArray(matctx->t1,&arrayV);
718:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
719:     VecRestoreArray(matctx->t1,&arrayV);
720:     VecRestoreArrayRead(matctx->ttN,&array);
721:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
722:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
723:     VecGetArrayRead(matctx->vseq,&array);
724:     for (i=0;i<k;i++) dHi[i] = array[i];
725:     VecRestoreArrayRead(matctx->vseq,&array);
726:   }
727:   return(0);
728: }

730: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,PEP_REFINE_EXPLICIT *matctx,BV W)
731: {
732:   PetscErrorCode      ierr;
733:   PetscInt            j,m,lda=pep->nmat*k,n0,m0,idx;
734:   PetscMPIInt         root,len;
735:   PetscScalar         *array2,h;
736:   const PetscScalar   *array;
737:   Vec                 R,Vi;
738:   PEP_REFINE_MATSHELL *ctx;
739:   Mat                 M;

742:   if (!matctx || !matctx->subc) {
743:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
744:     h   = H[i+i*ldh];
745:     idx = i;
746:     R   = Rv;
747:     Vi  = dVi;
748:     switch (pep->scheme) {
749:     case PEP_REFINE_SCHEME_EXPLICIT:
750:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
751:       matctx->compM1 = PETSC_FALSE;
752:       break;
753:     case PEP_REFINE_SCHEME_MBE:
754:       NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
755:       matctx->compM1 = PETSC_FALSE;
756:       break;
757:     case PEP_REFINE_SCHEME_SCHUR:
758:       KSPGetOperators(ksp,&M,NULL);
759:       MatShellGetContext(M,&ctx);
760:       NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
761:       ctx->compM1 = PETSC_FALSE;
762:       break;
763:     }
764:   } else {
765:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
766:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
767:       h = H[idx+idx*ldh];
768:       matctx->idx = idx;
769:       switch (pep->scheme) {
770:       case PEP_REFINE_SCHEME_EXPLICIT:
771:         NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
772:         matctx->compM1 = PETSC_FALSE;
773:         break;
774:       case PEP_REFINE_SCHEME_MBE:
775:         NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
776:         matctx->compM1 = PETSC_FALSE;
777:         break;
778:       case PEP_REFINE_SCHEME_SCHUR:
779:         break;
780:       }
781:     } else idx = matctx->idx;
782:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
783:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
784:     VecGetArrayRead(matctx->tg,&array);
785:     VecPlaceArray(matctx->t,array);
786:     VecCopy(matctx->t,matctx->Rv);
787:     VecResetArray(matctx->t);
788:     VecRestoreArrayRead(matctx->tg,&array);
789:     R  = matctx->Rv;
790:     Vi = matctx->Vi;
791:   }
792:   if (idx==i && idx<k) {
793:     switch (pep->scheme) {
794:       case PEP_REFINE_SCHEME_EXPLICIT:
795:         NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
796:         break;
797:       case PEP_REFINE_SCHEME_MBE:
798:         NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
799:         break;
800:       case PEP_REFINE_SCHEME_SCHUR:
801:         NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
802:         break;
803:     }
804:   }
805:   if (matctx && matctx->subc) {
806:     VecGetLocalSize(Vi,&m);
807:     VecGetArrayRead(Vi,&array);
808:     VecGetArray(matctx->tg,&array2);
809:     PetscArraycpy(array2,array,m);
810:     VecRestoreArray(matctx->tg,&array2);
811:     VecRestoreArrayRead(Vi,&array);
812:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
813:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
814:     switch (pep->scheme) {
815:     case PEP_REFINE_SCHEME_EXPLICIT:
816:       MatGetOwnershipRange(matctx->E[0],&n0,&m0);
817:       VecGetArrayRead(matctx->ttN,&array);
818:       VecPlaceArray(matctx->tp,array+m0-n0);
819:       VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
820:       VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
821:       VecResetArray(matctx->tp);
822:       VecRestoreArrayRead(matctx->ttN,&array);
823:       VecGetArrayRead(matctx->tpg,&array);
824:       for (j=0;j<k;j++) dHi[j] = array[j];
825:       VecRestoreArrayRead(matctx->tpg,&array);
826:       break;
827:      case PEP_REFINE_SCHEME_MBE:
828:       root = 0;
829:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
830:       PetscMPIIntCast(k,&len);
831:       MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
832:       break;
833:     case PEP_REFINE_SCHEME_SCHUR:
834:       break;
835:     }
836:   }
837:   return(0);
838: }

840: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,PEP_REFINE_EXPLICIT *matctx)
841: {
842:   PetscErrorCode      ierr;
843:   PetscInt            i,nmat=pep->nmat,lda=nmat*k;
844:   PetscScalar         *fh,*Rh,*DfH;
845:   PetscReal           norm;
846:   BV                  W;
847:   Vec                 Rv,t,dvi;
848:   PEP_REFINE_MATSHELL *ctx;
849:   Mat                 M,*At;
850:   PetscBool           flg,lindep;

853:   PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
854:   *rds = 0;
855:   BVCreateVec(pep->V,&Rv);
856:   switch (pep->scheme) {
857:   case PEP_REFINE_SCHEME_EXPLICIT:
858:     BVCreateVec(pep->V,&t);
859:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
860:     PetscMalloc1(nmat,&fh);
861:     break;
862:   case PEP_REFINE_SCHEME_MBE:
863:     if (matctx->subc) {
864:       BVCreateVec(pep->V,&t);
865:       BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
866:     } else {
867:       W = matctx->W;
868:       PetscObjectReference((PetscObject)W);
869:       t = matctx->t;
870:       PetscObjectReference((PetscObject)t);
871:     }
872:     BVScale(matctx->W,0.0);
873:     BVScale(matctx->Wt,0.0);
874:     BVScale(matctx->M2,0.0);
875:     BVScale(matctx->M3,0.0);
876:     PetscMalloc1(nmat,&fh);
877:     break;
878:   case PEP_REFINE_SCHEME_SCHUR:
879:     KSPGetOperators(ksp,&M,NULL);
880:     MatShellGetContext(M,&ctx);
881:     BVCreateVec(pep->V,&t);
882:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
883:     fh = ctx->fih;
884:     break;
885:   }
886:   PetscArrayzero(dVS,2*k*k);
887:   PetscArrayzero(DfH,lda*k);
888:   STGetTransform(pep->st,&flg);
889:   if (flg) {
890:     PetscMalloc1(pep->nmat,&At);
891:     for (i=0;i<pep->nmat;i++) {
892:       STGetMatrixTransformed(pep->st,i,&At[i]);
893:     }
894:   } else At = pep->A;

896:   /* Main loop for computing the i-th columns of dX and dS */
897:   for (i=0;i<k;i++) {
898:     /* Compute and update i-th column of the right hand side */
899:     PetscArrayzero(Rh,k);
900:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);

902:     /* Update and solve system */
903:     BVGetColumn(dV,i,&dvi);
904:     NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
905:     /* Orthogonalize computed solution */
906:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
907:     BVRestoreColumn(dV,i,&dvi);
908:     if (!lindep) {
909:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
910:       if (!lindep) {
911:         dVS[k+i+i*2*k] = norm;
912:         BVScaleColumn(dV,i,1.0/norm);
913:         (*rds)++;
914:       }
915:     }
916:   }
917:   BVSetActiveColumns(dV,0,*rds);
918:   VecDestroy(&t);
919:   VecDestroy(&Rv);
920:   BVDestroy(&W);
921:   if (flg) {
922:     PetscFree(At);
923:   }
924:   PetscFree2(DfH,Rh);
925:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
926:   return(0);
927: }

929: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
930: {
932:   PetscInt       j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
933:   PetscScalar    *G,*tau,sone=1.0,zero=0.0,*work;
934:   PetscBLASInt   lds_,k_,ldh_,info,ldg_,lda_;

937:   PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
938:   PetscBLASIntCast(lds,&lds_);
939:   PetscBLASIntCast(lda,&lda_);
940:   PetscBLASIntCast(k,&k_);

942:   /* Form auxiliary matrix for the orthogonalization step */
943:   ldg = deg*k;
944:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
945:   PetscBLASIntCast(ldg,&ldg_);
946:   PetscBLASIntCast(ldh,&ldh_);
947:   for (j=0;j<deg;j++) {
948:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
949:   }
950:   /* Orthogonalize and update S */
951:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
952:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
953:   PetscFPTrapPop();
954:   SlepcCheckLapackInfo("geqrf",info);
955:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

957:   /* Update H */
958:   PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
959:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
960:   PetscFree3(tau,work,G);
961:   return(0);
962: }

964: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
965: {
967:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
968:   PetscScalar    *tau,*array,*work;
969:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
970:   Mat            M0;

973:   PetscMalloc2(k,&tau,k,&work);
974:   PetscBLASIntCast(lds,&lds_);
975:   PetscBLASIntCast(lda,&lda_);
976:   PetscBLASIntCast(ldh,&ldh_);
977:   PetscBLASIntCast(k,&k_);
978:   PetscBLASIntCast(2*k,&k2_);
979:   PetscBLASIntCast((k+rds),&kdrs_);
980:   /* Update H */
981:   for (j=0;j<k;j++) {
982:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
983:   }
984:   /* Update V */
985:   for (j=0;j<k;j++) {
986:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
987:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
988:   }
989:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
990:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
991:   SlepcCheckLapackInfo("geqrf",info);
992:   /* Copy triangular matrix in S */
993:   for (j=0;j<k;j++) {
994:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
995:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
996:   }
997:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
998:   SlepcCheckLapackInfo("orgqr",info);
999:   PetscFPTrapPop();
1000:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1001:   MatDenseGetArrayWrite(M0,&array);
1002:   for (j=0;j<k;j++) {
1003:     PetscArraycpy(array+j*k,dVS+j*2*k,k);
1004:   }
1005:   MatDenseRestoreArrayWrite(M0,&array);
1006:   BVMultInPlace(pep->V,M0,0,k);
1007:   if (rds) {
1008:     MatDenseGetArrayWrite(M0,&array);
1009:     for (j=0;j<k;j++) {
1010:       PetscArraycpy(array+j*k,dVS+k+j*2*k,rds);
1011:     }
1012:     MatDenseRestoreArrayWrite(M0,&array);
1013:     BVMultInPlace(dV,M0,0,k);
1014:     BVMult(pep->V,1.0,1.0,dV,NULL);
1015:   }
1016:   MatDestroy(&M0);
1017:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1018:   PetscFree2(tau,work);
1019:   return(0);
1020: }

1022: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PEP_REFINE_EXPLICIT *matctx,PetscBool ini)
1023: {
1024:   PetscErrorCode      ierr;
1025:   PEP_REFINE_MATSHELL *ctx;
1026:   PetscScalar         t,*coef;
1027:   const PetscScalar   *array;
1028:   MatStructure        str;
1029:   PetscInt            j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1030:   MPI_Comm            comm;
1031:   PetscMPIInt         np;
1032:   const PetscInt      *rgs0,*rgs1;
1033:   Mat                 B,C,*E,*A,*At;
1034:   IS                  is1,is2;
1035:   Vec                 v;
1036:   PetscBool           flg;
1037:   Mat                 M,P;

1040:   PetscMalloc1(nmat,&coef);
1041:   STGetTransform(pep->st,&flg);
1042:   if (flg) {
1043:     PetscMalloc1(pep->nmat,&At);
1044:     for (i=0;i<pep->nmat;i++) {
1045:       STGetMatrixTransformed(pep->st,i,&At[i]);
1046:     }
1047:   } else At = pep->A;
1048:   switch (pep->scheme) {
1049:   case PEP_REFINE_SCHEME_EXPLICIT:
1050:     if (ini) {
1051:       if (matctx->subc) {
1052:         A = matctx->A;
1053:         comm = PetscSubcommChild(matctx->subc);
1054:       } else {
1055:         A = At;
1056:         PetscObjectGetComm((PetscObject)pep,&comm);
1057:       }
1058:       E = matctx->E;
1059:       STGetMatStructure(pep->st,&str);
1060:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1061:       j = (matctx->subc)?matctx->subc->color:0;
1062:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1063:       for (j=1;j<nmat;j++) {
1064:         MatAXPY(E[0],coef[j],A[j],str);
1065:       }
1066:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1067:       MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1068:       MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1069:       MatGetOwnershipRange(E[0],&n0,&m0);
1070:       MatGetOwnershipRange(E[1],&n1,&m1);
1071:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1072:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1073:       /* T12 and T21 are computed from V and V*, so,
1074:          they must have the same column and row ranges */
1075:       if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Inconsistent dimensions");
1076:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1077:       MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1078:       MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1079:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1080:       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1081:       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1082:       MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1083:       MatDestroy(&B);
1084:       MatDestroy(&C);
1085:       matctx->compM1 = PETSC_TRUE;
1086:       MatGetSize(E[0],NULL,&N0);
1087:       MatGetSize(E[1],NULL,&N1);
1088:       MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1089:       MatGetOwnershipRanges(E[0],&rgs0);
1090:       MatGetOwnershipRanges(E[1],&rgs1);
1091:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1092:       /* Create column (and row) mapping */
1093:       for (p=0;p<np;p++) {
1094:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1095:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1096:       }
1097:       MatCreateVecs(M,NULL,&matctx->tN);
1098:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1099:       VecDuplicate(matctx->tN,&matctx->ttN);
1100:       if (matctx->subc) {
1101:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1102:         count = np*k;
1103:         PetscMalloc2(count,&idx1,count,&idx2);
1104:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1105:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
1106:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1107:         for (si=0;si<matctx->subc->n;si++) {
1108:           if (matctx->subc->color==si) {
1109:             j=0;
1110:             if (matctx->subc->color==si) {
1111:               for (p=0;p<np;p++) {
1112:                 for (i=n1;i<m1;i++) {
1113:                   idx1[j] = l0+i-n1;
1114:                   idx2[j++] =p*k+i;
1115:                 }
1116:               }
1117:             }
1118:             count = np*(m1-n1);
1119:           } else count =0;
1120:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1121:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1122:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1123:           ISDestroy(&is1);
1124:           ISDestroy(&is2);
1125:         }
1126:         PetscFree2(idx1,idx2);
1127:       } else {
1128:         VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1129:       }
1130:       P = M;
1131:     } else {
1132:       if (matctx->subc) {
1133:         /* Scatter vectors pep->V */
1134:         for (i=0;i<k;i++) {
1135:           BVGetColumn(pep->V,i,&v);
1136:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1137:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1138:           BVRestoreColumn(pep->V,i,&v);
1139:           VecGetArrayRead(matctx->tg,&array);
1140:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
1141:           BVInsertVec(matctx->V,i,matctx->t);
1142:           VecResetArray(matctx->t);
1143:           VecRestoreArrayRead(matctx->tg,&array);
1144:         }
1145:       }
1146:     }
1147:     break;
1148:   case PEP_REFINE_SCHEME_MBE:
1149:     if (ini) {
1150:       if (matctx->subc) {
1151:         A = matctx->A;
1152:         comm = PetscSubcommChild(matctx->subc);
1153:       } else {
1154:         matctx->V = pep->V;
1155:         A = At;
1156:         PetscObjectGetComm((PetscObject)pep,&comm);
1157:         MatCreateVecs(pep->A[0],&matctx->t,NULL);
1158:       }
1159:       STGetMatStructure(pep->st,&str);
1160:       MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1161:       j = (matctx->subc)?matctx->subc->color:0;
1162:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1163:       for (j=1;j<nmat;j++) {
1164:         MatAXPY(matctx->M1,coef[j],A[j],str);
1165:       }
1166:       BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1167:       BVDuplicateResize(matctx->V,k,&matctx->M2);
1168:       BVDuplicate(matctx->M2,&matctx->M3);
1169:       BVDuplicate(matctx->M2,&matctx->Wt);
1170:       PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1171:       matctx->compM1 = PETSC_TRUE;
1172:       M = matctx->M1;
1173:       P = M;
1174:     }
1175:     break;
1176:   case PEP_REFINE_SCHEME_SCHUR:
1177:     if (ini) {
1178:       PetscObjectGetComm((PetscObject)pep,&comm);
1179:       MatGetSize(At[0],&m0,&n0);
1180:       PetscMalloc1(1,&ctx);
1181:       STGetMatStructure(pep->st,&str);
1182:       /* Create a shell matrix to solve the linear system */
1183:       ctx->V = pep->V;
1184:       ctx->k = k; ctx->nmat = nmat;
1185:       PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1186:       for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1187:       PetscArrayzero(ctx->M4,k*k);
1188:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1189:       MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatMult_FS);
1190:       BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1191:       BVDuplicateResize(ctx->V,k,&ctx->M2);
1192:       BVDuplicate(ctx->M2,&ctx->M3);
1193:       BVCreateVec(pep->V,&ctx->t);
1194:       MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1195:       PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1196:       for (j=1;j<nmat;j++) {
1197:         MatAXPY(ctx->M1,coef[j],At[j],str);
1198:       }
1199:       MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1200:       /* Compute a precond matrix for the system */
1201:       t = H[0];
1202:       PEPEvaluateBasis(pep,t,0,coef,NULL);
1203:       for (j=1;j<nmat;j++) {
1204:         MatAXPY(P,coef[j],At[j],str);
1205:       }
1206:       ctx->compM1 = PETSC_TRUE;
1207:     }
1208:     break;
1209:   }
1210:   if (ini) {
1211:     PEPRefineGetKSP(pep,&pep->refineksp);
1212:     KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1213:     KSPSetOperators(pep->refineksp,M,P);
1214:     KSPSetFromOptions(pep->refineksp);
1215:   }

1217:   if (!ini && matctx && matctx->subc) {
1218:      /* Scatter vectors pep->V */
1219:     for (i=0;i<k;i++) {
1220:       BVGetColumn(pep->V,i,&v);
1221:       VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1222:       VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1223:       BVRestoreColumn(pep->V,i,&v);
1224:       VecGetArrayRead(matctx->tg,&array);
1225:       VecPlaceArray(matctx->t,(const PetscScalar*)array);
1226:       BVInsertVec(matctx->V,i,matctx->t);
1227:       VecResetArray(matctx->t);
1228:       VecRestoreArrayRead(matctx->tg,&array);
1229:     }
1230:    }
1231:   PetscFree(coef);
1232:   if (flg) {
1233:     PetscFree(At);
1234:   }
1235:   return(0);
1236: }

1238: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,PEP_REFINE_EXPLICIT *matctx,PetscInt nsubc)
1239: {
1240:   PetscErrorCode    ierr;
1241:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1242:   IS                is1,is2;
1243:   BVType            type;
1244:   Vec               v;
1245:   const PetscScalar *array;
1246:   Mat               *A;
1247:   PetscBool         flg;

1250:   STGetTransform(pep->st,&flg);
1251:   if (flg) {
1252:     PetscMalloc1(pep->nmat,&A);
1253:     for (i=0;i<pep->nmat;i++) {
1254:       STGetMatrixTransformed(pep->st,i,&A[i]);
1255:     }
1256:   } else A = pep->A;

1258:   /* Duplicate pep matrices */
1259:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1260:   for (i=0;i<pep->nmat;i++) {
1261:     MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1262:     PetscLogObjectParent((PetscObject)pep,(PetscObject)matctx->A[i]);
1263:   }

1265:   /* Create Scatter */
1266:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1267:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1268:   VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1269:   BVGetColumn(pep->V,0,&v);
1270:   VecGetOwnershipRange(v,&n0,&m0);
1271:   nloc0 = m0-n0;
1272:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1273:   j = 0;
1274:   for (si=0;si<matctx->subc->n;si++) {
1275:     for (i=n0;i<m0;i++) {
1276:       idx1[j]   = i;
1277:       idx2[j++] = i+pep->n*si;
1278:     }
1279:   }
1280:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1281:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1282:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1283:   ISDestroy(&is1);
1284:   ISDestroy(&is2);
1285:   for (si=0;si<matctx->subc->n;si++) {
1286:     j=0;
1287:     for (i=n0;i<m0;i++) {
1288:       idx1[j] = i;
1289:       idx2[j++] = i+pep->n*si;
1290:     }
1291:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1292:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1293:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1294:     ISDestroy(&is1);
1295:     ISDestroy(&is2);
1296:   }
1297:   BVRestoreColumn(pep->V,0,&v);
1298:   PetscFree2(idx1,idx2);

1300:   /* Duplicate pep->V vecs */
1301:   BVGetType(pep->V,&type);
1302:   BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1303:   BVSetType(matctx->V,type);
1304:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1305:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1306:     BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1307:   }
1308:   for (i=0;i<k;i++) {
1309:     BVGetColumn(pep->V,i,&v);
1310:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1311:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1312:     BVRestoreColumn(pep->V,i,&v);
1313:     VecGetArrayRead(matctx->tg,&array);
1314:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1315:     BVInsertVec(matctx->V,i,matctx->t);
1316:     VecResetArray(matctx->t);
1317:     VecRestoreArrayRead(matctx->tg,&array);
1318:   }

1320:   VecDuplicate(matctx->t,&matctx->Rv);
1321:   VecDuplicate(matctx->t,&matctx->Vi);
1322:   if (flg) {
1323:     PetscFree(A);
1324:   }
1325:   return(0);
1326: }

1328: static PetscErrorCode NRefSubcommDestroy(PEP pep,PEP_REFINE_EXPLICIT *matctx)
1329: {
1331:   PetscInt       i;

1334:   VecScatterDestroy(&matctx->scatter_sub);
1335:   for (i=0;i<matctx->subc->n;i++) {
1336:     VecScatterDestroy(&matctx->scatter_id[i]);
1337:   }
1338:   for (i=0;i<pep->nmat;i++) {
1339:     MatDestroy(&matctx->A[i]);
1340:   }
1341:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1342:     for (i=0;i<matctx->subc->n;i++) {
1343:       VecScatterDestroy(&matctx->scatterp_id[i]);
1344:     }
1345:     VecDestroy(&matctx->tp);
1346:     VecDestroy(&matctx->tpg);
1347:     BVDestroy(&matctx->W);
1348:   }
1349:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1350:   BVDestroy(&matctx->V);
1351:   VecDestroy(&matctx->t);
1352:   VecDestroy(&matctx->tg);
1353:   VecDestroy(&matctx->Rv);
1354:   VecDestroy(&matctx->Vi);
1355:   return(0);
1356: }

1358: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1359: {
1360:   PetscErrorCode      ierr;
1361:   PetscScalar         *H,*work,*dH,*fH,*dVS;
1362:   PetscInt            ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1363:   PetscBLASInt        k_,ld_,*p,info;
1364:   BV                  dV;
1365:   PetscBool           sinvert,flg;
1366:   PEP_REFINE_EXPLICIT *matctx=NULL;
1367:   Vec                 v;
1368:   Mat                 M,P;
1369:   PEP_REFINE_MATSHELL *ctx;

1372:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1373:   if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1374:   /* the input tolerance is not being taken into account (by the moment) */
1375:   its = *maxits;
1376:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1377:   DSGetLeadingDimension(pep->ds,&ldh);
1378:   DSGetArray(pep->ds,DS_MAT_A,&H);
1379:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1380:   PetscMalloc1(2*k*k,&dVS);
1381:   STGetTransform(pep->st,&flg);
1382:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1383:     PetscBLASIntCast(k,&k_);
1384:     PetscBLASIntCast(ldh,&ld_);
1385:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1386:     if (sinvert) {
1387:       DSGetArray(pep->ds,DS_MAT_A,&H);
1388:       PetscMalloc1(k,&p);
1389:       PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1390:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1391:       SlepcCheckLapackInfo("getrf",info);
1392:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1393:       SlepcCheckLapackInfo("getri",info);
1394:       PetscFPTrapPop();
1395:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1396:       pep->ops->backtransform = NULL;
1397:     }
1398:     if (sigma!=0.0) {
1399:       DSGetArray(pep->ds,DS_MAT_A,&H);
1400:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1401:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1402:       pep->ops->backtransform = NULL;
1403:     }
1404:   }
1405:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1406:     DSGetArray(pep->ds,DS_MAT_A,&H);
1407:     for (j=0;j<k;j++) {
1408:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1409:     }
1410:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1411:     if (!flg) {
1412:       /* Restore original values */
1413:       for (i=0;i<pep->nmat;i++) {
1414:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1415:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1416:       }
1417:     }
1418:   }
1419:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1420:     for (i=0;i<k;i++) {
1421:       BVGetColumn(pep->V,i,&v);
1422:       VecPointwiseMult(v,v,pep->Dr);
1423:       BVRestoreColumn(pep->V,i,&v);
1424:     }
1425:   }
1426:   DSGetArray(pep->ds,DS_MAT_A,&H);

1428:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1429:   /* check if H is in Schur form */
1430:   for (i=0;i<k-1;i++) {
1431:     if (H[i+1+i*ldh]!=0.0) {
1432: #if !defined(PETSC_USE_COMPLEX)
1433:       SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires the complex Schur form of the projected matrix");
1434: #else
1435:       SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Iterative Refinement requires an upper triangular projected matrix");
1436: #endif
1437:     }
1438:   }
1439:   if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1440:   BVSetActiveColumns(pep->V,0,k);
1441:   BVDuplicateResize(pep->V,k,&dV);
1442:   PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1443:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1444:     PetscMalloc1(1,&matctx);
1445:     if (nsubc>1) { /* splitting in subcommunicators */
1446:       matctx->subc = pep->refinesubc;
1447:       NRefSubcommSetup(pep,k,matctx,nsubc);
1448:     } else matctx->subc=NULL;
1449:   }

1451:   /* Loop performing iterative refinements */
1452:   for (i=0;i<its;i++) {
1453:     /* Pre-compute the polynomial basis evaluated in H */
1454:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1455:     PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1456:     /* Solve the linear system */
1457:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1458:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1459:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1460:   }
1461:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1462:   if (!flg && sinvert) {
1463:     PetscFree(p);
1464:   }
1465:   PetscFree3(dH,fH,work);
1466:   PetscFree(dVS);
1467:   BVDestroy(&dV);
1468:   switch (pep->scheme) {
1469:   case PEP_REFINE_SCHEME_EXPLICIT:
1470:     for (i=0;i<2;i++) {
1471:       MatDestroy(&matctx->E[i]);
1472:     }
1473:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1474:     VecDestroy(&matctx->tN);
1475:     VecDestroy(&matctx->ttN);
1476:     VecDestroy(&matctx->t1);
1477:     if (nsubc>1) {
1478:       NRefSubcommDestroy(pep,matctx);
1479:     } else {
1480:       VecDestroy(&matctx->vseq);
1481:       VecScatterDestroy(&matctx->scatterctx);
1482:     }
1483:     PetscFree(matctx);
1484:     KSPGetOperators(pep->refineksp,&M,NULL);
1485:     MatDestroy(&M);
1486:     break;
1487:   case PEP_REFINE_SCHEME_MBE:
1488:     BVDestroy(&matctx->W);
1489:     BVDestroy(&matctx->Wt);
1490:     BVDestroy(&matctx->M2);
1491:     BVDestroy(&matctx->M3);
1492:     MatDestroy(&matctx->M1);
1493:     VecDestroy(&matctx->t);
1494:     PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1495:     if (nsubc>1) {
1496:       NRefSubcommDestroy(pep,matctx);
1497:     }
1498:     PetscFree(matctx);
1499:     break;
1500:   case PEP_REFINE_SCHEME_SCHUR:
1501:     KSPGetOperators(pep->refineksp,&M,&P);
1502:     MatShellGetContext(M,&ctx);
1503:     PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1504:     MatDestroy(&ctx->M1);
1505:     BVDestroy(&ctx->M2);
1506:     BVDestroy(&ctx->M3);
1507:     BVDestroy(&ctx->W);
1508:     VecDestroy(&ctx->t);
1509:     PetscFree(ctx);
1510:     MatDestroy(&M);
1511:     MatDestroy(&P);
1512:     break;
1513:   }
1514:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1515:   return(0);
1516: }