Actual source code: nrefine.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Newton refinement for polynomial eigenproblems.

 13:    References:

 15:        [1] T. Betcke and D. Kressner, "Perturbation, extraction and refinement
 16:            of invariant pairs for matrix polynomials", Linear Algebra Appl.
 17:            435(3):514-536, 2011.

 19:        [2] C. Campos and J.E. Roman, "Parallel iterative refinement in
 20:            polynomial eigenvalue problems", Numer. Linear Algebra Appl. 23(4):
 21:            730-745, 2016.
 22: */

 24: #include <slepc/private/pepimpl.h>
 25: #include <slepcblaslapack.h>

 27: typedef struct {
 28:   Mat          *A,M1;
 29:   BV           V,M2,M3,W;
 30:   PetscInt     k,nmat;
 31:   PetscScalar  *fih,*work,*M4;
 32:   PetscBLASInt *pM4;
 33:   PetscBool    compM1;
 34:   Vec          t;
 35: } FSubctx;

 37: typedef struct {
 38:   Mat          E[2],M1;
 39:   Vec          tN,ttN,t1,vseq;
 40:   VecScatter   scatterctx;
 41:   PetscBool    compM1;
 42:   PetscInt     *map0,*map1,*idxg,*idxp;
 43:   PetscSubcomm subc;
 44:   VecScatter   scatter_sub;
 45:   VecScatter   *scatter_id,*scatterp_id;
 46:   Mat          *A;
 47:   BV           V,W,M2,M3,Wt;
 48:   PetscScalar  *M4,*w,*wt,*d,*dt;
 49:   Vec          t,tg,Rv,Vi,tp,tpg;
 50:   PetscInt     idx,*cols;
 51: } MatExplicitCtx;

 53: static PetscErrorCode MatFSMult(Mat M ,Vec x,Vec y)
 54: {
 55: #if defined(PETSC_MISSING_LAPACK_GETRS)
 57:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
 58: #else
 60:   FSubctx        *ctx;
 61:   PetscInt       k,i;
 62:   PetscScalar    *c;
 63:   PetscBLASInt   k_,one=1,info;

 66:   MatShellGetContext(M,&ctx);
 67:   VecCopy(x,ctx->t);
 68:   k    = ctx->k;
 69:   c    = ctx->work;
 70:   PetscBLASIntCast(k,&k_);
 71:   MatMult(ctx->M1,x,y);
 72:   VecConjugate(ctx->t);
 73:   BVDotVec(ctx->M3,ctx->t,c);
 74:   for (i=0;i<k;i++) c[i] = PetscConj(c[i]);
 75:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,c,&k_,&info));
 76:   SlepcCheckLapackInfo("getrs",info);
 77:   BVMultVec(ctx->M2,-1.0,1.0,y,c);
 78:   return(0);
 79: #endif
 80: }

 82: /*
 83:   Evaluates the first d elements of the polynomial basis
 84:   on a given matrix H which is considered to be triangular
 85: */
 86: static PetscErrorCode PEPEvaluateBasisforMatrix(PEP pep,PetscInt nm,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH)
 87: {
 89:   PetscInt       i,j,ldfh=nm*k,off,nmat=pep->nmat;
 90:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat,t;
 91:   PetscScalar    corr=0.0,alpha,beta;
 92:   PetscBLASInt   k_,ldh_,ldfh_;

 95:   PetscBLASIntCast(ldh,&ldh_);
 96:   PetscBLASIntCast(k,&k_);
 97:   PetscBLASIntCast(ldfh,&ldfh_);
 98:   PetscMemzero(fH,nm*k*k*sizeof(PetscScalar));
 99:   if (nm>0) for (j=0;j<k;j++) fH[j+j*ldfh] = 1.0;
100:   if (nm>1) {
101:     t = b[0]/a[0];
102:     off = k;
103:     for (j=0;j<k;j++) {
104:       for (i=0;i<k;i++) fH[off+i+j*ldfh] = H[i+j*ldh]/a[0];
105:       fH[j+j*ldfh] -= t;
106:     }
107:   }
108:   for (i=2;i<nm;i++) {
109:     off = i*k;
110:     if (i==2) {
111:       for (j=0;j<k;j++) {
112:         fH[off+j+j*ldfh] = 1.0;
113:         H[j+j*ldh] -= b[1];
114:       }
115:     } else {
116:       for (j=0;j<k;j++) {
117:         PetscMemcpy(fH+off+j*ldfh,fH+(i-2)*k+j*ldfh,k*sizeof(PetscScalar));
118:         H[j+j*ldh] += corr-b[i-1];
119:       }
120:     }
121:     corr  = b[i-1];
122:     beta  = -g[i-1]/a[i-1];
123:     alpha = 1/a[i-1];
124:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&alpha,H,&ldh_,fH+(i-1)*k,&ldfh_,&beta,fH+off,&ldfh_));
125:   }
126:   for (j=0;j<k;j++) H[j+j*ldh] += corr;
127:   return(0);
128: }

130: static PetscErrorCode NRefSysSetup_shell(PEP pep,PetscInt k,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,FSubctx *ctx)
131: {
132: #if defined(PETSC_MISSING_LAPACK_GESV)
134:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV - Lapack routine is unavailable");
135: #else
136:   PetscErrorCode    ierr;
137:   PetscScalar       *DHii,*T12,*Tr,*Ts,*array,s,ss,sone=1.0,zero=0.0,*M4=ctx->M4,t,*v,*T;
138:   const PetscScalar *m3,*m2;
139:   PetscInt          i,d,j,nmat=pep->nmat,lda=nmat*k,deg=nmat-1,nloc;
140:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
141:   PetscBLASInt      k_,lda_,lds_,nloc_,one=1,info;
142:   Mat               *A=ctx->A,Mk,M1=ctx->M1,P;
143:   BV                V=ctx->V,M2=ctx->M2,M3=ctx->M3,W=ctx->W;
144:   MatStructure      str;
145:   Vec               vc;

148:   STGetMatStructure(pep->st,&str);
149:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
150:   DHii = T12;
151:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
152:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
153:   for (d=2;d<nmat;d++) {
154:     for (j=0;j<k;j++) {
155:       for (i=0;i<k;i++) {
156:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/(a[d-1]);
157:       }
158:     }
159:   }
160:   /* T11 */
161:   if (!ctx->compM1) {
162:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
163:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
164:     for (j=1;j<nmat;j++) {
165:       MatAXPY(M1,Ts[j],A[j],str);
166:     }
167:   }

169:   /* T22 */
170:   PetscBLASIntCast(lds,&lds_);
171:   PetscBLASIntCast(k,&k_);
172:   PetscBLASIntCast(lda,&lda_);
173:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
174:   for (i=1;i<deg;i++) {
175:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
176:     s = (i==1)?0.0:1.0;
177:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
178:   }
179:   for (i=0;i<k;i++) for (j=0;j<i;j++) { t=M4[i+j*k];M4[i+j*k]=M4[j+i*k];M4[j+i*k]=t; }

181:   /* T12 */
182:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
183:   for (i=1;i<nmat;i++) {
184:     MatDenseGetArray(Mk,&array);
185:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
186:     MatDenseRestoreArray(Mk,&array);
187:     BVSetActiveColumns(W,0,k);
188:     BVMult(W,1.0,0.0,V,Mk);
189:     if (i==1) {
190:       BVMatMult(W,A[i],M2);
191:     } else {
192:       BVMatMult(W,A[i],M3); /* using M3 as work space */
193:       BVMult(M2,1.0,1.0,M3,NULL);
194:     }
195:   }

197:   /* T21 */
198:   MatDenseGetArray(Mk,&array);
199:   for (i=1;i<deg;i++) {
200:     s = (i==1)?0.0:1.0;
201:     ss = PetscConj(fh[i]);
202:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
203:   }
204:   MatDenseRestoreArray(Mk,&array);
205:   BVSetActiveColumns(M3,0,k);
206:   BVMult(M3,1.0,0.0,V,Mk);
207:   for (i=0;i<k;i++) {
208:     BVGetColumn(M3,i,&vc);
209:     VecConjugate(vc);
210:     BVRestoreColumn(M3,i,&vc);
211:   }
212:   MatDestroy(&Mk);
213:   PetscFree3(T12,Tr,Ts);

215:   VecGetLocalSize(ctx->t,&nloc);
216:   PetscBLASIntCast(nloc,&nloc_);
217:   PetscMalloc1(nloc*k,&T);
218:   KSPGetOperators(pep->refineksp,NULL,&P);
219:   if (!ctx->compM1) { MatCopy(ctx->M1,P,SAME_NONZERO_PATTERN); }
220:   BVGetArrayRead(ctx->M2,&m2);
221:   BVGetArrayRead(ctx->M3,&m3);
222:   VecGetArray(ctx->t,&v);
223:   for (i=0;i<nloc;i++) for (j=0;j<k;j++) T[j+i*k] = m3[i+j*nloc];
224:   PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&nloc_,ctx->M4,&k_,ctx->pM4,T,&k_,&info));
225:   SlepcCheckLapackInfo("gesv",info);
226:   for (i=0;i<nloc;i++) v[i] = BLASdot_(&k_,m2+i,&nloc_,T+i*k,&one);
227:   VecRestoreArray(ctx->t,&v);
228:   BVRestoreArrayRead(ctx->M2,&m2);
229:   BVRestoreArrayRead(ctx->M3,&m3);
230:   MatDiagonalSet(P,ctx->t,ADD_VALUES);
231:   PetscFree(T);
232:   KSPSetUp(pep->refineksp);
233:   return(0);
234: #endif
235: }

237: static PetscErrorCode NRefSysSolve_shell(KSP ksp,PetscInt nmat,Vec Rv,PetscScalar *Rh,PetscInt k,Vec dVi,PetscScalar *dHi)
238: {
239: #if defined(PETSC_MISSING_LAPACK_GETRS)
241:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRS - Lapack routine is unavailable");
242: #else
244:   PetscScalar    *t0;
245:   PetscBLASInt   k_,one=1,info,lda_;
246:   PetscInt       i,lda=nmat*k;
247:   Mat            M;
248:   FSubctx        *ctx;

251:   KSPGetOperators(ksp,&M,NULL);
252:   MatShellGetContext(M,&ctx);
253:   PetscCalloc1(k,&t0);
254:   PetscBLASIntCast(lda,&lda_);
255:   PetscBLASIntCast(k,&k_);
256:   for (i=0;i<k;i++) t0[i] = Rh[i];
257:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,t0,&k_,&info));
258:   SlepcCheckLapackInfo("getrs",info);
259:   BVMultVec(ctx->M2,-1.0,1.0,Rv,t0);
260:   KSPSolve(ksp,Rv,dVi);
261:   VecConjugate(dVi);
262:   BVDotVec(ctx->M3,dVi,dHi);
263:   VecConjugate(dVi);
264:   for (i=0;i<k;i++) dHi[i] = Rh[i]-PetscConj(dHi[i]);
265:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&k_,&one,ctx->M4,&k_,ctx->pM4,dHi,&k_,&info));
266:   SlepcCheckLapackInfo("getrs",info);
267:   PetscFree(t0);
268:   return(0);
269: #endif
270: }

272: /*
273:    Computes the residual P(H,V*S)*e_j for the polynomial
274: */
275: static PetscErrorCode NRefRightSide(PetscInt nmat,PetscReal *pcf,Mat *A,PetscInt k,BV V,PetscScalar *S,PetscInt lds,PetscInt j,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *DfH,PetscScalar *dH,BV dV,PetscScalar *dVS,PetscInt rds,Vec Rv,PetscScalar *Rh,BV W,Vec t)
276: {
278:   PetscScalar    *DS0,*DS1,*F,beta=0.0,sone=1.0,none=-1.0,tt=0.0,*h,zero=0.0,*Z,*c0;
279:   PetscReal      *a=pcf,*b=pcf+nmat,*g=b+nmat;
280:   PetscInt       i,ii,jj,lda;
281:   PetscBLASInt   lda_,k_,ldh_,lds_,nmat_,k2_,krds_,j_,one=1;
282:   Mat            M0;
283:   Vec            w;

286:   PetscMalloc4(k*nmat,&h,k*k,&DS0,k*k,&DS1,k*k,&Z);
287:   lda = k*nmat;
288:   PetscBLASIntCast(k,&k_);
289:   PetscBLASIntCast(lds,&lds_);
290:   PetscBLASIntCast(lda,&lda_);
291:   PetscBLASIntCast(nmat,&nmat_);
292:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&nmat_,&k_,&sone,S,&lds_,fH+j*lda,&k_,&zero,h,&k_));
293:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat,h,&M0);
294:   BVSetActiveColumns(W,0,nmat);
295:   BVMult(W,1.0,0.0,V,M0);
296:   MatDestroy(&M0);

298:   BVGetColumn(W,0,&w);
299:   MatMult(A[0],w,Rv);
300:   BVRestoreColumn(W,0,&w);
301:   for (i=1;i<nmat;i++) {
302:     BVGetColumn(W,i,&w);
303:     MatMult(A[i],w,t);
304:     BVRestoreColumn(W,i,&w);
305:     VecAXPY(Rv,1.0,t);
306:   }
307:   /* Update right-hand side */
308:   if (j) {
309:     PetscBLASIntCast(ldh,&ldh_);
310:     PetscMemzero(Z,k*k*sizeof(PetscScalar));
311:     PetscMemzero(DS0,k*k*sizeof(PetscScalar));
312:     PetscMemcpy(Z+(j-1)*k,dH+(j-1)*k,k*sizeof(PetscScalar));
313:     /* Update DfH */
314:     for (i=1;i<nmat;i++) {
315:       if (i>1) {
316:         beta = -g[i-1];
317:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,fH+(i-1)*k,&lda_,Z,&k_,&beta,DS0,&k_));
318:         tt += -b[i-1];
319:         for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
320:         tt = b[i-1];
321:         beta = 1.0/a[i-1];
322:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&beta,DS1,&k_,H,&ldh_,&beta,DS0,&k_));
323:         F = DS0; DS0 = DS1; DS1 = F;
324:       } else {
325:         PetscMemzero(DS1,k*k*sizeof(PetscScalar));
326:         for (ii=0;ii<k;ii++) DS1[ii+(j-1)*k] = Z[ii+(j-1)*k]/a[0];
327:       }
328:       for (jj=j;jj<k;jj++) {
329:         for (ii=0;ii<k;ii++) DfH[k*i+ii+jj*lda] += DS1[ii+jj*k];
330:       }
331:     }
332:     for (ii=0;ii<k;ii++) H[ii+ii*ldh] += tt;
333:     /* Update right-hand side */
334:     PetscBLASIntCast(2*k,&k2_);
335:     PetscBLASIntCast(j,&j_);
336:     PetscBLASIntCast(k+rds,&krds_);
337:     c0 = DS0;
338:     PetscMemzero(Rh,k*sizeof(PetscScalar));
339:     for (i=0;i<nmat;i++) {
340:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&krds_,&j_,&sone,dVS,&k2_,fH+j*lda+i*k,&one,&zero,h,&one));
341:       PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&k_,&k_,&sone,S,&lds_,DfH+i*k+j*lda,&one,&sone,h,&one));
342:       BVMultVec(V,1.0,0.0,t,h);
343:       BVSetActiveColumns(dV,0,rds);
344:       BVMultVec(dV,1.0,1.0,t,h+k);
345:       BVGetColumn(W,i,&w);
346:       MatMult(A[i],t,w);
347:       BVRestoreColumn(W,i,&w);
348:       if (i>0 && i<nmat-1) {
349:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&sone,S,&lds_,h,&one,&zero,c0,&one));
350:         PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&k_,&k_,&none,fH+i*k,&lda_,c0,&one,&sone,Rh,&one));
351:       }
352:     }

354:     for (i=0;i<nmat;i++) h[i] = -1.0;
355:     BVMultVec(W,1.0,1.0,Rv,h);
356:   }
357:   PetscFree4(h,DS0,DS1,Z);
358:   return(0);
359: }

361: static PetscErrorCode NRefSysSolve_mbe(PetscInt k,PetscInt sz,BV W,PetscScalar *w,BV Wt,PetscScalar *wt,PetscScalar *d,PetscScalar *dt,KSP ksp,BV T2,BV T3 ,PetscScalar *T4,PetscBool trans,Vec x1,PetscScalar *x2,Vec sol1,PetscScalar *sol2,Vec vw)
362: {
364:   PetscInt       i,j,incf,incc;
365:   PetscScalar    *y,*g,*xx2,*ww,y2,*dd;
366:   Vec            v,t,xx1;
367:   BV             WW,T;

370:   PetscMalloc3(sz,&y,sz,&g,k,&xx2);
371:   if (trans) {
372:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
373:   } else {
374:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
375:   }
376:   xx1 = vw;
377:   VecCopy(x1,xx1);
378:   PetscMemcpy(xx2,x2,sz*sizeof(PetscScalar));
379:   PetscMemzero(sol2,k*sizeof(PetscScalar));
380:   for (i=sz-1;i>=0;i--) {
381:     BVGetColumn(WW,i,&v);
382:     VecConjugate(v);
383:     VecDot(xx1,v,y+i);
384:     VecConjugate(v);
385:     BVRestoreColumn(WW,i,&v);
386:     for (j=0;j<i;j++) y[i] += ww[j+i*k]*xx2[j];
387:     y[i] = -(y[i]-xx2[i])/dd[i];
388:     BVGetColumn(T,i,&t);
389:     VecAXPY(xx1,-y[i],t);
390:     BVRestoreColumn(T,i,&t);
391:     for(j=0;j<=i;j++) xx2[j] -= y[i]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
392:     g[i] = xx2[i];
393:   }
394:   if (trans) {
395:     KSPSolveTranspose(ksp,xx1,sol1);
396:   } else {
397:     KSPSolve(ksp,xx1,sol1);
398:   }
399:   if (trans) {
400:     WW = Wt; ww = wt; dd = dt; T = T2; incf = 1; incc = 0;
401:   } else {
402:     WW = W; ww = w; dd = d; T = T3; incf = 0; incc = 1;
403:   }
404:   for (i=0;i<sz;i++) {
405:     BVGetColumn(T,i,&t);
406:     VecConjugate(t);
407:     VecDot(sol1,t,&y2);
408:     VecConjugate(t);
409:     BVRestoreColumn(T,i,&t);
410:     for (j=0;j<i;j++) y2 += sol2[j]*T4[j*incf+incc*i+(i*incf+incc*j)*k];
411:     y2 = (g[i]-y2)/dd[i];
412:     BVGetColumn(WW,i,&v);
413:     VecAXPY(sol1,-y2,v);
414:     for (j=0;j<i;j++) sol2[j] -= ww[j+i*k]*y2;
415:     sol2[i] = y[i]+y2;
416:     BVRestoreColumn(WW,i,&v);
417:   }
418:   PetscFree3(y,g,xx2);
419:   return(0);
420: }

422: static PetscErrorCode NRefSysSetup_mbe(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx)
423: {
425:   PetscInt       i,j,l,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
426:   Mat            M1=matctx->M1,*A,*At,Mk;
427:   PetscReal      *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
428:   PetscScalar    s,ss,*DHii,*T12,*array,*Ts,*Tr,*M4=matctx->M4,sone=1.0,zero=0.0;
429:   PetscScalar    *w=matctx->w,*wt=matctx->wt,*d=matctx->d,*dt=matctx->dt;
430:   PetscBLASInt   lds_,lda_,k_;
431:   MatStructure   str;
432:   PetscBool      flg;
433:   BV             M2=matctx->M2,M3=matctx->M3,W=matctx->W,Wt=matctx->Wt;
434:   Vec            vc,vc2;

437:   PetscMalloc3(nmat*k*k,&T12,k*k,&Tr,PetscMax(k*k,nmat),&Ts);
438:   STGetMatStructure(pep->st,&str);
439:   STGetTransform(pep->st,&flg);
440:   if (flg) {
441:     PetscMalloc1(pep->nmat,&At);
442:     for (i=0;i<pep->nmat;i++) {
443:       STGetMatrixTransformed(pep->st,i,&At[i]);
444:     }
445:   } else At = pep->A;
446:   if (matctx->subc) A = matctx->A;
447:   else A = At;
448:   /* Form the explicit system matrix */
449:   DHii = T12;
450:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
451:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
452:   for (l=2;l<nmat;l++) {
453:     for (j=0;j<k;j++) {
454:       for (i=0;i<k;i++) {
455:         DHii[l*k+i+j*lda] = ((h-b[l-1])*DHii[(l-1)*k+i+j*lda]+fH[(l-1)*k+i+j*lda]-g[l-1]*DHii[(l-2)*k+i+j*lda])/a[l-1];
456:       }
457:     }
458:   }

460:   /* T11 */
461:   if (!matctx->compM1) {
462:     MatCopy(A[0],M1,DIFFERENT_NONZERO_PATTERN);
463:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
464:     for (j=1;j<nmat;j++) {
465:       MatAXPY(M1,Ts[j],A[j],str);
466:     }
467:   }

469:   /* T22 */
470:   PetscBLASIntCast(lds,&lds_);
471:   PetscBLASIntCast(k,&k_);
472:   PetscBLASIntCast(lda,&lda_);
473:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
474:   for (i=1;i<deg;i++) {
475:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
476:     s = (i==1)?0.0:1.0;
477:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,M4,&k_));
478:   }

480:   /* T12 */
481:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&Mk);
482:   for (i=1;i<nmat;i++) {
483:     MatDenseGetArray(Mk,&array);
484:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,array,&k_));
485:     MatDenseRestoreArray(Mk,&array);
486:     BVSetActiveColumns(W,0,k);
487:     BVMult(W,1.0,0.0,V,Mk);
488:     if (i==1) {
489:       BVMatMult(W,A[i],M2);
490:     } else {
491:       BVMatMult(W,A[i],M3); /* using M3 as work space */
492:       BVMult(M2,1.0,1.0,M3,NULL);
493:     }
494:   }

496:   /* T21 */
497:   MatDenseGetArray(Mk,&array);
498:   for (i=1;i<deg;i++) {
499:     s = (i==1)?0.0:1.0;
500:     ss = PetscConj(fh[i]);
501:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,array,&k_));
502:   }
503:   MatDenseRestoreArray(Mk,&array);
504:   BVSetActiveColumns(M3,0,k);
505:   BVMult(M3,1.0,0.0,V,Mk);
506:   for (i=0;i<k;i++) {
507:     BVGetColumn(M3,i,&vc);
508:     VecConjugate(vc);
509:     BVRestoreColumn(M3,i,&vc);
510:   }

512:   KSPSetOperators(ksp,M1,M1);
513:   KSPSetUp(ksp);
514:   MatDestroy(&Mk);

516:   /* Set up for BEMW */
517:   for (i=0;i<k;i++) {
518:     BVGetColumn(M2,i,&vc);
519:     BVGetColumn(W,i,&vc2);
520:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_FALSE,vc,M4+i*k,vc2,w+i*k,matctx->t);
521:     BVRestoreColumn(M2,i,&vc);
522:     BVGetColumn(M3,i,&vc);
523:     VecConjugate(vc);
524:     VecDot(vc2,vc,&d[i]);
525:     VecConjugate(vc);
526:     BVRestoreColumn(M3,i,&vc);
527:     for (j=0;j<i;j++) d[i] += M4[i+j*k]*w[j+i*k];
528:     d[i] = M4[i+i*k]-d[i];
529:     BVRestoreColumn(W,i,&vc2);

531:     BVGetColumn(M3,i,&vc);
532:     BVGetColumn(Wt,i,&vc2);
533:     for (j=0;j<=i;j++) Ts[j] = M4[i+j*k];
534:     NRefSysSolve_mbe(k,i,W,w,Wt,wt,d,dt,ksp,M2,M3,M4,PETSC_TRUE,vc,Ts,vc2,wt+i*k,matctx->t);
535:     BVRestoreColumn(M3,i,&vc);
536:     BVGetColumn(M2,i,&vc);
537:     VecConjugate(vc2);
538:     VecDot(vc,vc2,&dt[i]);
539:     VecConjugate(vc2);
540:     BVRestoreColumn(M2,i,&vc);
541:     for (j=0;j<i;j++) dt[i] += M4[j+i*k]*wt[j+i*k];
542:     dt[i] = M4[i+i*k]-dt[i];
543:     BVRestoreColumn(Wt,i,&vc2);
544:   }

546:   if (flg) {
547:     PetscFree(At);
548:   }
549:   PetscFree3(T12,Tr,Ts);
550:   return(0);
551: }

553: static PetscErrorCode NRefSysSetup_explicit(PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar h,BV V,MatExplicitCtx *matctx,BV W)
554: {
555:   PetscErrorCode    ierr;
556:   PetscInt          i,j,d,n,n0,m0,n1,m1,nmat=pep->nmat,lda=nmat*k,deg=nmat-1;
557:   PetscInt          *idxg=matctx->idxg,*idxp=matctx->idxp,idx,ncols;
558:   Mat               M,*E=matctx->E,*A,*At,Mk,Md;
559:   PetscReal         *a=pep->pbc,*b=pep->pbc+nmat,*g=pep->pbc+2*nmat;
560:   PetscScalar       s,ss,*DHii,*T22,*T21,*T12,*Ts,*Tr,*array,*ts,sone=1.0,zero=0.0;
561:   PetscBLASInt      lds_,lda_,k_;
562:   const PetscInt    *idxmc;
563:   const PetscScalar *valsc,*carray;
564:   MatStructure      str;
565:   Vec               vc,vc0;
566:   PetscBool         flg;

569:   PetscMalloc5(k*k,&T22,k*k,&T21,nmat*k*k,&T12,k*k,&Tr,k*k,&Ts);
570:   STGetMatStructure(pep->st,&str);
571:   KSPGetOperators(ksp,&M,NULL);
572:   MatGetOwnershipRange(E[1],&n1,&m1);
573:   MatGetOwnershipRange(E[0],&n0,&m0);
574:   MatGetOwnershipRange(M,&n,NULL);
575:   PetscMalloc1(nmat,&ts);
576:   STGetTransform(pep->st,&flg);
577:   if (flg) {
578:     PetscMalloc1(pep->nmat,&At);
579:     for (i=0;i<pep->nmat;i++) {
580:       STGetMatrixTransformed(pep->st,i,&At[i]);
581:     }
582:   } else At = pep->A;
583:   if (matctx->subc) A = matctx->A;
584:   else A = At;
585:   /* Form the explicit system matrix */
586:   DHii = T12;
587:   PetscMemzero(DHii,k*k*nmat*sizeof(PetscScalar));
588:   for (i=0;i<k;i++) DHii[k+i+i*lda] = 1.0/a[0];
589:   for (d=2;d<nmat;d++) {
590:     for (j=0;j<k;j++) {
591:       for (i=0;i<k;i++) {
592:         DHii[d*k+i+j*lda] = ((h-b[d-1])*DHii[(d-1)*k+i+j*lda]+fH[(d-1)*k+i+j*lda]-g[d-1]*DHii[(d-2)*k+i+j*lda])/a[d-1];
593:       }
594:     }
595:   }

597:   /* T11 */
598:   if (!matctx->compM1) {
599:     MatCopy(A[0],E[0],DIFFERENT_NONZERO_PATTERN);
600:     PEPEvaluateBasis(pep,h,0,Ts,NULL);
601:     for (j=1;j<nmat;j++) {
602:       MatAXPY(E[0],Ts[j],A[j],str);
603:     }
604:   }
605:   for (i=n0;i<m0;i++) {
606:     MatGetRow(E[0],i,&ncols,&idxmc,&valsc);
607:     idx = n+i-n0;
608:     for (j=0;j<ncols;j++) {
609:       idxg[j] = matctx->map0[idxmc[j]];
610:     }
611:     MatSetValues(M,1,&idx,ncols,idxg,valsc,INSERT_VALUES);
612:     MatRestoreRow(E[0],i,&ncols,&idxmc,&valsc);
613:   }

615:   /* T22 */
616:   PetscBLASIntCast(lds,&lds_);
617:   PetscBLASIntCast(k,&k_);
618:   PetscBLASIntCast(lda,&lda_);
619:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,S,&lds_,S,&lds_,&zero,Tr,&k_));
620:   for (i=1;i<deg;i++) {
621:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,Tr,&k_,DHii+i*k,&lda_,&zero,Ts,&k_));
622:     s = (i==1)?0.0:1.0;
623:     PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&k_,&k_,&k_,&sone,fH+i*k,&lda_,Ts,&k_,&s,T22,&k_));
624:   }
625:   for (j=0;j<k;j++) idxp[j] = matctx->map1[j];
626:   for (i=0;i<m1-n1;i++) {
627:     idx = n+m0-n0+i;
628:     for (j=0;j<k;j++) {
629:       Tr[j] = T22[n1+i+j*k];
630:     }
631:     MatSetValues(M,1,&idx,k,idxp,Tr,INSERT_VALUES);
632:   }

634:   /* T21 */
635:   for (i=1;i<deg;i++) {
636:     s = (i==1)?0.0:1.0;
637:     ss = PetscConj(fh[i]);
638:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&ss,S,&lds_,fH+i*k,&lda_,&s,T21,&k_));
639:   }
640:   BVSetActiveColumns(W,0,k);
641:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,T21,&Mk);
642:   BVMult(W,1.0,0.0,V,Mk);
643:   for (i=0;i<k;i++) {
644:     BVGetColumn(W,i,&vc);
645:     VecConjugate(vc);
646:     VecGetArrayRead(vc,&carray);
647:     idx = matctx->map1[i];
648:     MatSetValues(M,1,&idx,m0-n0,matctx->map0+n0,carray,INSERT_VALUES);
649:     VecRestoreArrayRead(vc,&carray);
650:     BVRestoreColumn(W,i,&vc);
651:   }

653:   /* T12 */
654:   for (i=1;i<nmat;i++) {
655:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,DHii+i*k,&lda_,&zero,Ts,&k_));
656:     for (j=0;j<k;j++) {
657:       PetscMemcpy(T12+i*k+j*lda,Ts+j*k,k*sizeof(PetscScalar));
658:     }
659:   }
660:   MatCreateSeqDense(PETSC_COMM_SELF,k,nmat-1,NULL,&Md);
661:   for (i=0;i<nmat;i++) ts[i] = 1.0;
662:   for (j=0;j<k;j++) {
663:     MatDenseGetArray(Md,&array);
664:     PetscMemcpy(array,T12+k+j*lda,(nmat-1)*k*sizeof(PetscScalar));
665:     MatDenseRestoreArray(Md,&array);
666:     BVSetActiveColumns(W,0,nmat-1);
667:     BVMult(W,1.0,0.0,V,Md);
668:     for (i=nmat-1;i>0;i--) {
669:       BVGetColumn(W,i-1,&vc0);
670:       BVGetColumn(W,i,&vc);
671:       MatMult(A[i],vc0,vc);
672:       BVRestoreColumn(W,i-1,&vc0);
673:       BVRestoreColumn(W,i,&vc);
674:     }
675:     BVSetActiveColumns(W,1,nmat);
676:     BVGetColumn(W,0,&vc0);
677:     BVMultVec(W,1.0,0.0,vc0,ts);
678:     VecGetArrayRead(vc0,&carray);
679:     idx = matctx->map1[j];
680:     MatSetValues(M,m0-n0,matctx->map0+n0,1,&idx,carray,INSERT_VALUES);
681:     VecRestoreArrayRead(vc0,&carray);
682:     BVRestoreColumn(W,0,&vc0);
683:   }
684:   MatAssemblyBegin(M,MAT_FINAL_ASSEMBLY);
685:   MatAssemblyEnd(M,MAT_FINAL_ASSEMBLY);
686:   KSPSetOperators(ksp,M,M);
687:   KSPSetUp(ksp);
688:   PetscFree(ts);
689:   MatDestroy(&Mk);
690:   MatDestroy(&Md);
691:   if (flg) {
692:     PetscFree(At);
693:   }
694:   PetscFree5(T22,T21,T12,Tr,Ts);
695:   return(0);
696: }

698: static PetscErrorCode NRefSysSolve_explicit(PetscInt k,KSP ksp,Vec Rv,PetscScalar *Rh,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx)
699: {
700:   PetscErrorCode    ierr;
701:   PetscInt          n0,m0,n1,m1,i;
702:   PetscScalar       *arrayV;
703:   const PetscScalar *array;

706:   MatGetOwnershipRange(matctx->E[1],&n1,&m1);
707:   MatGetOwnershipRange(matctx->E[0],&n0,&m0);

709:   /* Right side */
710:   VecGetArrayRead(Rv,&array);
711:   VecSetValues(matctx->tN,m0-n0,matctx->map0+n0,array,INSERT_VALUES);
712:   VecRestoreArrayRead(Rv,&array);
713:   VecSetValues(matctx->tN,m1-n1,matctx->map1+n1,Rh+n1,INSERT_VALUES);
714:   VecAssemblyBegin(matctx->tN);
715:   VecAssemblyEnd(matctx->tN);

717:   /* Solve */
718:   KSPSolve(ksp,matctx->tN,matctx->ttN);

720:   /* Retrieve solution */
721:   VecGetArray(dVi,&arrayV);
722:   VecGetArrayRead(matctx->ttN,&array);
723:   PetscMemcpy(arrayV,array,(m0-n0)*sizeof(PetscScalar));
724:   VecRestoreArray(dVi,&arrayV);
725:   if (!matctx->subc) {
726:     VecGetArray(matctx->t1,&arrayV);
727:     for (i=0;i<m1-n1;i++) arrayV[i] =  array[m0-n0+i];
728:     VecRestoreArray(matctx->t1,&arrayV);
729:     VecRestoreArrayRead(matctx->ttN,&array);
730:     VecScatterBegin(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
731:     VecScatterEnd(matctx->scatterctx,matctx->t1,matctx->vseq,INSERT_VALUES,SCATTER_FORWARD);
732:     VecGetArrayRead(matctx->vseq,&array);
733:     for (i=0;i<k;i++) dHi[i] = array[i];
734:     VecRestoreArrayRead(matctx->vseq,&array);
735:   }
736:   return(0);
737: }

739: static PetscErrorCode NRefSysIter(PetscInt i,PEP pep,PetscInt k,KSP ksp,PetscScalar *fH,PetscScalar *S,PetscInt lds,PetscScalar *fh,PetscScalar *H,PetscInt ldh,Vec Rv,PetscScalar *Rh,BV V,Vec dVi,PetscScalar *dHi,MatExplicitCtx *matctx,BV W)
740: {
741:   PetscErrorCode    ierr;
742:   PetscInt          j,m,lda=pep->nmat*k,n0,m0,idx;
743:   PetscMPIInt       root,len;
744:   PetscScalar       *array2,h;
745:   const PetscScalar *array;
746:   Vec               R,Vi;
747:   FSubctx           *ctx;
748:   Mat               M;

751:   if (!matctx || !matctx->subc) {
752:     for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+i+i*lda];
753:     h   = H[i+i*ldh];
754:     idx = i;
755:     R   = Rv;
756:     Vi  = dVi;
757:     switch (pep->scheme) {
758:     case PEP_REFINE_SCHEME_EXPLICIT:
759:       NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,V,matctx,W);
760:       matctx->compM1 = PETSC_FALSE;
761:       break;
762:     case PEP_REFINE_SCHEME_MBE:
763:       NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,V,matctx);
764:       matctx->compM1 = PETSC_FALSE;
765:       break;
766:     case PEP_REFINE_SCHEME_SCHUR:
767:       KSPGetOperators(ksp,&M,NULL);
768:       MatShellGetContext(M,&ctx);
769:       NRefSysSetup_shell(pep,k,fH,S,lds,fh,h,ctx);
770:       ctx->compM1 = PETSC_FALSE;
771:       break;
772:     }
773:   } else {
774:     if (i%matctx->subc->n==0 && (idx=i+matctx->subc->color)<k) {
775:       for (j=0;j<pep->nmat;j++) fh[j] = fH[j*k+idx+idx*lda];
776:       h = H[idx+idx*ldh];
777:       matctx->idx = idx;
778:       switch (pep->scheme) {
779:       case PEP_REFINE_SCHEME_EXPLICIT:
780:         NRefSysSetup_explicit(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx,matctx->W);
781:         matctx->compM1 = PETSC_FALSE;
782:         break;
783:       case PEP_REFINE_SCHEME_MBE:
784:         NRefSysSetup_mbe(pep,k,ksp,fH,S,lds,fh,h,matctx->V,matctx);
785:         matctx->compM1 = PETSC_FALSE;
786:         break;
787:       case PEP_REFINE_SCHEME_SCHUR:
788:         break;
789:       }
790:     } else idx = matctx->idx;
791:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
792:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],Rv,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
793:     VecGetArrayRead(matctx->tg,&array);
794:     VecPlaceArray(matctx->t,array);
795:     VecCopy(matctx->t,matctx->Rv);
796:     VecResetArray(matctx->t);
797:     VecRestoreArrayRead(matctx->tg,&array);
798:     R  = matctx->Rv;
799:     Vi = matctx->Vi;
800:   }
801:   if (idx==i && idx<k) {
802:     switch (pep->scheme) {
803:       case PEP_REFINE_SCHEME_EXPLICIT:
804:         NRefSysSolve_explicit(k,ksp,R,Rh,Vi,dHi,matctx);
805:         break;
806:       case PEP_REFINE_SCHEME_MBE:
807:         NRefSysSolve_mbe(k,k,matctx->W,matctx->w,matctx->Wt,matctx->wt,matctx->d,matctx->dt,ksp,matctx->M2,matctx->M3 ,matctx->M4,PETSC_FALSE,R,Rh,Vi,dHi,matctx->t);
808:         break;
809:       case PEP_REFINE_SCHEME_SCHUR:
810:         NRefSysSolve_shell(ksp,pep->nmat,R,Rh,k,Vi,dHi);
811:         break;
812:     }
813:   }
814:   if (matctx && matctx->subc) {
815:     VecGetLocalSize(Vi,&m);
816:     VecGetArrayRead(Vi,&array);
817:     VecGetArray(matctx->tg,&array2);
818:     PetscMemcpy(array2,array,m*sizeof(PetscScalar));
819:     VecRestoreArray(matctx->tg,&array2);
820:     VecRestoreArrayRead(Vi,&array);
821:     VecScatterBegin(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
822:     VecScatterEnd(matctx->scatter_id[i%matctx->subc->n],matctx->tg,dVi,INSERT_VALUES,SCATTER_REVERSE);
823:     switch (pep->scheme) {
824:     case PEP_REFINE_SCHEME_EXPLICIT:
825:       MatGetOwnershipRange(matctx->E[0],&n0,&m0);
826:       VecGetArrayRead(matctx->ttN,&array);
827:       VecPlaceArray(matctx->tp,array+m0-n0);
828:       VecScatterBegin(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
829:       VecScatterEnd(matctx->scatterp_id[i%matctx->subc->n],matctx->tp,matctx->tpg,INSERT_VALUES,SCATTER_FORWARD);
830:       VecResetArray(matctx->tp);
831:       VecRestoreArrayRead(matctx->ttN,&array);
832:       VecGetArrayRead(matctx->tpg,&array);
833:       for (j=0;j<k;j++) dHi[j] = array[j];
834:       VecRestoreArrayRead(matctx->tpg,&array);
835:       break;
836:      case PEP_REFINE_SCHEME_MBE:
837:       root = 0;
838:       for (j=0;j<i%matctx->subc->n;j++) root += matctx->subc->subsize[j];
839:       PetscMPIIntCast(k,&len);
840:       MPI_Bcast(dHi,len,MPIU_SCALAR,root,matctx->subc->dupparent);
841:       break;
842:     case PEP_REFINE_SCHEME_SCHUR:
843:       break;
844:     }
845:   }
846:   return(0);
847: }

849: static PetscErrorCode PEPNRefForwardSubstitution(PEP pep,PetscInt k,PetscScalar *S,PetscInt lds,PetscScalar *H,PetscInt ldh,PetscScalar *fH,BV dV,PetscScalar *dVS,PetscInt *rds,PetscScalar *dH,PetscInt lddh,KSP ksp,MatExplicitCtx *matctx)
850: {
852:   PetscInt       i,nmat=pep->nmat,lda=nmat*k;
853:   PetscScalar    *fh,*Rh,*DfH;
854:   PetscReal      norm;
855:   BV             W;
856:   Vec            Rv,t,dvi;
857:   FSubctx        *ctx;
858:   Mat            M,*At;
859:   PetscBool      flg,lindep;

862:   PetscMalloc2(nmat*k*k,&DfH,k,&Rh);
863:   *rds = 0;
864:   BVCreateVec(pep->V,&Rv);
865:   switch (pep->scheme) {
866:   case PEP_REFINE_SCHEME_EXPLICIT:
867:     BVCreateVec(pep->V,&t);
868:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
869:     PetscMalloc1(nmat,&fh);
870:     break;
871:   case PEP_REFINE_SCHEME_MBE:
872:     if (matctx->subc) {
873:       BVCreateVec(pep->V,&t);
874:       BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
875:     } else {
876:       W = matctx->W;
877:       PetscObjectReference((PetscObject)W);
878:       t = matctx->t;
879:       PetscObjectReference((PetscObject)t);
880:     }
881:     BVScale(matctx->W,0.0);
882:     BVScale(matctx->Wt,0.0);
883:     BVScale(matctx->M2,0.0);
884:     BVScale(matctx->M3,0.0);
885:     PetscMalloc1(nmat,&fh);
886:     break;
887:   case PEP_REFINE_SCHEME_SCHUR:
888:     KSPGetOperators(ksp,&M,NULL);
889:     MatShellGetContext(M,&ctx);
890:     BVCreateVec(pep->V,&t);
891:     BVDuplicateResize(pep->V,PetscMax(k,nmat),&W);
892:     fh = ctx->fih;
893:     break;
894:   }
895:   PetscMemzero(dVS,2*k*k*sizeof(PetscScalar));
896:   PetscMemzero(DfH,lda*k*sizeof(PetscScalar));
897:   STGetTransform(pep->st,&flg);
898:   if (flg) {
899:     PetscMalloc1(pep->nmat,&At);
900:     for (i=0;i<pep->nmat;i++) {
901:       STGetMatrixTransformed(pep->st,i,&At[i]);
902:     }
903:   } else At = pep->A;

905:   /* Main loop for computing the ith columns of dX and dS */
906:   for (i=0;i<k;i++) {
907:     /* Compute and update i-th column of the right hand side */
908:     PetscMemzero(Rh,k*sizeof(PetscScalar));
909:     NRefRightSide(nmat,pep->pbc,At,k,pep->V,S,lds,i,H,ldh,fH,DfH,dH,dV,dVS,*rds,Rv,Rh,W,t);

911:     /* Update and solve system */
912:     BVGetColumn(dV,i,&dvi);
913:     NRefSysIter(i,pep,k,ksp,fH,S,lds,fh,H,ldh,Rv,Rh,pep->V,dvi,dH+i*k,matctx,W);
914:     /* Orthogonalize computed solution */
915:     BVOrthogonalizeVec(pep->V,dvi,dVS+i*2*k,&norm,&lindep);
916:     BVRestoreColumn(dV,i,&dvi);
917:     if (!lindep) {
918:       BVOrthogonalizeColumn(dV,i,dVS+k+i*2*k,&norm,&lindep);
919:       if (!lindep) {
920:         dVS[k+i+i*2*k] = norm;
921:         BVScaleColumn(dV,i,1.0/norm);
922:         (*rds)++;
923:       }
924:     }
925:   }
926:   BVSetActiveColumns(dV,0,*rds);
927:   VecDestroy(&t);
928:   VecDestroy(&Rv);
929:   BVDestroy(&W);
930:   if (flg) {
931:     PetscFree(At);
932:   }
933:   PetscFree2(DfH,Rh);
934:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) { PetscFree(fh); }
935:   return(0);
936: }

938: static PetscErrorCode NRefOrthogStep(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *S,PetscInt lds)
939: {
940: #if defined(PETSC_MISSING_LAPACK_GEQRF)
942:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable");
943: #else
945:   PetscInt       j,nmat=pep->nmat,deg=nmat-1,lda=nmat*k,ldg;
946:   PetscScalar    *G,*tau,sone=1.0,zero=0.0,*work;
947:   PetscBLASInt   lds_,k_,ldh_,info,ldg_,lda_;

950:   PetscMalloc3(k,&tau,k,&work,deg*k*k,&G);
951:   PetscBLASIntCast(lds,&lds_);
952:   PetscBLASIntCast(lda,&lda_);
953:   PetscBLASIntCast(k,&k_);

955:   /* Form auxiliary matrix for the orthogonalization step */
956:   ldg = deg*k;
957:   PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
958:   PetscBLASIntCast(ldg,&ldg_);
959:   PetscBLASIntCast(ldh,&ldh_);
960:   for (j=0;j<deg;j++) {
961:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&sone,S,&lds_,fH+j*k,&lda_,&zero,G+j*k,&ldg_));
962:   }
963:   /* Orthogonalize and update S */
964:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&ldg_,&k_,G,&ldg_,tau,work,&k_,&info));
965:   SlepcCheckLapackInfo("geqrf",info);
966:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,S,&lds_));

968:   /* Update H */
969:   PetscStackCallBLAS("BLAStrmm",BLAStrmm_("L","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
970:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&k_,&k_,&sone,G,&ldg_,H,&ldh_));
971:   PetscFree3(tau,work,G);
972:   return(0);
973: #endif
974: }

976: static PetscErrorCode PEPNRefUpdateInvPair(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,PetscScalar *fH,PetscScalar *dH,PetscScalar *S,PetscInt lds,BV dV,PetscScalar *dVS,PetscInt rds)
977: {
978: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
980:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routine is unavailable");
981: #else
983:   PetscInt       i,j,nmat=pep->nmat,lda=nmat*k;
984:   PetscScalar    *tau,*array,*work;
985:   PetscBLASInt   lds_,k_,lda_,ldh_,kdrs_,info,k2_;
986:   Mat            M0;

989:   PetscMalloc2(k,&tau,k,&work);
990:   PetscBLASIntCast(lds,&lds_);
991:   PetscBLASIntCast(lda,&lda_);
992:   PetscBLASIntCast(ldh,&ldh_);
993:   PetscBLASIntCast(k,&k_);
994:   PetscBLASIntCast(2*k,&k2_);
995:   PetscBLASIntCast((k+rds),&kdrs_);
996:   /* Update H */
997:   for (j=0;j<k;j++) {
998:     for (i=0;i<k;i++) H[i+j*ldh] -= dH[i+j*k];
999:   }
1000:   /* Update V */
1001:   for (j=0;j<k;j++) {
1002:     for (i=0;i<k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k]+S[i+j*lds];
1003:     for (i=k;i<2*k;i++) dVS[i+j*2*k] = -dVS[i+j*2*k];
1004:   }
1005:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&kdrs_,&k_,dVS,&k2_,tau,work,&k_,&info));
1006:   SlepcCheckLapackInfo("geqrf",info);
1007:   /* Copy triangular matrix in S */
1008:   for (j=0;j<k;j++) {
1009:     for (i=0;i<=j;i++) S[i+j*lds] = dVS[i+j*2*k];
1010:     for (i=j+1;i<k;i++) S[i+j*lds] = 0.0;
1011:   }
1012:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&k2_,&k_,&k_,dVS,&k2_,tau,work,&k_,&info));
1013:   SlepcCheckLapackInfo("orgqr",info);
1014:   MatCreateSeqDense(PETSC_COMM_SELF,k,k,NULL,&M0);
1015:   MatDenseGetArray(M0,&array);
1016:   for (j=0;j<k;j++) {
1017:     PetscMemcpy(array+j*k,dVS+j*2*k,k*sizeof(PetscScalar));
1018:   }
1019:   MatDenseRestoreArray(M0,&array);
1020:   BVMultInPlace(pep->V,M0,0,k);
1021:   if (rds) {
1022:     MatDenseGetArray(M0,&array);
1023:     for (j=0;j<k;j++) {
1024:       PetscMemcpy(array+j*k,dVS+k+j*2*k,rds*sizeof(PetscScalar));
1025:     }
1026:     MatDenseRestoreArray(M0,&array);
1027:     BVMultInPlace(dV,M0,0,k);
1028:     BVMult(pep->V,1.0,1.0,dV,NULL);
1029:   }
1030:   MatDestroy(&M0);
1031:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1032:   PetscFree2(tau,work);
1033:   return(0);
1034: #endif
1035: }

1037: static PetscErrorCode PEPNRefSetUp(PEP pep,PetscInt k,PetscScalar *H,PetscInt ldh,MatExplicitCtx *matctx,PetscBool ini)
1038: {
1039:   PetscErrorCode    ierr;
1040:   FSubctx           *ctx;
1041:   PetscScalar       t,*coef;
1042:   const PetscScalar *array;
1043:   MatStructure      str;
1044:   PetscInt          j,nmat=pep->nmat,n0,m0,n1,m1,n0_,m0_,n1_,m1_,N0,N1,p,*idx1,*idx2,count,si,i,l0;
1045:   MPI_Comm          comm;
1046:   PetscMPIInt       np;
1047:   const PetscInt    *rgs0,*rgs1;
1048:   Mat               B,C,*E,*A,*At;
1049:   IS                is1,is2;
1050:   Vec               v;
1051:   PetscBool         flg;
1052:   Mat               M,P;

1055:   PetscMalloc1(nmat,&coef);
1056:   STGetTransform(pep->st,&flg);
1057:   if (flg) {
1058:     PetscMalloc1(pep->nmat,&At);
1059:     for (i=0;i<pep->nmat;i++) {
1060:       STGetMatrixTransformed(pep->st,i,&At[i]);
1061:     }
1062:   } else At = pep->A;
1063:   switch (pep->scheme) {
1064:   case PEP_REFINE_SCHEME_EXPLICIT:
1065:     if (ini) {
1066:       if (matctx->subc) {
1067:         A = matctx->A;
1068:         comm = PetscSubcommChild(matctx->subc);
1069:       } else {
1070:         A = At;
1071:         PetscObjectGetComm((PetscObject)pep,&comm);
1072:       }
1073:       E = matctx->E;
1074:       STGetMatStructure(pep->st,&str);
1075:       MatDuplicate(A[0],MAT_COPY_VALUES,&E[0]);
1076:       j = (matctx->subc)?matctx->subc->color:0;
1077:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1078:       for (j=1;j<nmat;j++) {
1079:         MatAXPY(E[0],coef[j],A[j],str);
1080:       }
1081:       MatCreateDense(comm,PETSC_DECIDE,PETSC_DECIDE,k,k,NULL,&E[1]);
1082:       MatAssemblyBegin(E[1],MAT_FINAL_ASSEMBLY);
1083:       MatAssemblyEnd(E[1],MAT_FINAL_ASSEMBLY);
1084:       MatGetOwnershipRange(E[0],&n0,&m0);
1085:       MatGetOwnershipRange(E[1],&n1,&m1);
1086:       MatGetOwnershipRangeColumn(E[0],&n0_,&m0_);
1087:       MatGetOwnershipRangeColumn(E[1],&n1_,&m1_);
1088:       /* T12 and T21 are computed from V and V*, so,
1089:          they must have the same column and row ranges */
1090:       if (m0_-n0_ != m0-n0) SETERRQ(PETSC_COMM_SELF,1,"Inconsistent dimensions");
1091:       MatCreateDense(comm,m0-n0,m1_-n1_,PETSC_DECIDE,PETSC_DECIDE,NULL,&B);
1092:       MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
1093:       MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
1094:       MatCreateDense(comm,m1-n1,m0_-n0_,PETSC_DECIDE,PETSC_DECIDE,NULL,&C);
1095:       MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
1096:       MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);
1097:       MatCreateTile(1.0,E[0],1.0,B,1.0,C,1.0,E[1],&M);
1098:       MatDestroy(&B);
1099:       MatDestroy(&C);
1100:       matctx->compM1 = PETSC_TRUE;
1101:       MatGetSize(E[0],NULL,&N0);
1102:       MatGetSize(E[1],NULL,&N1);
1103:       MPI_Comm_size(PetscObjectComm((PetscObject)M),&np);
1104:       MatGetOwnershipRanges(E[0],&rgs0);
1105:       MatGetOwnershipRanges(E[1],&rgs1);
1106:       PetscMalloc4(PetscMax(k,N1),&matctx->idxp,N0,&matctx->idxg,N0,&matctx->map0,N1,&matctx->map1);
1107:       /* Create column (and row) mapping */
1108:       for (p=0;p<np;p++) {
1109:         for (j=rgs0[p];j<rgs0[p+1];j++) matctx->map0[j] = j+rgs1[p];
1110:         for (j=rgs1[p];j<rgs1[p+1];j++) matctx->map1[j] = j+rgs0[p+1];
1111:       }
1112:       MatCreateVecs(M,NULL,&matctx->tN);
1113:       MatCreateVecs(matctx->E[1],NULL,&matctx->t1);
1114:       VecDuplicate(matctx->tN,&matctx->ttN);
1115:       if (matctx->subc) {
1116:         MPI_Comm_size(PetscObjectComm((PetscObject)pep),&np);
1117:         count = np*k;
1118:         PetscMalloc2(count,&idx1,count,&idx2);
1119:         VecCreateMPI(PetscObjectComm((PetscObject)pep),m1-n1,PETSC_DECIDE,&matctx->tp);
1120:         VecGetOwnershipRange(matctx->tp,&l0,NULL);
1121:         VecCreateMPI(PetscObjectComm((PetscObject)pep),k,PETSC_DECIDE,&matctx->tpg);
1122:         for (si=0;si<matctx->subc->n;si++) {
1123:           if (matctx->subc->color==si) {
1124:             j=0;
1125:             if (matctx->subc->color==si) {
1126:               for (p=0;p<np;p++) {
1127:                 for (i=n1;i<m1;i++) {
1128:                   idx1[j] = l0+i-n1;
1129:                   idx2[j++] =p*k+i;
1130:                 }
1131:               }
1132:             }
1133:             count = np*(m1-n1);
1134:           } else count =0;
1135:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx1,PETSC_COPY_VALUES,&is1);
1136:           ISCreateGeneral(PetscObjectComm((PetscObject)pep),count,idx2,PETSC_COPY_VALUES,&is2);
1137:           VecScatterCreate(matctx->tp,is1,matctx->tpg,is2,&matctx->scatterp_id[si]);
1138:           ISDestroy(&is1);
1139:           ISDestroy(&is2);
1140:         }
1141:         PetscFree2(idx1,idx2);
1142:       } else {
1143:         VecScatterCreateToAll(matctx->t1,&matctx->scatterctx,&matctx->vseq);
1144:       }
1145:       P = M;
1146:     } else {
1147:       if (matctx->subc) {
1148:         /* Scatter vectors pep->V */
1149:         for (i=0;i<k;i++) {
1150:           BVGetColumn(pep->V,i,&v);
1151:           VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1152:           VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1153:           BVRestoreColumn(pep->V,i,&v);
1154:           VecGetArrayRead(matctx->tg,&array);
1155:           VecPlaceArray(matctx->t,(const PetscScalar*)array);
1156:           BVInsertVec(matctx->V,i,matctx->t);
1157:           VecResetArray(matctx->t);
1158:           VecRestoreArrayRead(matctx->tg,&array);
1159:         }
1160:       }
1161:     }
1162:     break;
1163:   case PEP_REFINE_SCHEME_MBE:
1164:     if (ini) {
1165:       if (matctx->subc) {
1166:         A = matctx->A;
1167:         comm = PetscSubcommChild(matctx->subc);
1168:       } else {
1169:         matctx->V = pep->V;
1170:         A = At;
1171:         PetscObjectGetComm((PetscObject)pep,&comm);
1172:         MatCreateVecs(pep->A[0],&matctx->t,NULL);
1173:       }
1174:       STGetMatStructure(pep->st,&str);
1175:       MatDuplicate(A[0],MAT_COPY_VALUES,&matctx->M1);
1176:       j = (matctx->subc)?matctx->subc->color:0;
1177:       PEPEvaluateBasis(pep,H[j+j*ldh],0,coef,NULL);
1178:       for (j=1;j<nmat;j++) {
1179:         MatAXPY(matctx->M1,coef[j],A[j],str);
1180:       }
1181:       BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1182:       BVDuplicateResize(matctx->V,k,&matctx->M2);
1183:       BVDuplicate(matctx->M2,&matctx->M3);
1184:       BVDuplicate(matctx->M2,&matctx->Wt);
1185:       PetscMalloc5(k*k,&matctx->M4,k*k,&matctx->w,k*k,&matctx->wt,k,&matctx->d,k,&matctx->dt);
1186:       matctx->compM1 = PETSC_TRUE;
1187:       M = matctx->M1;
1188:       P = M;
1189:     }
1190:     break;
1191:   case PEP_REFINE_SCHEME_SCHUR:
1192:     if (ini) {
1193:       PetscObjectGetComm((PetscObject)pep,&comm);
1194:       MatGetSize(At[0],&m0,&n0);
1195:       PetscMalloc1(1,&ctx);
1196:       STGetMatStructure(pep->st,&str);
1197:       /* Create a shell matrix to solve the linear system */
1198:       ctx->V = pep->V;
1199:       ctx->k = k; ctx->nmat = nmat;
1200:       PetscMalloc5(nmat,&ctx->A,k*k,&ctx->M4,k,&ctx->pM4,2*k*k,&ctx->work,nmat,&ctx->fih);
1201:       for (i=0;i<nmat;i++) ctx->A[i] = At[i];
1202:       PetscMemzero(ctx->M4,k*k*sizeof(PetscScalar));
1203:       MatCreateShell(comm,PETSC_DECIDE,PETSC_DECIDE,m0,n0,ctx,&M);
1204:       MatShellSetOperation(M,MATOP_MULT,(void(*)(void))MatFSMult);
1205:       BVDuplicateResize(ctx->V,PetscMax(k,pep->nmat),&ctx->W);
1206:       BVDuplicateResize(ctx->V,k,&ctx->M2);
1207:       BVDuplicate(ctx->M2,&ctx->M3);
1208:       BVCreateVec(pep->V,&ctx->t);
1209:       MatDuplicate(At[0],MAT_COPY_VALUES,&ctx->M1);
1210:       PEPEvaluateBasis(pep,H[0],0,coef,NULL);
1211:       for (j=1;j<nmat;j++) {
1212:         MatAXPY(ctx->M1,coef[j],At[j],str);
1213:       }
1214:       MatDuplicate(At[0],MAT_COPY_VALUES,&P);
1215:       /* Compute a precond matrix for the system */
1216:       t = H[0];
1217:       PEPEvaluateBasis(pep,t,0,coef,NULL);
1218:       for (j=1;j<nmat;j++) {
1219:         MatAXPY(P,coef[j],At[j],str);
1220:       }
1221:       ctx->compM1 = PETSC_TRUE;
1222:     }
1223:     break;
1224:   }
1225:   if (ini) {
1226:     PEPRefineGetKSP(pep,&pep->refineksp);
1227:     KSPSetErrorIfNotConverged(pep->refineksp,PETSC_TRUE);
1228:     KSPSetOperators(pep->refineksp,M,P);
1229:     KSPSetFromOptions(pep->refineksp);
1230:   }

1232:   if (!ini && matctx && matctx->subc) {
1233:      /* Scatter vectors pep->V */
1234:     for (i=0;i<k;i++) {
1235:       BVGetColumn(pep->V,i,&v);
1236:       VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1237:       VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1238:       BVRestoreColumn(pep->V,i,&v);
1239:       VecGetArrayRead(matctx->tg,&array);
1240:       VecPlaceArray(matctx->t,(const PetscScalar*)array);
1241:       BVInsertVec(matctx->V,i,matctx->t);
1242:       VecResetArray(matctx->t);
1243:       VecRestoreArrayRead(matctx->tg,&array);
1244:     }
1245:    }
1246:   PetscFree(coef);
1247:   if (flg) {
1248:     PetscFree(At);
1249:   }
1250:   return(0);
1251: }

1253: static PetscErrorCode NRefSubcommSetup(PEP pep,PetscInt k,MatExplicitCtx *matctx,PetscInt nsubc)
1254: {
1255:   PetscErrorCode    ierr;
1256:   PetscInt          i,si,j,m0,n0,nloc0,nloc_sub,*idx1,*idx2;
1257:   IS                is1,is2;
1258:   BVType            type;
1259:   Vec               v;
1260:   const PetscScalar *array;
1261:   Mat               *A;
1262:   PetscBool         flg;

1265:   STGetTransform(pep->st,&flg);
1266:   if (flg) {
1267:     PetscMalloc1(pep->nmat,&A);
1268:     for (i=0;i<pep->nmat;i++) {
1269:       STGetMatrixTransformed(pep->st,i,&A[i]);
1270:     }
1271:   } else A = pep->A;

1273:   /* Duplicate pep matrices */
1274:   PetscMalloc3(pep->nmat,&matctx->A,nsubc,&matctx->scatter_id,nsubc,&matctx->scatterp_id);
1275:   for (i=0;i<pep->nmat;i++) {
1276:     MatCreateRedundantMatrix(A[i],0,PetscSubcommChild(matctx->subc),MAT_INITIAL_MATRIX,&matctx->A[i]);
1277:   }

1279:   /* Create Scatter */
1280:   MatCreateVecs(matctx->A[0],&matctx->t,NULL);
1281:   MatGetLocalSize(matctx->A[0],&nloc_sub,NULL);
1282:   VecCreateMPI(PetscSubcommContiguousParent(matctx->subc),nloc_sub,PETSC_DECIDE,&matctx->tg);
1283:   BVGetColumn(pep->V,0,&v);
1284:   VecGetOwnershipRange(v,&n0,&m0);
1285:   nloc0 = m0-n0;
1286:   PetscMalloc2(matctx->subc->n*nloc0,&idx1,matctx->subc->n*nloc0,&idx2);
1287:   j = 0;
1288:   for (si=0;si<matctx->subc->n;si++) {
1289:     for (i=n0;i<m0;i++) {
1290:       idx1[j]   = i;
1291:       idx2[j++] = i+pep->n*si;
1292:     }
1293:   }
1294:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx1,PETSC_COPY_VALUES,&is1);
1295:   ISCreateGeneral(PetscObjectComm((PetscObject)pep),matctx->subc->n*nloc0,idx2,PETSC_COPY_VALUES,&is2);
1296:   VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_sub);
1297:   ISDestroy(&is1);
1298:   ISDestroy(&is2);
1299:   for (si=0;si<matctx->subc->n;si++) {
1300:     j=0;
1301:     for (i=n0;i<m0;i++) {
1302:       idx1[j] = i;
1303:       idx2[j++] = i+pep->n*si;
1304:     }
1305:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx1,PETSC_COPY_VALUES,&is1);
1306:     ISCreateGeneral(PetscObjectComm((PetscObject)pep),nloc0,idx2,PETSC_COPY_VALUES,&is2);
1307:     VecScatterCreate(v,is1,matctx->tg,is2,&matctx->scatter_id[si]);
1308:     ISDestroy(&is1);
1309:     ISDestroy(&is2);
1310:   }
1311:   BVRestoreColumn(pep->V,0,&v);
1312:   PetscFree2(idx1,idx2);

1314:   /* Duplicate pep->V vecs */
1315:   BVGetType(pep->V,&type);
1316:   BVCreate(PetscSubcommChild(matctx->subc),&matctx->V);
1317:   BVSetType(matctx->V,type);
1318:   BVSetSizesFromVec(matctx->V,matctx->t,k);
1319:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1320:     BVDuplicateResize(matctx->V,PetscMax(k,pep->nmat),&matctx->W);
1321:   }
1322:   for (i=0;i<k;i++) {
1323:     BVGetColumn(pep->V,i,&v);
1324:     VecScatterBegin(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1325:     VecScatterEnd(matctx->scatter_sub,v,matctx->tg,INSERT_VALUES,SCATTER_FORWARD);
1326:     BVRestoreColumn(pep->V,i,&v);
1327:     VecGetArrayRead(matctx->tg,&array);
1328:     VecPlaceArray(matctx->t,(const PetscScalar*)array);
1329:     BVInsertVec(matctx->V,i,matctx->t);
1330:     VecResetArray(matctx->t);
1331:     VecRestoreArrayRead(matctx->tg,&array);
1332:   }

1334:   VecDuplicate(matctx->t,&matctx->Rv);
1335:   VecDuplicate(matctx->t,&matctx->Vi);
1336:   if (flg) {
1337:     PetscFree(A);
1338:   }
1339:   return(0);
1340: }

1342: static PetscErrorCode NRefSubcommDestroy(PEP pep,MatExplicitCtx *matctx)
1343: {
1345:   PetscInt       i;

1348:   VecScatterDestroy(&matctx->scatter_sub);
1349:   for (i=0;i<matctx->subc->n;i++) {
1350:     VecScatterDestroy(&matctx->scatter_id[i]);
1351:   }
1352:   for (i=0;i<pep->nmat;i++) {
1353:     MatDestroy(&matctx->A[i]);
1354:   }
1355:   if (pep->scheme==PEP_REFINE_SCHEME_EXPLICIT) {
1356:     for (i=0;i<matctx->subc->n;i++) {
1357:       VecScatterDestroy(&matctx->scatterp_id[i]);
1358:     }
1359:     VecDestroy(&matctx->tp);
1360:     VecDestroy(&matctx->tpg);
1361:     BVDestroy(&matctx->W);
1362:   }
1363:   PetscFree3(matctx->A,matctx->scatter_id,matctx->scatterp_id);
1364:   BVDestroy(&matctx->V);
1365:   VecDestroy(&matctx->t);
1366:   VecDestroy(&matctx->tg);
1367:   VecDestroy(&matctx->Rv);
1368:   VecDestroy(&matctx->Vi);
1369:   return(0);
1370: }

1372: PetscErrorCode PEPNewtonRefinement_TOAR(PEP pep,PetscScalar sigma,PetscInt *maxits,PetscReal *tol,PetscInt k,PetscScalar *S,PetscInt lds)
1373: {
1374: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI)
1376:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI - Lapack routine is unavailable");
1377: #else
1379:   PetscScalar    *H,*work,*dH,*fH,*dVS;
1380:   PetscInt       ldh,i,j,its=1,nmat=pep->nmat,nsubc=pep->npart,rds;
1381:   PetscBLASInt   k_,ld_,*p,info;
1382:   BV             dV;
1383:   PetscBool      sinvert,flg;
1384:   MatExplicitCtx *matctx=NULL;
1385:   Vec            v;
1386:   Mat            M,P;
1387:   FSubctx        *ctx;

1390:   PetscLogEventBegin(PEP_Refine,pep,0,0,0);
1391:   if (k > pep->n) SETERRQ1(PetscObjectComm((PetscObject)pep),1,"Multiple Refinement available only for invariant pairs of dimension smaller than n=%D",pep->n);
1392:   /* the input tolerance is not being taken into account (by the moment) */
1393:   its = *maxits;
1394:   PetscMalloc3(k*k,&dH,nmat*k*k,&fH,k,&work);
1395:   DSGetLeadingDimension(pep->ds,&ldh);
1396:   DSGetArray(pep->ds,DS_MAT_A,&H);
1397:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1398:   PetscMalloc1(2*k*k,&dVS);
1399:   STGetTransform(pep->st,&flg);
1400:   if (!flg && pep->st && pep->ops->backtransform) { /* BackTransform */
1401:     PetscBLASIntCast(k,&k_);
1402:     PetscBLASIntCast(ldh,&ld_);
1403:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
1404:     if (sinvert) {
1405:       DSGetArray(pep->ds,DS_MAT_A,&H);
1406:       PetscMalloc1(k,&p);
1407:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,H,&ld_,p,&info));
1408:       SlepcCheckLapackInfo("getrf",info);
1409:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,H,&ld_,p,work,&k_,&info));
1410:       SlepcCheckLapackInfo("getri",info);
1411:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1412:       pep->ops->backtransform = NULL;
1413:     }
1414:     if (sigma!=0.0) {
1415:       DSGetArray(pep->ds,DS_MAT_A,&H);
1416:       for (i=0;i<k;i++) H[i+ldh*i] += sigma;
1417:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
1418:       pep->ops->backtransform = NULL;
1419:     }
1420:   }
1421:   if ((pep->scale==PEP_SCALE_BOTH || pep->scale==PEP_SCALE_SCALAR) && pep->sfactor!=1.0) {
1422:     DSGetArray(pep->ds,DS_MAT_A,&H);
1423:     for (j=0;j<k;j++) {
1424:       for (i=0;i<k;i++) H[i+j*ldh] *= pep->sfactor;
1425:     }
1426:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
1427:     if (!flg) {
1428:       /* Restore original values */
1429:       for (i=0;i<pep->nmat;i++){
1430:         pep->pbc[pep->nmat+i] *= pep->sfactor;
1431:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
1432:       }
1433:     }
1434:   }
1435:   if ((pep->scale==PEP_SCALE_DIAGONAL || pep->scale==PEP_SCALE_BOTH) && pep->Dr) {
1436:     for (i=0;i<k;i++) {
1437:       BVGetColumn(pep->V,i,&v);
1438:       VecPointwiseMult(v,v,pep->Dr);
1439:       BVRestoreColumn(pep->V,i,&v);
1440:     }
1441:   }
1442:   DSGetArray(pep->ds,DS_MAT_A,&H);

1444:   NRefOrthogStep(pep,k,H,ldh,fH,S,lds);
1445:   /* check if H is in Schur form */
1446:   for (i=0;i<k-1;i++) {
1447:     if (H[i+1+i*ldh]!=0.0) {
1448: #if !defined(PETSC_USE_COMPLEX)
1449:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires the complex Schur form of the projected matrix");
1450: #else
1451:       SETERRQ(PetscObjectComm((PetscObject)pep),1,"Iterative Refinement requires an upper triangular projected matrix");
1452: #endif
1453:     }
1454:   }
1455:   if (nsubc>k) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Amount of subcommunicators should not be larger than the invariant pair dimension");
1456:   BVSetActiveColumns(pep->V,0,k);
1457:   BVDuplicateResize(pep->V,k,&dV);
1458:   PetscLogObjectParent((PetscObject)pep,(PetscObject)dV);
1459:   if (pep->scheme!=PEP_REFINE_SCHEME_SCHUR) {
1460:     PetscMalloc1(1,&matctx);
1461:     if (nsubc>1) { /* spliting in subcommunicators */
1462:       matctx->subc = pep->refinesubc;
1463:       NRefSubcommSetup(pep,k,matctx,nsubc);
1464:     } else matctx->subc=NULL;
1465:   }

1467:   /* Loop performing iterative refinements */
1468:   for (i=0;i<its;i++) {
1469:     /* Pre-compute the polynomial basis evaluated in H */
1470:     PEPEvaluateBasisforMatrix(pep,nmat,k,H,ldh,fH);
1471:     PEPNRefSetUp(pep,k,H,ldh,matctx,PetscNot(i));
1472:     /* Solve the linear system */
1473:     PEPNRefForwardSubstitution(pep,k,S,lds,H,ldh,fH,dV,dVS,&rds,dH,k,pep->refineksp,matctx);
1474:     /* Update X (=V*S) and H, and orthogonalize [X;X*fH1;...;XfH(deg-1)] */
1475:     PEPNRefUpdateInvPair(pep,k,H,ldh,fH,dH,S,lds,dV,dVS,rds);
1476:   }
1477:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
1478:   if (!flg && sinvert) {
1479:     PetscFree(p);
1480:   }
1481:   PetscFree3(dH,fH,work);
1482:   PetscFree(dVS);
1483:   BVDestroy(&dV);
1484:   switch (pep->scheme) {
1485:   case PEP_REFINE_SCHEME_EXPLICIT:
1486:     for (i=0;i<2;i++) {
1487:       MatDestroy(&matctx->E[i]);
1488:     }
1489:     PetscFree4(matctx->idxp,matctx->idxg,matctx->map0,matctx->map1);
1490:     VecDestroy(&matctx->tN);
1491:     VecDestroy(&matctx->ttN);
1492:     VecDestroy(&matctx->t1);
1493:     if (nsubc>1) {
1494:       NRefSubcommDestroy(pep,matctx);
1495:     } else {
1496:       VecDestroy(&matctx->vseq);
1497:       VecScatterDestroy(&matctx->scatterctx);
1498:     }
1499:     PetscFree(matctx);
1500:     KSPGetOperators(pep->refineksp,&M,NULL);
1501:     MatDestroy(&M);
1502:     break;
1503:   case PEP_REFINE_SCHEME_MBE:
1504:     BVDestroy(&matctx->W);
1505:     BVDestroy(&matctx->Wt);
1506:     BVDestroy(&matctx->M2);
1507:     BVDestroy(&matctx->M3);
1508:     MatDestroy(&matctx->M1);
1509:     VecDestroy(&matctx->t);
1510:     PetscFree5(matctx->M4,matctx->w,matctx->wt,matctx->d,matctx->dt);
1511:     if (nsubc>1) {
1512:       NRefSubcommDestroy(pep,matctx);
1513:     }
1514:     PetscFree(matctx);
1515:     break;
1516:   case PEP_REFINE_SCHEME_SCHUR:
1517:     KSPGetOperators(pep->refineksp,&M,&P);
1518:     MatShellGetContext(M,&ctx);
1519:     PetscFree5(ctx->A,ctx->M4,ctx->pM4,ctx->work,ctx->fih);
1520:     MatDestroy(&ctx->M1);
1521:     BVDestroy(&ctx->M2);
1522:     BVDestroy(&ctx->M3);
1523:     BVDestroy(&ctx->W);
1524:     VecDestroy(&ctx->t);
1525:     PetscFree(ctx);
1526:     MatDestroy(&M);
1527:     MatDestroy(&P);
1528:     break;
1529:   }
1530:   PetscLogEventEnd(PEP_Refine,pep,0,0,0);
1531:   return(0);
1532: #endif
1533: }