Actual source code: ptoar.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc polynomial eigensolver: "toar"

 13:    Method: TOAR

 15:    Algorithm:

 17:        Two-Level Orthogonal Arnoldi.

 19:    References:

 21:        [1] Y. Su, J. Zhang and Z. Bai, "A compact Arnoldi algorithm for
 22:            polynomial eigenvalue problems", talk presented at RANMEP 2008.

 24:        [2] C. Campos and J.E. Roman, "Parallel Krylov solvers for the
 25:            polynomial eigenvalue problem in SLEPc", SIAM J. Sci. Comput.
 26:            38(5):S385-S411, 2016.

 28:        [3] D. Lu, Y. Su and Z. Bai, "Stability analysis of the two-level
 29:            orthogonal Arnoldi procedure", SIAM J. Matrix Anal. App.
 30:            37(1):195-214, 2016.
 31: */

 33: #include <slepc/private/pepimpl.h>    /*I "slepcpep.h" I*/
 34: #include "../src/pep/impls/krylov/pepkrylov.h"
 35: #include <slepcblaslapack.h>

 37: static PetscBool  cited = PETSC_FALSE;
 38: static const char citation[] =
 39:   "@Article{slepc-pep,\n"
 40:   "   author = \"C. Campos and J. E. Roman\",\n"
 41:   "   title = \"Parallel {Krylov} solvers for the polynomial eigenvalue problem in {SLEPc}\",\n"
 42:   "   journal = \"{SIAM} J. Sci. Comput.\",\n"
 43:   "   volume = \"38\",\n"
 44:   "   number = \"5\",\n"
 45:   "   pages = \"S385--S411\",\n"
 46:   "   year = \"2016,\"\n"
 47:   "   doi = \"https://doi.org/10.1137/15M1022458\"\n"
 48:   "}\n";

 50: PetscErrorCode PEPSetUp_TOAR(PEP pep)
 51: {
 53:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
 54:   PetscBool      shift,sinv,flg;
 55:   PetscInt       i;

 58:   pep->lineariz = PETSC_TRUE;
 59:   PEPSetDimensions_Default(pep,pep->nev,&pep->ncv,&pep->mpd);
 60:   if (!ctx->lock && pep->mpd<pep->ncv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Should not use mpd parameter in non-locking variant");
 61:   if (!pep->max_it) pep->max_it = PetscMax(100,2*(pep->nmat-1)*pep->n/pep->ncv);

 63:   PetscObjectTypeCompare((PetscObject)pep->st,STSHIFT,&shift);
 64:   PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
 65:   if (!shift && !sinv) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"Only STSHIFT and STSINVERT spectral transformations can be used");
 66:   if (!pep->which) {
 67:     if (sinv) pep->which = PEP_TARGET_MAGNITUDE;
 68:     else pep->which = PEP_LARGEST_MAGNITUDE;
 69:   }
 70:   if (pep->which==PEP_ALL) SETERRQ(PetscObjectComm((PetscObject)pep),1,"Wrong value of pep->which");
 71:   if (pep->problem_type!=PEP_GENERAL) {
 72:     PetscInfo(pep,"Problem type ignored, performing a non-symmetric linearization\n");
 73:   }

 75:   if (!ctx->keep) ctx->keep = 0.5;

 77:   PEPAllocateSolution(pep,pep->nmat-1);
 78:   PEPSetWorkVecs(pep,3);
 79:   DSSetType(pep->ds,DSNHEP);
 80:   DSSetExtraRow(pep->ds,PETSC_TRUE);
 81:   DSAllocate(pep->ds,pep->ncv+1);

 83:   PEPBasisCoefficients(pep,pep->pbc);
 84:   STGetTransform(pep->st,&flg);
 85:   if (!flg) {
 86:     PetscMalloc1(pep->nmat,&pep->solvematcoeffs);
 87:     PetscLogObjectMemory((PetscObject)pep,pep->nmat*sizeof(PetscScalar));
 88:     if (sinv) {
 89:       PEPEvaluateBasis(pep,pep->target,0,pep->solvematcoeffs,NULL);
 90:     } else {
 91:       for (i=0;i<pep->nmat-1;i++) pep->solvematcoeffs[i] = 0.0;
 92:       pep->solvematcoeffs[pep->nmat-1] = 1.0;
 93:     }
 94:   }
 95:   BVDestroy(&ctx->V);
 96:   BVCreateTensor(pep->V,pep->nmat-1,&ctx->V);
 97:   return(0);
 98: }

100: /*
101:   Extend the TOAR basis by applying the the matrix operator
102:   over a vector which is decomposed in the TOAR way
103:   Input:
104:     - pbc: array containing the polynomial basis coefficients
105:     - S,V: define the latest Arnoldi vector (nv vectors in V)
106:   Output:
107:     - t: new vector extending the TOAR basis
108:     - r: temporary coefficients to compute the TOAR coefficients
109:          for the new Arnoldi vector
110:   Workspace: t_ (two vectors)
111: */
112: static PetscErrorCode PEPTOARExtendBasis(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscScalar *S,PetscInt ls,PetscInt nv,BV V,Vec t,PetscScalar *r,PetscInt lr,Vec *t_)
113: {
115:   PetscInt       nmat=pep->nmat,deg=nmat-1,k,j,off=0,lss;
116:   Vec            v=t_[0],ve=t_[1],q=t_[2];
117:   PetscScalar    alpha=1.0,*ss,a;
118:   PetscReal      *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
119:   PetscBool      flg;

122:   BVSetActiveColumns(pep->V,0,nv);
123:   STGetTransform(pep->st,&flg);
124:   if (sinvert) {
125:     for (j=0;j<nv;j++) {
126:       if (deg>1) r[lr+j] = S[j]/ca[0];
127:       if (deg>2) r[2*lr+j] = (S[ls+j]+(sigma-cb[1])*r[lr+j])/ca[1];
128:     }
129:     for (k=2;k<deg-1;k++) {
130:       for (j=0;j<nv;j++) r[(k+1)*lr+j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
131:     }
132:     k = deg-1;
133:     for (j=0;j<nv;j++) r[j] = (S[k*ls+j]+(sigma-cb[k])*r[k*lr+j]-cg[k]*r[(k-1)*lr+j])/ca[k];
134:     ss = r; lss = lr; off = 1; alpha = -1.0; a = pep->sfactor;
135:   } else {
136:     ss = S; lss = ls; off = 0; alpha = -ca[deg-1]; a = 1.0;
137:   }
138:   BVMultVec(V,1.0,0.0,v,ss+off*lss);
139:   if (pep->Dr) { /* balancing */
140:     VecPointwiseMult(v,v,pep->Dr);
141:   }
142:   STMatMult(pep->st,off,v,q);
143:   VecScale(q,a);
144:   for (j=1+off;j<deg+off-1;j++) {
145:     BVMultVec(V,1.0,0.0,v,ss+j*lss);
146:     if (pep->Dr) {
147:       VecPointwiseMult(v,v,pep->Dr);
148:     }
149:     STMatMult(pep->st,j,v,t);
150:     a *= pep->sfactor;
151:     VecAXPY(q,a,t);
152:   }
153:   if (sinvert) {
154:     BVMultVec(V,1.0,0.0,v,ss);
155:     if (pep->Dr) {
156:       VecPointwiseMult(v,v,pep->Dr);
157:     }
158:     STMatMult(pep->st,deg,v,t);
159:     a *= pep->sfactor;
160:     VecAXPY(q,a,t);
161:   } else {
162:     BVMultVec(V,1.0,0.0,ve,ss+(deg-1)*lss);
163:     if (pep->Dr) {
164:       VecPointwiseMult(ve,ve,pep->Dr);
165:     }
166:     a *= pep->sfactor;
167:     STMatMult(pep->st,deg-1,ve,t);
168:     VecAXPY(q,a,t);
169:     a *= pep->sfactor;
170:   }
171:   if (flg || !sinvert) alpha /= a;
172:   STMatSolve(pep->st,q,t);
173:   VecScale(t,alpha);
174:   if (!sinvert) {
175:     if (cg[deg-1]!=0) { VecAXPY(t,cg[deg-1],v); }
176:     if (cb[deg-1]!=0) { VecAXPY(t,cb[deg-1],ve); }
177:   }
178:   if (pep->Dr) {
179:     VecPointwiseDivide(t,t,pep->Dr);
180:   }
181:   return(0);
182: }

184: /*
185:   Compute TOAR coefficients of the blocks of the new Arnoldi vector computed
186: */
187: static PetscErrorCode PEPTOARCoefficients(PEP pep,PetscBool sinvert,PetscScalar sigma,PetscInt nv,PetscScalar *S,PetscInt ls,PetscScalar *r,PetscInt lr,PetscScalar *x)
188: {
189:   PetscInt    k,j,nmat=pep->nmat,d=nmat-1;
190:   PetscReal   *ca=pep->pbc,*cb=pep->pbc+nmat,*cg=pep->pbc+2*nmat;
191:   PetscScalar t=1.0,tp=0.0,tt;

194:   if (sinvert) {
195:     for (k=1;k<d;k++) {
196:       tt = t;
197:       t = ((sigma-cb[k-1])*t-cg[k-1]*tp)/ca[k-1]; /* k-th basis polynomial */
198:       tp = tt;
199:       for (j=0;j<=nv;j++) r[k*lr+j] += t*x[j];
200:     }
201:   } else {
202:     for (j=0;j<=nv;j++) r[j] = (cb[0]-sigma)*S[j]+ca[0]*S[ls+j];
203:     for (k=1;k<d-1;k++) {
204:       for (j=0;j<=nv;j++) r[k*lr+j] = (cb[k]-sigma)*S[k*ls+j]+ca[k]*S[(k+1)*ls+j]+cg[k]*S[(k-1)*ls+j];
205:     }
206:     if (sigma!=0.0) for (j=0;j<=nv;j++) r[(d-1)*lr+j] -= sigma*S[(d-1)*ls+j];
207:   }
208:   return(0);
209: }

211: /*
212:   Compute a run of Arnoldi iterations dim(work)=ld
213: */
214: static PetscErrorCode PEPTOARrun(PEP pep,PetscScalar sigma,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscBool *breakdown,Vec *t_)
215: {
217:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
218:   PetscInt       j,m=*M,deg=pep->nmat-1,ld;
219:   PetscInt       lds,nqt,l;
220:   Vec            t;
221:   PetscReal      norm;
222:   PetscBool      flg,sinvert=PETSC_FALSE,lindep;
223:   PetscScalar    *x,*S;
224:   Mat            MS;

227:   BVTensorGetFactors(ctx->V,NULL,&MS);
228:   MatDenseGetArray(MS,&S);
229:   BVGetSizes(pep->V,NULL,NULL,&ld);
230:   lds = ld*deg;
231:   BVGetActiveColumns(pep->V,&l,&nqt);
232:   STGetTransform(pep->st,&flg);
233:   if (!flg) {
234:     /* spectral transformation handled by the solver */
235:     PetscObjectTypeCompareAny((PetscObject)pep->st,&flg,STSINVERT,STSHIFT,"");
236:     if (!flg) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_SUP,"ST type not supported for TOAR without transforming matrices");
237:     PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinvert);
238:   }
239:   BVSetActiveColumns(ctx->V,0,m);
240:   for (j=k;j<m;j++) {
241:     /* apply operator */
242:     BVGetColumn(pep->V,nqt,&t);
243:     PEPTOARExtendBasis(pep,sinvert,sigma,S+j*lds,ld,nqt,pep->V,t,S+(j+1)*lds,ld,t_);
244:     BVRestoreColumn(pep->V,nqt,&t);

246:     /* orthogonalize */
247:     if (sinvert) x = S+(j+1)*lds;
248:     else x = S+(deg-1)*ld+(j+1)*lds;
249:     BVOrthogonalizeColumn(pep->V,nqt,x,&norm,&lindep);
250:     if (!lindep) {
251:       x[nqt] = norm;
252:       BVScaleColumn(pep->V,nqt,1.0/norm);
253:       nqt++;
254:     }

256:     PEPTOARCoefficients(pep,sinvert,sigma,nqt-1,S+j*lds,ld,S+(j+1)*lds,ld,x);

258:     /* level-2 orthogonalization */
259:     BVOrthogonalizeColumn(ctx->V,j+1,H+j*ldh,&norm,breakdown);
260:     H[j+1+ldh*j] = norm;
261:     if (*breakdown) {
262:       *M = j+1;
263:       break;
264:     }
265:     BVScaleColumn(ctx->V,j+1,1.0/norm);
266:     BVSetActiveColumns(pep->V,l,nqt);
267:   }
268:   BVSetActiveColumns(ctx->V,0,*M);
269:   MatDenseRestoreArray(MS,&S);
270:   BVTensorRestoreFactors(ctx->V,NULL,&MS);
271:   return(0);
272: }

274: /*
275:   Computes T_j = phi_idx(T). In T_j and T_p are phi_{idx-1}(T)
276:    and phi_{idx-2}(T) respectively or null if idx=0,1.
277:    Tp and Tj are input/output arguments
278: */
279: static PetscErrorCode PEPEvaluateBasisM(PEP pep,PetscInt k,PetscScalar *T,PetscInt ldt,PetscInt idx,PetscScalar **Tp,PetscScalar **Tj)
280: {
282:   PetscInt       i;
283:   PetscReal      *ca,*cb,*cg;
284:   PetscScalar    *pt,g,a;
285:   PetscBLASInt   k_,ldt_;

288:   if (idx==0) {
289:     PetscMemzero(*Tj,k*k*sizeof(PetscScalar));
290:     PetscMemzero(*Tp,k*k*sizeof(PetscScalar));
291:     for (i=0;i<k;i++) (*Tj)[i+i*k] = 1.0;
292:   } else {
293:     PetscBLASIntCast(ldt,&ldt_);
294:     PetscBLASIntCast(k,&k_);
295:     ca = pep->pbc; cb = pep->pbc+pep->nmat; cg = pep->pbc+2*pep->nmat;
296:     for (i=0;i<k;i++) T[i*ldt+i] -= cb[idx-1];
297:     a = 1/ca[idx-1];
298:     g = (idx==1)?0.0:-cg[idx-1]/ca[idx-1];
299:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&k_,&k_,&k_,&a,T,&ldt_,*Tj,&k_,&g,*Tp,&k_));
300:     pt = *Tj; *Tj = *Tp; *Tp = pt;
301:     for (i=0;i<k;i++) T[i*ldt+i] += cb[idx-1];
302:   }
303:   return(0);
304: }

306: static PetscErrorCode PEPExtractInvariantPair(PEP pep,PetscScalar sigma,PetscInt sr,PetscInt k,PetscScalar *S,PetscInt ld,PetscInt deg,PetscScalar *H,PetscInt ldh)
307: {
308: #if defined(PETSC_MISSING_LAPACK_GESV) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(PETSC_MISSING_LAPACK_GETRF)
310:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESV/GETRI/GETRF - Lapack routine is unavailable");
311: #else
313:   PetscInt       i,j,jj,lds,ldt,d=pep->nmat-1,idxcpy=0;
314:   PetscScalar    *At,*Bt,*Hj,*Hp,*T,sone=1.0,g,a,*pM,*work;
315:   PetscBLASInt   k_,sr_,lds_,ldh_,info,*p,lwork,ldt_;
316:   PetscBool      transf=PETSC_FALSE,flg;
317:   PetscReal      nrm,norm,maxnrm,*rwork;
318:   BV             *R,Y;
319:   Mat            M,*A;
320:   Vec            v;

323:   if (k==0) return(0);
324:   lds = deg*ld;
325:   PetscCalloc6(k,&p,sr*k,&At,k*k,&Bt,k*k,&Hj,k*k,&Hp,sr*k,&work);
326:   PetscBLASIntCast(sr,&sr_);
327:   PetscBLASIntCast(k,&k_);
328:   PetscBLASIntCast(lds,&lds_);
329:   PetscBLASIntCast(ldh,&ldh_);
330:   STGetTransform(pep->st,&flg);
331:   if (!flg) {
332:      PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&flg);
333:     if (flg || sigma!=0.0) transf=PETSC_TRUE;
334:   }
335:   if (transf) {
336:     PetscMalloc1(k*k,&T);
337:     ldt = k;
338:     for (i=0;i<k;i++) {
339:       PetscMemcpy(T+k*i,H+i*ldh,k*sizeof(PetscScalar));
340:     }
341:     if (flg) {
342:       PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&k_,&k_,T,&k_,p,&info));
343:       SlepcCheckLapackInfo("getrf",info);
344:       PetscBLASIntCast(sr*k,&lwork);
345:       PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&k_,T,&k_,p,work,&lwork,&info));
346:       SlepcCheckLapackInfo("getri",info);
347:     }
348:     if (sigma!=0.0) for (i=0;i<k;i++) T[i+k*i] += sigma;
349:   } else {
350:     T = H; ldt = ldh;
351:   }
352:   PetscBLASIntCast(ldt,&ldt_);
353:   switch (pep->extract) {
354:   case PEP_EXTRACT_NONE:
355:     break;
356:   case PEP_EXTRACT_NORM:
357:     if (pep->basis == PEP_BASIS_MONOMIAL) {
358:       PetscBLASIntCast(ldt,&ldt_);
359:       PetscMalloc1(k,&rwork);
360:       norm = LAPACKlange_("F",&k_,&k_,T,&ldt_,rwork);
361:       PetscFree(rwork);
362:       if (norm>1.0) idxcpy = d-1;
363:     } else {
364:       PetscBLASIntCast(ldt,&ldt_);
365:       PetscMalloc1(k,&rwork);
366:       maxnrm = 0.0;
367:       for (i=0;i<pep->nmat-1;i++) {
368:         PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
369:         norm = LAPACKlange_("F",&k_,&k_,Hj,&k_,rwork);
370:         if (norm > maxnrm) {
371:           idxcpy = i;
372:           maxnrm = norm;
373:         }
374:       }
375:       PetscFree(rwork);
376:     }
377:     if (idxcpy>0) {
378:       /* copy block idxcpy of S to the first one */
379:       for (j=0;j<k;j++) {
380:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
381:       }
382:     }
383:     break;
384:   case PEP_EXTRACT_RESIDUAL:
385:     STGetTransform(pep->st,&flg);
386:     if (flg) {
387:       PetscMalloc1(pep->nmat,&A);
388:       for (i=0;i<pep->nmat;i++) {
389:         STGetMatrixTransformed(pep->st,i,A+i);
390:       }
391:     } else A = pep->A;
392:     PetscMalloc1(pep->nmat-1,&R);
393:     for (i=0;i<pep->nmat-1;i++) {
394:       BVDuplicateResize(pep->V,k,R+i);
395:     }
396:     BVDuplicateResize(pep->V,sr,&Y);
397:     MatCreateSeqDense(PETSC_COMM_SELF,sr,k,NULL,&M);
398:     g = 0.0; a = 1.0;
399:     BVSetActiveColumns(pep->V,0,sr);
400:     for (j=0;j<pep->nmat;j++) {
401:       BVMatMult(pep->V,A[j],Y);
402:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
403:       for (i=0;i<pep->nmat-1;i++) {
404:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sr_,&k_,&k_,&a,S+i*ld,&lds_,Hj,&k_,&g,At,&sr_));
405:         MatDenseGetArray(M,&pM);
406:         for (jj=0;jj<k;jj++) {
407:           PetscMemcpy(pM+jj*sr,At+jj*sr,sr*sizeof(PetscScalar));
408:         }
409:         MatDenseRestoreArray(M,&pM);
410:         BVMult(R[i],1.0,(i==0)?0.0:1.0,Y,M);
411:       }
412:     }

414:     /* frobenius norm */
415:     maxnrm = 0.0;
416:     for (i=0;i<pep->nmat-1;i++) {
417:       norm = 0.0;
418:       for (j=0;j<k;j++) {
419:         BVGetColumn(R[i],j,&v);
420:         VecNorm(v,NORM_2,&nrm);
421:         BVRestoreColumn(R[i],j,&v);
422:         norm += nrm*nrm;
423:       }
424:       norm = PetscSqrtReal(norm);
425:       if (maxnrm > norm) {
426:         maxnrm = norm;
427:         idxcpy = i;
428:       }
429:     }
430:     if (idxcpy>0) {
431:       /* copy block idxcpy of S to the first one */
432:       for (j=0;j<k;j++) {
433:         PetscMemcpy(S+j*lds,S+idxcpy*ld+j*lds,sr*sizeof(PetscScalar));
434:       }
435:     }
436:     if (flg) PetscFree(A);
437:     for (i=0;i<pep->nmat-1;i++) {
438:       BVDestroy(&R[i]);
439:     }
440:     PetscFree(R);
441:     BVDestroy(&Y);
442:     MatDestroy(&M);
443:     break;
444:   case PEP_EXTRACT_STRUCTURED:
445:     for (j=0;j<k;j++) Bt[j+j*k] = 1.0;
446:     for (j=0;j<sr;j++) {
447:       for (i=0;i<k;i++) At[j*k+i] = PetscConj(S[i*lds+j]);
448:     }
449:     PEPEvaluateBasisM(pep,k,T,ldt,0,&Hp,&Hj);
450:     for (i=1;i<deg;i++) {
451:       PEPEvaluateBasisM(pep,k,T,ldt,i,&Hp,&Hj);
452:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&sr_,&k_,&sone,Hj,&k_,S+i*ld,&lds_,&sone,At,&k_));
453:       PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&k_,&k_,&k_,&sone,Hj,&k_,Hj,&k_,&sone,Bt,&k_));
454:     }
455:     PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&k_,&sr_,Bt,&k_,p,At,&k_,&info));
456:     SlepcCheckLapackInfo("gesv",info);
457:     for (j=0;j<sr;j++) {
458:       for (i=0;i<k;i++) S[i*lds+j] = PetscConj(At[j*k+i]);
459:     }
460:     break;
461:   }
462:   if (transf) { PetscFree(T); }
463:   PetscFree6(p,At,Bt,Hj,Hp,work);
464:   return(0);
465: #endif
466: }

468: PetscErrorCode PEPSolve_TOAR(PEP pep)
469: {
471:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
472:   PetscInt       i,j,k,l,nv=0,ld,lds,ldds,newn,nq=0,nconv=0;
473:   PetscInt       nmat=pep->nmat,deg=nmat-1;
474:   PetscScalar    *S,*H,sigma;
475:   PetscReal      beta;
476:   PetscBool      breakdown=PETSC_FALSE,flg,falselock=PETSC_FALSE,sinv=PETSC_FALSE;
477:   Mat            MS,MQ;

480:   PetscCitationsRegister(citation,&cited);
481:   if (ctx->lock) {
482:     PetscOptionsGetBool(NULL,NULL,"-pep_toar_falselocking",&falselock,NULL);
483:   }
484:   DSGetLeadingDimension(pep->ds,&ldds);
485:   STGetShift(pep->st,&sigma);

487:   /* update polynomial basis coefficients */
488:   STGetTransform(pep->st,&flg);
489:   if (pep->sfactor!=1.0) {
490:     for (i=0;i<nmat;i++) {
491:       pep->pbc[nmat+i] /= pep->sfactor;
492:       pep->pbc[2*nmat+i] /= pep->sfactor*pep->sfactor;
493:     }
494:     if (!flg) {
495:       pep->target /= pep->sfactor;
496:       RGPushScale(pep->rg,1.0/pep->sfactor);
497:       STScaleShift(pep->st,1.0/pep->sfactor);
498:       sigma /= pep->sfactor;
499:     } else {
500:       PetscObjectTypeCompare((PetscObject)pep->st,STSINVERT,&sinv);
501:       pep->target = sinv?pep->target*pep->sfactor:pep->target/pep->sfactor;
502:       RGPushScale(pep->rg,sinv?pep->sfactor:1.0/pep->sfactor);
503:       STScaleShift(pep->st,sinv?pep->sfactor:1.0/pep->sfactor);
504:     }
505:   }

507:   if (flg) sigma = 0.0;

509:   /* clean projected matrix (including the extra-arrow) */
510:   DSGetArray(pep->ds,DS_MAT_A,&H);
511:   PetscMemzero(H,ldds*ldds*sizeof(PetscScalar));
512:   DSRestoreArray(pep->ds,DS_MAT_A,&H);
513:   
514:   /* Get the starting Arnoldi vector */
515:   BVTensorBuildFirstColumn(ctx->V,pep->nini);

517:   /* restart loop */
518:   l = 0;
519:   while (pep->reason == PEP_CONVERGED_ITERATING) {
520:     pep->its++;

522:     /* compute an nv-step Lanczos factorization */
523:     nv = PetscMax(PetscMin(nconv+pep->mpd,pep->ncv),nv);
524:     DSGetArray(pep->ds,DS_MAT_A,&H);
525:     PEPTOARrun(pep,sigma,H,ldds,pep->nconv+l,&nv,&breakdown,pep->work);
526:     beta = PetscAbsScalar(H[(nv-1)*ldds+nv]);
527:     DSRestoreArray(pep->ds,DS_MAT_A,&H);
528:     DSSetDimensions(pep->ds,nv,0,pep->nconv,pep->nconv+l);
529:     if (l==0) {
530:       DSSetState(pep->ds,DS_STATE_INTERMEDIATE);
531:     } else {
532:       DSSetState(pep->ds,DS_STATE_RAW);
533:     }
534:     BVSetActiveColumns(ctx->V,pep->nconv,nv);

536:     /* solve projected problem */
537:     DSSolve(pep->ds,pep->eigr,pep->eigi);
538:     DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
539:     DSUpdateExtraRow(pep->ds);
540:     DSSynchronize(pep->ds,pep->eigr,pep->eigi);

542:     /* check convergence */
543:     PEPKrylovConvergence(pep,PETSC_FALSE,pep->nconv,nv-pep->nconv,beta,&k);
544:     (*pep->stopping)(pep,pep->its,pep->max_it,k,pep->nev,&pep->reason,pep->stoppingctx);

546:     /* update l */
547:     if (pep->reason != PEP_CONVERGED_ITERATING || breakdown) l = 0;
548:     else {
549:       l = (nv==k)?0:PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
550:       if (!breakdown) {
551:         /* prepare the Rayleigh quotient for restart */
552:         DSTruncate(pep->ds,k+l);
553:         DSGetDimensions(pep->ds,&newn,NULL,NULL,NULL,NULL);
554:         l = newn-k;
555:       }
556:     }
557:     nconv = k;
558:     if (!ctx->lock && pep->reason == PEP_CONVERGED_ITERATING && !breakdown) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

560:     /* update S */
561:     DSGetMat(pep->ds,DS_MAT_Q,&MQ);
562:     BVMultInPlace(ctx->V,MQ,pep->nconv,k+l);
563:     MatDestroy(&MQ);

565:     /* copy last column of S */
566:     BVCopyColumn(ctx->V,nv,k+l);

568:     if (breakdown && pep->reason == PEP_CONVERGED_ITERATING) {
569:       /* stop if breakdown */
570:       PetscInfo2(pep,"Breakdown TOAR method (it=%D norm=%g)\n",pep->its,(double)beta);
571:       pep->reason = PEP_DIVERGED_BREAKDOWN;
572:     }
573:     if (pep->reason != PEP_CONVERGED_ITERATING) l--; 
574:     /* truncate S */
575:     BVGetActiveColumns(pep->V,NULL,&nq);
576:     if (k+l+deg<=nq) {
577:       BVSetActiveColumns(ctx->V,pep->nconv,k+l+1);
578:       if (!falselock && ctx->lock) {
579:         BVTensorCompress(ctx->V,k-pep->nconv);
580:       } else {
581:         BVTensorCompress(ctx->V,0);
582:       }
583:     }
584:     pep->nconv = k;
585:     PEPMonitor(pep,pep->its,nconv,pep->eigr,pep->eigi,pep->errest,nv);
586:   }
587:   if (pep->nconv>0) {
588:     /* {V*S_nconv^i}_{i=0}^{d-1} has rank nconv instead of nconv+d-1. Force zeros in each S_nconv^i block */
589:     BVSetActiveColumns(ctx->V,0,pep->nconv);
590:     BVGetActiveColumns(pep->V,NULL,&nq);
591:     BVSetActiveColumns(pep->V,0,nq);
592:     if (nq>pep->nconv) {
593:       BVTensorCompress(ctx->V,pep->nconv);
594:       BVSetActiveColumns(pep->V,0,pep->nconv);
595:       nq = pep->nconv;
596:     }

598:     /* perform Newton refinement if required */
599:     if (pep->refine==PEP_REFINE_MULTIPLE && pep->rits>0) {
600:       /* extract invariant pair */
601:       BVTensorGetFactors(ctx->V,NULL,&MS);
602:       MatDenseGetArray(MS,&S);
603:       DSGetArray(pep->ds,DS_MAT_A,&H);
604:       BVGetSizes(pep->V,NULL,NULL,&ld);
605:       lds = deg*ld;
606:       PEPExtractInvariantPair(pep,sigma,nq,pep->nconv,S,ld,deg,H,ldds);
607:       DSRestoreArray(pep->ds,DS_MAT_A,&H);
608:       DSSetDimensions(pep->ds,pep->nconv,0,0,0);
609:       DSSetState(pep->ds,DS_STATE_RAW);
610:       PEPNewtonRefinement_TOAR(pep,sigma,&pep->rits,NULL,pep->nconv,S,lds);
611:       DSSolve(pep->ds,pep->eigr,pep->eigi);
612:       DSSort(pep->ds,pep->eigr,pep->eigi,NULL,NULL,NULL);
613:       DSSynchronize(pep->ds,pep->eigr,pep->eigi);
614:       DSGetMat(pep->ds,DS_MAT_Q,&MQ);
615:       BVMultInPlace(ctx->V,MQ,0,pep->nconv);
616:       MatDestroy(&MQ);
617:       MatDenseRestoreArray(MS,&S);
618:       BVTensorRestoreFactors(ctx->V,NULL,&MS);
619:     }
620:   }
621:   STGetTransform(pep->st,&flg);
622:   if (pep->refine!=PEP_REFINE_MULTIPLE || pep->rits==0) {
623:     if (!flg && pep->ops->backtransform) {
624:         (*pep->ops->backtransform)(pep);
625:     }
626:     if (pep->sfactor!=1.0) {
627:       for (j=0;j<pep->nconv;j++) {
628:         pep->eigr[j] *= pep->sfactor;
629:         pep->eigi[j] *= pep->sfactor;
630:       }
631:       /* restore original values */
632:       for (i=0;i<pep->nmat;i++){
633:         pep->pbc[pep->nmat+i] *= pep->sfactor;
634:         pep->pbc[2*pep->nmat+i] *= pep->sfactor*pep->sfactor;
635:       }
636:     }
637:   }
638:   /* restore original values */
639:   if (!flg) {
640:     pep->target *= pep->sfactor;
641:     STScaleShift(pep->st,pep->sfactor);
642:   } else {
643:     STScaleShift(pep->st,sinv?1.0/pep->sfactor:pep->sfactor);
644:     pep->target = (sinv)?pep->target/pep->sfactor:pep->target*pep->sfactor;
645:   }
646:   if (pep->sfactor!=1.0) { RGPopScale(pep->rg); }

648:   /* change the state to raw so that DSVectors() computes eigenvectors from scratch */
649:   DSSetDimensions(pep->ds,pep->nconv,0,0,0);
650:   DSSetState(pep->ds,DS_STATE_RAW);
651:   return(0);
652: }

654: static PetscErrorCode PEPTOARSetRestart_TOAR(PEP pep,PetscReal keep)
655: {
656:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

659:   if (keep==PETSC_DEFAULT) ctx->keep = 0.5;
660:   else {
661:     if (keep<0.1 || keep>0.9) SETERRQ(PetscObjectComm((PetscObject)pep),PETSC_ERR_ARG_OUTOFRANGE,"The keep argument must be in the range [0.1,0.9]");
662:     ctx->keep = keep;
663:   }
664:   return(0);
665: }

667: /*@
668:    PEPTOARSetRestart - Sets the restart parameter for the TOAR
669:    method, in particular the proportion of basis vectors that must be kept
670:    after restart.

672:    Logically Collective on PEP

674:    Input Parameters:
675: +  pep  - the eigenproblem solver context
676: -  keep - the number of vectors to be kept at restart

678:    Options Database Key:
679: .  -pep_toar_restart - Sets the restart parameter

681:    Notes:
682:    Allowed values are in the range [0.1,0.9]. The default is 0.5.

684:    Level: advanced

686: .seealso: PEPTOARGetRestart()
687: @*/
688: PetscErrorCode PEPTOARSetRestart(PEP pep,PetscReal keep)
689: {

695:   PetscTryMethod(pep,"PEPTOARSetRestart_C",(PEP,PetscReal),(pep,keep));
696:   return(0);
697: }

699: static PetscErrorCode PEPTOARGetRestart_TOAR(PEP pep,PetscReal *keep)
700: {
701:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

704:   *keep = ctx->keep;
705:   return(0);
706: }

708: /*@
709:    PEPTOARGetRestart - Gets the restart parameter used in the TOAR method.

711:    Not Collective

713:    Input Parameter:
714: .  pep - the eigenproblem solver context

716:    Output Parameter:
717: .  keep - the restart parameter

719:    Level: advanced

721: .seealso: PEPTOARSetRestart()
722: @*/
723: PetscErrorCode PEPTOARGetRestart(PEP pep,PetscReal *keep)
724: {

730:   PetscUseMethod(pep,"PEPTOARGetRestart_C",(PEP,PetscReal*),(pep,keep));
731:   return(0);
732: }

734: static PetscErrorCode PEPTOARSetLocking_TOAR(PEP pep,PetscBool lock)
735: {
736:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

739:   ctx->lock = lock;
740:   return(0);
741: }

743: /*@
744:    PEPTOARSetLocking - Choose between locking and non-locking variants of
745:    the TOAR method.

747:    Logically Collective on PEP

749:    Input Parameters:
750: +  pep  - the eigenproblem solver context
751: -  lock - true if the locking variant must be selected

753:    Options Database Key:
754: .  -pep_toar_locking - Sets the locking flag

756:    Notes:
757:    The default is to lock converged eigenpairs when the method restarts.
758:    This behaviour can be changed so that all directions are kept in the
759:    working subspace even if already converged to working accuracy (the
760:    non-locking variant).

762:    Level: advanced

764: .seealso: PEPTOARGetLocking()
765: @*/
766: PetscErrorCode PEPTOARSetLocking(PEP pep,PetscBool lock)
767: {

773:   PetscTryMethod(pep,"PEPTOARSetLocking_C",(PEP,PetscBool),(pep,lock));
774:   return(0);
775: }

777: static PetscErrorCode PEPTOARGetLocking_TOAR(PEP pep,PetscBool *lock)
778: {
779:   PEP_TOAR *ctx = (PEP_TOAR*)pep->data;

782:   *lock = ctx->lock;
783:   return(0);
784: }

786: /*@
787:    PEPTOARGetLocking - Gets the locking flag used in the TOAR method.

789:    Not Collective

791:    Input Parameter:
792: .  pep - the eigenproblem solver context

794:    Output Parameter:
795: .  lock - the locking flag

797:    Level: advanced

799: .seealso: PEPTOARSetLocking()
800: @*/
801: PetscErrorCode PEPTOARGetLocking(PEP pep,PetscBool *lock)
802: {

808:   PetscUseMethod(pep,"PEPTOARGetLocking_C",(PEP,PetscBool*),(pep,lock));
809:   return(0);
810: }

812: PetscErrorCode PEPSetFromOptions_TOAR(PetscOptionItems *PetscOptionsObject,PEP pep)
813: {
815:   PetscBool      flg,lock;
816:   PetscReal      keep;

819:   PetscOptionsHead(PetscOptionsObject,"PEP TOAR Options");

821:     PetscOptionsReal("-pep_toar_restart","Proportion of vectors kept after restart","PEPTOARSetRestart",0.5,&keep,&flg);
822:     if (flg) { PEPTOARSetRestart(pep,keep); }

824:     PetscOptionsBool("-pep_toar_locking","Choose between locking and non-locking variants","PEPTOARSetLocking",PETSC_FALSE,&lock,&flg);
825:     if (flg) { PEPTOARSetLocking(pep,lock); }

827:   PetscOptionsTail();
828:   return(0);
829: }

831: PetscErrorCode PEPView_TOAR(PEP pep,PetscViewer viewer)
832: {
834:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;
835:   PetscBool      isascii;

838:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
839:   if (isascii) {
840:     PetscViewerASCIIPrintf(viewer,"  %d%% of basis vectors kept after restart\n",(int)(100*ctx->keep));
841:     PetscViewerASCIIPrintf(viewer,"  using the %slocking variant\n",ctx->lock?"":"non-");
842:   }
843:   return(0);
844: }

846: PetscErrorCode PEPDestroy_TOAR(PEP pep)
847: {
849:   PEP_TOAR       *ctx = (PEP_TOAR*)pep->data;

852:   BVDestroy(&ctx->V);
853:   PetscFree(pep->data);
854:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",NULL);
855:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",NULL);
856:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",NULL);
857:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",NULL);
858:   return(0);
859: }

861: SLEPC_EXTERN PetscErrorCode PEPCreate_TOAR(PEP pep)
862: {
863:   PEP_TOAR       *ctx;

867:   PetscNewLog(pep,&ctx);
868:   pep->data = (void*)ctx;
869:   ctx->lock = PETSC_TRUE;

871:   pep->ops->solve          = PEPSolve_TOAR;
872:   pep->ops->setup          = PEPSetUp_TOAR;
873:   pep->ops->setfromoptions = PEPSetFromOptions_TOAR;
874:   pep->ops->destroy        = PEPDestroy_TOAR;
875:   pep->ops->view           = PEPView_TOAR;
876:   pep->ops->backtransform  = PEPBackTransform_Default;
877:   pep->ops->computevectors = PEPComputeVectors_Default;
878:   pep->ops->extractvectors = PEPExtractVectors_TOAR;

880:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetRestart_C",PEPTOARSetRestart_TOAR);
881:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetRestart_C",PEPTOARGetRestart_TOAR);
882:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARSetLocking_C",PEPTOARSetLocking_TOAR);
883:   PetscObjectComposeFunction((PetscObject)pep,"PEPTOARGetLocking_C",PEPTOARGetLocking_TOAR);
884:   return(0);
885: }