Actual source code: epskrylov.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Common subroutines for all Krylov-type solvers
 12: */

 14: #include <slepc/private/epsimpl.h>
 15: #include <slepc/private/slepcimpl.h>
 16: #include <slepcblaslapack.h>

 18: /*
 19:    EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
 20:    columns are assumed to be locked and therefore they are not modified. On
 21:    exit, the following relation is satisfied:

 23:                     OP * V - V * H = beta*v_m * e_m^T

 25:    where the columns of V are the Arnoldi vectors (which are B-orthonormal),
 26:    H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
 27:    On exit, beta contains the B-norm of V[m] before normalization.
 28: */
 29: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 30: {
 32:   PetscScalar    *a;
 33:   PetscInt       j,nc,n,m = *M;
 34:   Vec            vj,vj1,buf;
 35:   BV             U;

 38:   U = (trans)?eps->W:eps->V;
 39:   BVSetActiveColumns(U,0,m);
 40:   for (j=k;j<m;j++) {
 41:     BVGetColumn(U,j,&vj);
 42:     BVGetColumn(U,j+1,&vj1);
 43:     if (trans) {
 44:       VecConjugate(vj);
 45:       STApplyTranspose(eps->st,vj,vj1);
 46:       VecConjugate(vj);
 47:       VecConjugate(vj1);
 48:     } else {
 49:       STApply(eps->st,vj,vj1);
 50:     }
 51:     BVRestoreColumn(U,j,&vj);
 52:     BVRestoreColumn(U,j+1,&vj1);
 53:     BVOrthonormalizeColumn(U,j+1,PETSC_FALSE,beta,breakdown);
 54:     if (*breakdown) {
 55:       *M = j+1;
 56:       break;
 57:     }
 58:   }
 59:   /* extract Hessenberg matrix from the BV object */
 60:   BVGetNumConstraints(U,&nc);
 61:   BVGetSizes(U,NULL,NULL,&n);
 62:   BVGetBufferVec(U,&buf);
 63:   VecGetArray(buf,&a);
 64:   for (j=k;j<*M;j++) {
 65:     PetscMemcpy(H+j*ldh,a+nc+(j+1)*(nc+n),(j+2)*sizeof(PetscScalar));
 66:   }
 67:   VecRestoreArray(buf,&a);
 68:   return(0);
 69: }

 71: /*
 72:    EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
 73:    performs the computation in a different way. The main idea is that
 74:    reorthogonalization is delayed to the next Arnoldi step. This version is
 75:    more scalable but in some cases convergence may stagnate.
 76: */
 77: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
 78: {
 80:   PetscInt       i,j,m=*M;
 81:   Vec            u,t;
 82:   PetscScalar    shh[100],*lhh,dot,dot2;
 83:   PetscReal      norm1=0.0,norm2=1.0;
 84:   Vec            vj,vj1,vj2;

 87:   if (m<=100) lhh = shh;
 88:   else {
 89:     PetscMalloc1(m,&lhh);
 90:   }
 91:   BVCreateVec(eps->V,&u);
 92:   BVCreateVec(eps->V,&t);

 94:   BVSetActiveColumns(eps->V,0,m);
 95:   for (j=k;j<m;j++) {
 96:     BVGetColumn(eps->V,j,&vj);
 97:     BVGetColumn(eps->V,j+1,&vj1);
 98:     STApply(eps->st,vj,vj1);
 99:     BVRestoreColumn(eps->V,j,&vj);
100:     BVRestoreColumn(eps->V,j+1,&vj1);

102:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
103:     if (j>k) {
104:       BVDotColumnBegin(eps->V,j,lhh);
105:       BVGetColumn(eps->V,j,&vj);
106:       VecDotBegin(vj,vj,&dot);
107:     }
108:     if (j>k+1) {
109:       BVNormVecBegin(eps->V,u,NORM_2,&norm2);
110:       BVGetColumn(eps->V,j-2,&vj2);
111:       VecDotBegin(u,vj2,&dot2);
112:     }

114:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
115:     if (j>k) {
116:       BVDotColumnEnd(eps->V,j,lhh);
117:       VecDotEnd(vj,vj,&dot);
118:       BVRestoreColumn(eps->V,j,&vj);
119:     }
120:     if (j>k+1) {
121:       BVNormVecEnd(eps->V,u,NORM_2,&norm2);
122:       VecDotEnd(u,vj2,&dot2);
123:       BVRestoreColumn(eps->V,j-2,&vj2);
124:     }

126:     if (j>k) {
127:       norm1 = PetscSqrtReal(PetscRealPart(dot));
128:       for (i=0;i<j;i++)
129:         H[ldh*j+i] = H[ldh*j+i]/norm1;
130:       H[ldh*j+j] = H[ldh*j+j]/dot;

132:       BVCopyVec(eps->V,j,t);
133:       BVScaleColumn(eps->V,j,1.0/norm1);
134:       BVScaleColumn(eps->V,j+1,1.0/norm1);
135:     }

137:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);

139:     if (j>k) {
140:       BVSetActiveColumns(eps->V,0,j);
141:       BVMultVec(eps->V,-1.0,1.0,t,lhh);
142:       BVSetActiveColumns(eps->V,0,m);
143:       for (i=0;i<j;i++)
144:         H[ldh*(j-1)+i] += lhh[i];
145:     }

147:     if (j>k+1) {
148:       BVGetColumn(eps->V,j-1,&vj1);
149:       VecCopy(u,vj1);
150:       BVRestoreColumn(eps->V,j-1,&vj1);
151:       BVScaleColumn(eps->V,j-1,1.0/norm2);
152:       H[ldh*(j-2)+j-1] = norm2;
153:     }

155:     if (j<m-1) {
156:       VecCopy(t,u);
157:     }
158:   }

160:   BVNormVec(eps->V,t,NORM_2,&norm2);
161:   VecScale(t,1.0/norm2);
162:   BVGetColumn(eps->V,m-1,&vj1);
163:   VecCopy(t,vj1);
164:   BVRestoreColumn(eps->V,m-1,&vj1);
165:   H[ldh*(m-2)+m-1] = norm2;

167:   BVDotColumn(eps->V,m,lhh);

169:   BVMultColumn(eps->V,-1.0,1.0,m,lhh);
170:   for (i=0;i<m;i++)
171:     H[ldh*(m-1)+i] += lhh[i];

173:   BVNormColumn(eps->V,m,NORM_2,beta);
174:   BVScaleColumn(eps->V,m,1.0 / *beta);
175:   *breakdown = PETSC_FALSE;

177:   if (m>100) { PetscFree(lhh); }
178:   VecDestroy(&u);
179:   VecDestroy(&t);
180:   return(0);
181: }

183: /*
184:    EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
185:    but without reorthogonalization (only delayed normalization).
186: */
187: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
188: {
190:   PetscInt       i,j,m=*M;
191:   PetscScalar    dot;
192:   PetscReal      norm=0.0;
193:   Vec            vj,vj1;

196:   BVSetActiveColumns(eps->V,0,m);
197:   for (j=k;j<m;j++) {
198:     BVGetColumn(eps->V,j,&vj);
199:     BVGetColumn(eps->V,j+1,&vj1);
200:     STApply(eps->st,vj,vj1);
201:     BVRestoreColumn(eps->V,j+1,&vj1);
202:     BVDotColumnBegin(eps->V,j+1,H+ldh*j);
203:     if (j>k) {
204:       VecDotBegin(vj,vj,&dot);
205:     }
206:     BVDotColumnEnd(eps->V,j+1,H+ldh*j);
207:     if (j>k) {
208:       VecDotEnd(vj,vj,&dot);
209:     }
210:     BVRestoreColumn(eps->V,j,&vj);

212:     if (j>k) {
213:       norm = PetscSqrtReal(PetscRealPart(dot));
214:       BVScaleColumn(eps->V,j,1.0/norm);
215:       H[ldh*(j-1)+j] = norm;

217:       for (i=0;i<j;i++)
218:         H[ldh*j+i] = H[ldh*j+i]/norm;
219:       H[ldh*j+j] = H[ldh*j+j]/dot;
220:       BVScaleColumn(eps->V,j+1,1.0/norm);
221:       *beta = norm;
222:     }
223:     BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
224:   }

226:   *breakdown = PETSC_FALSE;
227:   return(0);
228: }

230: /*
231:    EPSKrylovConvergence_Filter - Specialized version for STFILTER.
232: */
233: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
234: {
236:   PetscInt       k,ninside,nconv;
237:   PetscScalar    re,im;
238:   PetscReal      resnorm;

241:   ninside = 0;   /* count how many eigenvalues are located in the interval */
242:   for (k=kini;k<kini+nits;k++) {
243:     if (PetscRealPart(eps->eigr[k]) < gamma) break;
244:     ninside++;
245:   }
246:   eps->nev = ninside+kini;  /* adjust eigenvalue count */
247:   nconv = 0;   /* count how many eigenvalues satisfy the convergence criterion */
248:   for (k=kini;k<kini+ninside;k++) {
249:     /* eigenvalue */
250:     re = eps->eigr[k];
251:     im = eps->eigi[k];
252:     DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
253:     resnorm *= beta;
254:     /* error estimate */
255:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
256:     if (eps->errest[k] < eps->tol) nconv++;
257:     else break;
258:   }
259:   *kout = kini+nconv;
260:   PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
261:   return(0);
262: }

264: /*
265:    EPSKrylovConvergence - Implements the loop that checks for convergence
266:    in Krylov methods.

268:    Input Parameters:
269:      eps   - the eigensolver; some error estimates are updated in eps->errest
270:      getall - whether all residuals must be computed
271:      kini  - initial value of k (the loop variable)
272:      nits  - number of iterations of the loop
273:      V     - set of basis vectors (used only if trueresidual is activated)
274:      nv    - number of vectors to process (dimension of Q, columns of V)
275:      beta  - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
276:      corrf - correction factor for residual estimates (only in harmonic KS)

278:    Output Parameters:
279:      kout  - the first index where the convergence test failed
280: */
281: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal betat,PetscReal corrf,PetscInt *kout)
282: {
284:   PetscInt       k,newk,marker,ld,inside;
285:   PetscScalar    re,im,*Zr,*Zi,*X;
286:   PetscReal      resnorm,gamma,lerrest;
287:   PetscBool      isshift,isfilter,refined,istrivial;
288:   Vec            x=NULL,y=NULL,w[3];

291:   if (eps->which == EPS_ALL) {
292:     PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
293:     if (isfilter) {
294:       STFilterGetThreshold(eps->st,&gamma);
295:       EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
296:       return(0);
297:     }
298:   }
299:   RGIsTrivial(eps->rg,&istrivial);
300:   if (eps->trueres) {
301:     BVCreateVec(eps->V,&x);
302:     BVCreateVec(eps->V,&y);
303:     BVCreateVec(eps->V,&w[0]);
304:     BVCreateVec(eps->V,&w[2]);
305: #if !defined(PETSC_USE_COMPLEX)
306:     BVCreateVec(eps->V,&w[1]);
307: #else
308:     w[1] = NULL;
309: #endif
310:   }
311:   DSGetLeadingDimension(eps->ds,&ld);
312:   DSGetRefined(eps->ds,&refined);
313:   PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
314:   marker = -1;
315:   if (eps->trackall) getall = PETSC_TRUE;
316:   for (k=kini;k<kini+nits;k++) {
317:     /* eigenvalue */
318:     re = eps->eigr[k];
319:     im = eps->eigi[k];
320:     if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
321:       STBackTransform(eps->st,1,&re,&im);
322:     }
323:     if (!istrivial) {
324:       RGCheckInside(eps->rg,1,&re,&im,&inside);
325:       if (marker==-1 && inside<0) marker = k;
326:       if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) {  /* make sure eps->converged below uses the right value */
327:         re = eps->eigr[k];
328:         im = eps->eigi[k];
329:       }
330:     }
331:     newk = k;
332:     DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
333:     if (eps->trueres) {
334:       DSGetArray(eps->ds,DS_MAT_X,&X);
335:       Zr = X+k*ld;
336:       if (newk==k+1) Zi = X+newk*ld;
337:       else Zi = NULL;
338:       EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
339:       DSRestoreArray(eps->ds,DS_MAT_X,&X);
340:       EPSComputeResidualNorm_Private(eps,PETSC_FALSE,re,im,x,y,w,&resnorm);
341:     }
342:     else if (!refined) resnorm *= beta*corrf;
343:     /* error estimate */
344:     (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
345:     if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
346:     if (eps->twosided) {
347:       newk = k;
348:       DSVectors(eps->dsts,DS_MAT_X,&newk,&resnorm);
349:       resnorm *= betat;
350:       (*eps->converged)(eps,re,im,resnorm,&lerrest,eps->convergedctx);
351:       eps->errest[k] = PetscMax(eps->errest[k],lerrest);
352:       if (marker==-1 && lerrest >= eps->tol) marker = k;
353:     }
354:     if (newk==k+1) {
355:       eps->errest[k+1] = eps->errest[k];
356:       k++;
357:     }
358:     if (marker!=-1 && !getall) break;
359:   }
360:   if (marker!=-1) k = marker;
361:   *kout = k;
362:   if (eps->trueres) {
363:     VecDestroy(&x);
364:     VecDestroy(&y);
365:     VecDestroy(&w[0]);
366:     VecDestroy(&w[2]);
367: #if !defined(PETSC_USE_COMPLEX)
368:     VecDestroy(&w[1]);
369: #endif
370:   }
371:   return(0);
372: }

374: /*
375:    EPSFullLanczos - Computes an m-step Lanczos factorization with full
376:    reorthogonalization.  At each Lanczos step, the corresponding Lanczos
377:    vector is orthogonalized with respect to all previous Lanczos vectors.
378:    This is equivalent to computing an m-step Arnoldi factorization and
379:    exploting symmetry of the operator.

381:    The first k columns are assumed to be locked and therefore they are
382:    not modified. On exit, the following relation is satisfied:

384:                     OP * V - V * T = beta_m*v_m * e_m^T

386:    where the columns of V are the Lanczos vectors (which are B-orthonormal),
387:    T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
388:    the canonical basis. The tridiagonal is stored as two arrays: alpha
389:    contains the diagonal elements, beta the off-diagonal. On exit, the last
390:    element of beta contains the B-norm of V[m] before normalization.
391: */
392: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
393: {
395:   PetscScalar    *a;
396:   PetscInt       j,nc,n,m = *M;
397:   Vec            vj,vj1,buf;

400:   BVSetActiveColumns(eps->V,0,m);
401:   for (j=k;j<m;j++) {
402:     BVGetColumn(eps->V,j,&vj);
403:     BVGetColumn(eps->V,j+1,&vj1);
404:     STApply(eps->st,vj,vj1);
405:     BVRestoreColumn(eps->V,j,&vj);
406:     BVRestoreColumn(eps->V,j+1,&vj1);
407:     BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta+j,breakdown);
408:     if (*breakdown) {
409:       *M = j+1;
410:       break;
411:     }
412:   }
413:   /* extract tridiagonal matrix from the BV object (only alpha, beta is already in its place) */
414:   BVGetNumConstraints(eps->V,&nc);
415:   BVGetSizes(eps->V,NULL,NULL,&n);
416:   BVGetBufferVec(eps->V,&buf);
417:   VecGetArray(buf,&a);
418:   for (j=k;j<*M;j++) alpha[j] = PetscRealPart(a[nc+j+(j+1)*(nc+n)]);
419:   VecRestoreArray(buf,&a);
420:   return(0);
421: }

423: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
424: {
426:   PetscInt       j,m = *M,i,ld,l;
427:   Vec            vj,vj1;
428:   PetscScalar    *hwork,lhwork[100];
429:   PetscReal      norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
430:   PetscBLASInt   j_,one=1;

433:   DSGetLeadingDimension(eps->ds,&ld);
434:   DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
435:   if (cos) *cos = 1.0;
436:   if (m > 100) {
437:     PetscMalloc1(m,&hwork);
438:   } else hwork = lhwork;

440:   BVSetActiveColumns(eps->V,0,m);
441:   for (j=k;j<m;j++) {
442:     BVGetColumn(eps->V,j,&vj);
443:     BVGetColumn(eps->V,j+1,&vj1);
444:     STApply(eps->st,vj,vj1);
445:     BVRestoreColumn(eps->V,j,&vj);
446:     BVRestoreColumn(eps->V,j+1,&vj1);
447:     BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
448:     alpha[j] = PetscRealPart(hwork[j]);
449:     beta[j] = PetscAbsReal(norm);
450:     DSGetArrayReal(eps->ds,DS_MAT_T,&f);
451:     if (j==k) {
452:       for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
453:       for (i=0;i<l;i++) hwork[i] = 0.0;
454:     }
455:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
456:     hwork[j-1] -= beta[j-1];
457:     PetscBLASIntCast(j,&j_);
458:     sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
459:     fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
460:     if (j>0) fro = SlepcAbs(fro,beta[j-1]);
461:     if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
462:     omega[j+1] = (norm<0.0)? -1.0: 1.0;
463:     BVScaleColumn(eps->V,j+1,1.0/norm);
464:     /* */
465:     if (cos) {
466:       BVGetColumn(eps->V,j+1,&vj1);
467:       VecNorm(vj1,NORM_2,&norm1);
468:       BVApplyMatrix(eps->V,vj1,w);
469:       BVRestoreColumn(eps->V,j+1,&vj1);
470:       VecNorm(w,NORM_2,&norm2);
471:       t = 1.0/(norm1*norm2);
472:       if (*cos>t) *cos = t;
473:     }
474:   }
475:   if (m > 100) {
476:     PetscFree(hwork);
477:   }
478:   return(0);
479: }