Actual source code: epskrylov.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Common subroutines for all Krylov-type solvers
12: */
14: #include <slepc/private/epsimpl.h>
15: #include <slepc/private/slepcimpl.h>
16: #include <slepcblaslapack.h>
18: /*
19: EPSBasicArnoldi - Computes an m-step Arnoldi factorization. The first k
20: columns are assumed to be locked and therefore they are not modified. On
21: exit, the following relation is satisfied:
23: OP * V - V * H = beta*v_m * e_m^T
25: where the columns of V are the Arnoldi vectors (which are B-orthonormal),
26: H is an upper Hessenberg matrix, e_m is the m-th vector of the canonical basis.
27: On exit, beta contains the B-norm of V[m] before normalization.
28: */
29: PetscErrorCode EPSBasicArnoldi(EPS eps,PetscBool trans,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
30: {
32: PetscScalar *a;
33: PetscInt j,nc,n,m = *M;
34: Vec vj,vj1,buf;
35: BV U;
38: U = (trans)?eps->W:eps->V;
39: BVSetActiveColumns(U,0,m);
40: for (j=k;j<m;j++) {
41: BVGetColumn(U,j,&vj);
42: BVGetColumn(U,j+1,&vj1);
43: if (trans) {
44: VecConjugate(vj);
45: STApplyTranspose(eps->st,vj,vj1);
46: VecConjugate(vj);
47: VecConjugate(vj1);
48: } else {
49: STApply(eps->st,vj,vj1);
50: }
51: BVRestoreColumn(U,j,&vj);
52: BVRestoreColumn(U,j+1,&vj1);
53: BVOrthonormalizeColumn(U,j+1,PETSC_FALSE,beta,breakdown);
54: if (*breakdown) {
55: *M = j+1;
56: break;
57: }
58: }
59: /* extract Hessenberg matrix from the BV object */
60: BVGetNumConstraints(U,&nc);
61: BVGetSizes(U,NULL,NULL,&n);
62: BVGetBufferVec(U,&buf);
63: VecGetArray(buf,&a);
64: for (j=k;j<*M;j++) {
65: PetscMemcpy(H+j*ldh,a+nc+(j+1)*(nc+n),(j+2)*sizeof(PetscScalar));
66: }
67: VecRestoreArray(buf,&a);
68: return(0);
69: }
71: /*
72: EPSDelayedArnoldi - This function is equivalent to EPSBasicArnoldi but
73: performs the computation in a different way. The main idea is that
74: reorthogonalization is delayed to the next Arnoldi step. This version is
75: more scalable but in some cases convergence may stagnate.
76: */
77: PetscErrorCode EPSDelayedArnoldi(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
78: {
80: PetscInt i,j,m=*M;
81: Vec u,t;
82: PetscScalar shh[100],*lhh,dot,dot2;
83: PetscReal norm1=0.0,norm2=1.0;
84: Vec vj,vj1,vj2;
87: if (m<=100) lhh = shh;
88: else {
89: PetscMalloc1(m,&lhh);
90: }
91: BVCreateVec(eps->V,&u);
92: BVCreateVec(eps->V,&t);
94: BVSetActiveColumns(eps->V,0,m);
95: for (j=k;j<m;j++) {
96: BVGetColumn(eps->V,j,&vj);
97: BVGetColumn(eps->V,j+1,&vj1);
98: STApply(eps->st,vj,vj1);
99: BVRestoreColumn(eps->V,j,&vj);
100: BVRestoreColumn(eps->V,j+1,&vj1);
102: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
103: if (j>k) {
104: BVDotColumnBegin(eps->V,j,lhh);
105: BVGetColumn(eps->V,j,&vj);
106: VecDotBegin(vj,vj,&dot);
107: }
108: if (j>k+1) {
109: BVNormVecBegin(eps->V,u,NORM_2,&norm2);
110: BVGetColumn(eps->V,j-2,&vj2);
111: VecDotBegin(u,vj2,&dot2);
112: }
114: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
115: if (j>k) {
116: BVDotColumnEnd(eps->V,j,lhh);
117: VecDotEnd(vj,vj,&dot);
118: BVRestoreColumn(eps->V,j,&vj);
119: }
120: if (j>k+1) {
121: BVNormVecEnd(eps->V,u,NORM_2,&norm2);
122: VecDotEnd(u,vj2,&dot2);
123: BVRestoreColumn(eps->V,j-2,&vj2);
124: }
126: if (j>k) {
127: norm1 = PetscSqrtReal(PetscRealPart(dot));
128: for (i=0;i<j;i++)
129: H[ldh*j+i] = H[ldh*j+i]/norm1;
130: H[ldh*j+j] = H[ldh*j+j]/dot;
132: BVCopyVec(eps->V,j,t);
133: BVScaleColumn(eps->V,j,1.0/norm1);
134: BVScaleColumn(eps->V,j+1,1.0/norm1);
135: }
137: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
139: if (j>k) {
140: BVSetActiveColumns(eps->V,0,j);
141: BVMultVec(eps->V,-1.0,1.0,t,lhh);
142: BVSetActiveColumns(eps->V,0,m);
143: for (i=0;i<j;i++)
144: H[ldh*(j-1)+i] += lhh[i];
145: }
147: if (j>k+1) {
148: BVGetColumn(eps->V,j-1,&vj1);
149: VecCopy(u,vj1);
150: BVRestoreColumn(eps->V,j-1,&vj1);
151: BVScaleColumn(eps->V,j-1,1.0/norm2);
152: H[ldh*(j-2)+j-1] = norm2;
153: }
155: if (j<m-1) {
156: VecCopy(t,u);
157: }
158: }
160: BVNormVec(eps->V,t,NORM_2,&norm2);
161: VecScale(t,1.0/norm2);
162: BVGetColumn(eps->V,m-1,&vj1);
163: VecCopy(t,vj1);
164: BVRestoreColumn(eps->V,m-1,&vj1);
165: H[ldh*(m-2)+m-1] = norm2;
167: BVDotColumn(eps->V,m,lhh);
169: BVMultColumn(eps->V,-1.0,1.0,m,lhh);
170: for (i=0;i<m;i++)
171: H[ldh*(m-1)+i] += lhh[i];
173: BVNormColumn(eps->V,m,NORM_2,beta);
174: BVScaleColumn(eps->V,m,1.0 / *beta);
175: *breakdown = PETSC_FALSE;
177: if (m>100) { PetscFree(lhh); }
178: VecDestroy(&u);
179: VecDestroy(&t);
180: return(0);
181: }
183: /*
184: EPSDelayedArnoldi1 - This function is similar to EPSDelayedArnoldi,
185: but without reorthogonalization (only delayed normalization).
186: */
187: PetscErrorCode EPSDelayedArnoldi1(EPS eps,PetscScalar *H,PetscInt ldh,PetscInt k,PetscInt *M,PetscReal *beta,PetscBool *breakdown)
188: {
190: PetscInt i,j,m=*M;
191: PetscScalar dot;
192: PetscReal norm=0.0;
193: Vec vj,vj1;
196: BVSetActiveColumns(eps->V,0,m);
197: for (j=k;j<m;j++) {
198: BVGetColumn(eps->V,j,&vj);
199: BVGetColumn(eps->V,j+1,&vj1);
200: STApply(eps->st,vj,vj1);
201: BVRestoreColumn(eps->V,j+1,&vj1);
202: BVDotColumnBegin(eps->V,j+1,H+ldh*j);
203: if (j>k) {
204: VecDotBegin(vj,vj,&dot);
205: }
206: BVDotColumnEnd(eps->V,j+1,H+ldh*j);
207: if (j>k) {
208: VecDotEnd(vj,vj,&dot);
209: }
210: BVRestoreColumn(eps->V,j,&vj);
212: if (j>k) {
213: norm = PetscSqrtReal(PetscRealPart(dot));
214: BVScaleColumn(eps->V,j,1.0/norm);
215: H[ldh*(j-1)+j] = norm;
217: for (i=0;i<j;i++)
218: H[ldh*j+i] = H[ldh*j+i]/norm;
219: H[ldh*j+j] = H[ldh*j+j]/dot;
220: BVScaleColumn(eps->V,j+1,1.0/norm);
221: *beta = norm;
222: }
223: BVMultColumn(eps->V,-1.0,1.0,j+1,H+ldh*j);
224: }
226: *breakdown = PETSC_FALSE;
227: return(0);
228: }
230: /*
231: EPSKrylovConvergence_Filter - Specialized version for STFILTER.
232: */
233: PetscErrorCode EPSKrylovConvergence_Filter(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal gamma,PetscInt *kout)
234: {
236: PetscInt k,ninside,nconv;
237: PetscScalar re,im;
238: PetscReal resnorm;
241: ninside = 0; /* count how many eigenvalues are located in the interval */
242: for (k=kini;k<kini+nits;k++) {
243: if (PetscRealPart(eps->eigr[k]) < gamma) break;
244: ninside++;
245: }
246: eps->nev = ninside+kini; /* adjust eigenvalue count */
247: nconv = 0; /* count how many eigenvalues satisfy the convergence criterion */
248: for (k=kini;k<kini+ninside;k++) {
249: /* eigenvalue */
250: re = eps->eigr[k];
251: im = eps->eigi[k];
252: DSVectors(eps->ds,DS_MAT_X,&k,&resnorm);
253: resnorm *= beta;
254: /* error estimate */
255: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
256: if (eps->errest[k] < eps->tol) nconv++;
257: else break;
258: }
259: *kout = kini+nconv;
260: PetscInfo4(eps,"Found %D eigenvalue approximations inside the inverval (gamma=%g), k=%D nconv=%D\n",ninside,(double)gamma,k,nconv);
261: return(0);
262: }
264: /*
265: EPSKrylovConvergence - Implements the loop that checks for convergence
266: in Krylov methods.
268: Input Parameters:
269: eps - the eigensolver; some error estimates are updated in eps->errest
270: getall - whether all residuals must be computed
271: kini - initial value of k (the loop variable)
272: nits - number of iterations of the loop
273: V - set of basis vectors (used only if trueresidual is activated)
274: nv - number of vectors to process (dimension of Q, columns of V)
275: beta - norm of f (the residual vector of the Arnoldi/Lanczos factorization)
276: corrf - correction factor for residual estimates (only in harmonic KS)
278: Output Parameters:
279: kout - the first index where the convergence test failed
280: */
281: PetscErrorCode EPSKrylovConvergence(EPS eps,PetscBool getall,PetscInt kini,PetscInt nits,PetscReal beta,PetscReal betat,PetscReal corrf,PetscInt *kout)
282: {
284: PetscInt k,newk,marker,ld,inside;
285: PetscScalar re,im,*Zr,*Zi,*X;
286: PetscReal resnorm,gamma,lerrest;
287: PetscBool isshift,isfilter,refined,istrivial;
288: Vec x=NULL,y=NULL,w[3];
291: if (eps->which == EPS_ALL) {
292: PetscObjectTypeCompare((PetscObject)eps->st,STFILTER,&isfilter);
293: if (isfilter) {
294: STFilterGetThreshold(eps->st,&gamma);
295: EPSKrylovConvergence_Filter(eps,getall,kini,nits,beta,gamma,kout);
296: return(0);
297: }
298: }
299: RGIsTrivial(eps->rg,&istrivial);
300: if (eps->trueres) {
301: BVCreateVec(eps->V,&x);
302: BVCreateVec(eps->V,&y);
303: BVCreateVec(eps->V,&w[0]);
304: BVCreateVec(eps->V,&w[2]);
305: #if !defined(PETSC_USE_COMPLEX)
306: BVCreateVec(eps->V,&w[1]);
307: #else
308: w[1] = NULL;
309: #endif
310: }
311: DSGetLeadingDimension(eps->ds,&ld);
312: DSGetRefined(eps->ds,&refined);
313: PetscObjectTypeCompare((PetscObject)eps->st,STSHIFT,&isshift);
314: marker = -1;
315: if (eps->trackall) getall = PETSC_TRUE;
316: for (k=kini;k<kini+nits;k++) {
317: /* eigenvalue */
318: re = eps->eigr[k];
319: im = eps->eigi[k];
320: if (!istrivial || eps->trueres || isshift || eps->conv==EPS_CONV_NORM) {
321: STBackTransform(eps->st,1,&re,&im);
322: }
323: if (!istrivial) {
324: RGCheckInside(eps->rg,1,&re,&im,&inside);
325: if (marker==-1 && inside<0) marker = k;
326: if (!(eps->trueres || isshift || eps->conv==EPS_CONV_NORM)) { /* make sure eps->converged below uses the right value */
327: re = eps->eigr[k];
328: im = eps->eigi[k];
329: }
330: }
331: newk = k;
332: DSVectors(eps->ds,DS_MAT_X,&newk,&resnorm);
333: if (eps->trueres) {
334: DSGetArray(eps->ds,DS_MAT_X,&X);
335: Zr = X+k*ld;
336: if (newk==k+1) Zi = X+newk*ld;
337: else Zi = NULL;
338: EPSComputeRitzVector(eps,Zr,Zi,eps->V,x,y);
339: DSRestoreArray(eps->ds,DS_MAT_X,&X);
340: EPSComputeResidualNorm_Private(eps,PETSC_FALSE,re,im,x,y,w,&resnorm);
341: }
342: else if (!refined) resnorm *= beta*corrf;
343: /* error estimate */
344: (*eps->converged)(eps,re,im,resnorm,&eps->errest[k],eps->convergedctx);
345: if (marker==-1 && eps->errest[k] >= eps->tol) marker = k;
346: if (eps->twosided) {
347: newk = k;
348: DSVectors(eps->dsts,DS_MAT_X,&newk,&resnorm);
349: resnorm *= betat;
350: (*eps->converged)(eps,re,im,resnorm,&lerrest,eps->convergedctx);
351: eps->errest[k] = PetscMax(eps->errest[k],lerrest);
352: if (marker==-1 && lerrest >= eps->tol) marker = k;
353: }
354: if (newk==k+1) {
355: eps->errest[k+1] = eps->errest[k];
356: k++;
357: }
358: if (marker!=-1 && !getall) break;
359: }
360: if (marker!=-1) k = marker;
361: *kout = k;
362: if (eps->trueres) {
363: VecDestroy(&x);
364: VecDestroy(&y);
365: VecDestroy(&w[0]);
366: VecDestroy(&w[2]);
367: #if !defined(PETSC_USE_COMPLEX)
368: VecDestroy(&w[1]);
369: #endif
370: }
371: return(0);
372: }
374: /*
375: EPSFullLanczos - Computes an m-step Lanczos factorization with full
376: reorthogonalization. At each Lanczos step, the corresponding Lanczos
377: vector is orthogonalized with respect to all previous Lanczos vectors.
378: This is equivalent to computing an m-step Arnoldi factorization and
379: exploting symmetry of the operator.
381: The first k columns are assumed to be locked and therefore they are
382: not modified. On exit, the following relation is satisfied:
384: OP * V - V * T = beta_m*v_m * e_m^T
386: where the columns of V are the Lanczos vectors (which are B-orthonormal),
387: T is a real symmetric tridiagonal matrix, and e_m is the m-th vector of
388: the canonical basis. The tridiagonal is stored as two arrays: alpha
389: contains the diagonal elements, beta the off-diagonal. On exit, the last
390: element of beta contains the B-norm of V[m] before normalization.
391: */
392: PetscErrorCode EPSFullLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscInt k,PetscInt *M,PetscBool *breakdown)
393: {
395: PetscScalar *a;
396: PetscInt j,nc,n,m = *M;
397: Vec vj,vj1,buf;
400: BVSetActiveColumns(eps->V,0,m);
401: for (j=k;j<m;j++) {
402: BVGetColumn(eps->V,j,&vj);
403: BVGetColumn(eps->V,j+1,&vj1);
404: STApply(eps->st,vj,vj1);
405: BVRestoreColumn(eps->V,j,&vj);
406: BVRestoreColumn(eps->V,j+1,&vj1);
407: BVOrthonormalizeColumn(eps->V,j+1,PETSC_FALSE,beta+j,breakdown);
408: if (*breakdown) {
409: *M = j+1;
410: break;
411: }
412: }
413: /* extract tridiagonal matrix from the BV object (only alpha, beta is already in its place) */
414: BVGetNumConstraints(eps->V,&nc);
415: BVGetSizes(eps->V,NULL,NULL,&n);
416: BVGetBufferVec(eps->V,&buf);
417: VecGetArray(buf,&a);
418: for (j=k;j<*M;j++) alpha[j] = PetscRealPart(a[nc+j+(j+1)*(nc+n)]);
419: VecRestoreArray(buf,&a);
420: return(0);
421: }
423: PetscErrorCode EPSPseudoLanczos(EPS eps,PetscReal *alpha,PetscReal *beta,PetscReal *omega,PetscInt k,PetscInt *M,PetscBool *breakdown,PetscBool *symmlost,PetscReal *cos,Vec w)
424: {
426: PetscInt j,m = *M,i,ld,l;
427: Vec vj,vj1;
428: PetscScalar *hwork,lhwork[100];
429: PetscReal norm,norm1,norm2,t,*f,sym=0.0,fro=0.0;
430: PetscBLASInt j_,one=1;
433: DSGetLeadingDimension(eps->ds,&ld);
434: DSGetDimensions(eps->ds,NULL,NULL,&l,NULL,NULL);
435: if (cos) *cos = 1.0;
436: if (m > 100) {
437: PetscMalloc1(m,&hwork);
438: } else hwork = lhwork;
440: BVSetActiveColumns(eps->V,0,m);
441: for (j=k;j<m;j++) {
442: BVGetColumn(eps->V,j,&vj);
443: BVGetColumn(eps->V,j+1,&vj1);
444: STApply(eps->st,vj,vj1);
445: BVRestoreColumn(eps->V,j,&vj);
446: BVRestoreColumn(eps->V,j+1,&vj1);
447: BVOrthogonalizeColumn(eps->V,j+1,hwork,&norm,breakdown);
448: alpha[j] = PetscRealPart(hwork[j]);
449: beta[j] = PetscAbsReal(norm);
450: DSGetArrayReal(eps->ds,DS_MAT_T,&f);
451: if (j==k) {
452: for (i=l;i<j-1;i++) hwork[i]-= f[2*ld+i];
453: for (i=0;i<l;i++) hwork[i] = 0.0;
454: }
455: DSRestoreArrayReal(eps->ds,DS_MAT_T,&f);
456: hwork[j-1] -= beta[j-1];
457: PetscBLASIntCast(j,&j_);
458: sym = SlepcAbs(BLASnrm2_(&j_,hwork,&one),sym);
459: fro = SlepcAbs(fro,SlepcAbs(alpha[j],beta[j]));
460: if (j>0) fro = SlepcAbs(fro,beta[j-1]);
461: if (sym/fro>PetscMax(PETSC_SQRT_MACHINE_EPSILON,10*eps->tol)) { *symmlost = PETSC_TRUE; *M=j+1; break; }
462: omega[j+1] = (norm<0.0)? -1.0: 1.0;
463: BVScaleColumn(eps->V,j+1,1.0/norm);
464: /* */
465: if (cos) {
466: BVGetColumn(eps->V,j+1,&vj1);
467: VecNorm(vj1,NORM_2,&norm1);
468: BVApplyMatrix(eps->V,vj1,w);
469: BVRestoreColumn(eps->V,j+1,&vj1);
470: VecNorm(w,NORM_2,&norm2);
471: t = 1.0/(norm1*norm2);
472: if (*cos>t) *cos = t;
473: }
474: }
475: if (m > 100) {
476: PetscFree(hwork);
477: }
478: return(0);
479: }