Actual source code: ks-twosided.c

slepc-3.13.2 2020-05-12
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2020, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc eigensolver: "krylovschur"

 13:    Method: Two-sided Arnoldi with Krylov-Schur restart (for left eigenvectors)

 15:    References:

 17:        [1] I.N. Zwaan and M.E. Hochstenbach, "Krylov-Schur-type restarts
 18:            for the two-sided Arnoldi method", SIAM J. Matrix Anal. Appl.
 19:            38(2):297-321, 2017.

 21: */

 23:  #include <slepc/private/epsimpl.h>
 24: #include "krylovschur.h"
 25:  #include <slepcblaslapack.h>

 27: static PetscErrorCode EPSTwoSidedRQUpdate1(EPS eps,Mat M,PetscInt nv)
 28: {
 30:   PetscScalar    *T,*S,*A,*w,*pM,beta;
 31:   Vec            u;
 32:   PetscInt       ld,ncv=eps->ncv,i,l,nnv;
 33:   PetscBLASInt   info,n_,ncv_,*p,one=1;

 36:   DSGetLeadingDimension(eps->ds,&ld);
 37:   PetscMalloc3(nv,&p,ncv*ncv,&A,ncv,&w);
 38:   BVGetActiveColumns(eps->V,&l,&nnv);
 39:   BVSetActiveColumns(eps->V,0,nv);
 40:   BVSetActiveColumns(eps->W,0,nv);
 41:   BVGetColumn(eps->V,nv,&u);
 42:   BVDotVec(eps->W,u,w);
 43:   BVRestoreColumn(eps->V,nv,&u);
 44:   MatDenseGetArray(M,&pM);
 45:   PetscArraycpy(A,pM,ncv*ncv);
 46:   PetscBLASIntCast(nv,&n_);
 47:   PetscBLASIntCast(ncv,&ncv_);
 48:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n_,&n_,A,&ncv_,p,&info));
 49:   SlepcCheckLapackInfo("getrf",info);
 50:   PetscLogFlops(2.0*n_*n_*n_/3.0);
 51:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("N",&n_,&one,A,&ncv_,p,w,&ncv_,&info));
 52:   SlepcCheckLapackInfo("getrs",info);
 53:   PetscLogFlops(2.0*n_*n_-n_);
 54:   BVMultColumn(eps->V,-1.0,1.0,nv,w);
 55:   DSGetArray(eps->ds,DS_MAT_A,&S);
 56:   beta = S[(nv-1)*ld+nv];
 57:   for (i=0;i<nv;i++) S[(nv-1)*ld+i] += beta*w[i];
 58:   DSRestoreArray(eps->ds,DS_MAT_A,&S);
 59:   BVGetColumn(eps->W,nv,&u);
 60:   BVDotVec(eps->V,u,w);
 61:   BVRestoreColumn(eps->W,nv,&u);
 62:   PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n_,&one,A,&ncv_,p,w,&ncv_,&info));
 63:   BVMultColumn(eps->W,-1.0,1.0,nv,w);
 64:   DSGetArray(eps->dsts,DS_MAT_A,&T);
 65:   beta = T[(nv-1)*ld+nv];
 66:   for (i=0;i<nv;i++) T[(nv-1)*ld+i] += beta*w[i];
 67:   DSRestoreArray(eps->dsts,DS_MAT_A,&T);
 68:   PetscFree3(p,A,w);
 69:   BVSetActiveColumns(eps->V,l,nnv);
 70:   BVSetActiveColumns(eps->W,l,nnv);
 71:   return(0);
 72: }

 74: static PetscErrorCode EPSTwoSidedRQUpdate2(EPS eps,Mat M,PetscInt k)
 75: {
 77:   PetscScalar    *Q,*pM,*w,zero=0.0,sone=1.0,*c,*A;
 78:   PetscBLASInt   n_,ncv_,ld_;
 79:   PetscReal      norm;
 80:   PetscInt       l,nv,ncv=eps->ncv,ld,i,j;

 83:   DSGetLeadingDimension(eps->ds,&ld);
 84:   BVGetActiveColumns(eps->V,&l,&nv);
 85:   BVSetActiveColumns(eps->V,0,nv);
 86:   BVSetActiveColumns(eps->W,0,nv);
 87:   PetscMalloc2(ncv*ncv,&w,ncv,&c);
 88:   /* u = u - V*V'*u */
 89:   BVOrthogonalizeColumn(eps->V,k,c,&norm,NULL);
 90:   BVScaleColumn(eps->V,k,1.0/norm);
 91:   DSGetArray(eps->ds,DS_MAT_A,&A);
 92:   /* H = H + V'*u*b' */
 93:   for (j=l;j<k;j++) {
 94:     for (i=0;i<k;i++) A[i+j*ld] += c[i]*A[k+j*ld];
 95:     A[k+j*ld] *= norm;
 96:   }
 97:   DSRestoreArray(eps->ds,DS_MAT_A,&A);
 98:   BVOrthogonalizeColumn(eps->W,k,c,&norm,NULL);
 99:   BVScaleColumn(eps->W,k,1.0/norm);
100:   DSGetArray(eps->dsts,DS_MAT_A,&A);
101:   /* H = H + V'*u*b' */
102:   for (j=l;j<k;j++) {
103:     for (i=0;i<k;i++) A[i+j*ld] += c[i]*A[k+j*ld];
104:     A[k+j*ld] *= norm;
105:   }
106:   DSRestoreArray(eps->dsts,DS_MAT_A,&A);

108:   /* M = Q'*M*Q */
109:   MatDenseGetArray(M,&pM);
110:   PetscBLASIntCast(ncv,&ncv_);
111:   PetscBLASIntCast(nv,&n_);
112:   PetscBLASIntCast(ld,&ld_);
113:   DSGetArray(eps->ds,DS_MAT_Q,&Q);
114:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&n_,&n_,&sone,pM,&ncv_,Q,&ld_,&zero,w,&ncv_));
115:   DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
116:   DSGetArray(eps->dsts,DS_MAT_Q,&Q);
117:   PetscStackCallBLAS("BLASgemm",BLASgemm_("C","N",&n_,&n_,&n_,&sone,Q,&ld_,w,&ncv_,&zero,pM,&ncv_));
118:   DSRestoreArray(eps->dsts,DS_MAT_Q,&Q);
119:   PetscFree2(w,c);
120:   BVSetActiveColumns(eps->V,l,nv);
121:   BVSetActiveColumns(eps->W,l,nv);
122:   return(0);
123: }

125: PetscErrorCode EPSSolve_KrylovSchur_TwoSided(EPS eps)
126: {
127:   PetscErrorCode  ierr;
128:   EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data;
129:   Mat             M,U,Op,OpHT;
130:   PetscReal       norm,norm2,beta,betat,s,t;
131:   PetscScalar     *pM,*S,*T,*eigr,*eigi,*Q;
132:   PetscInt        ld,l,nv,nvt,ncv=eps->ncv,i,j,k,nconv,*p,cont,*idx,*idx2,id=0;
133:   PetscBool       breakdownt,breakdown,breakdownl;
134: #if defined(PETSC_USE_COMPLEX)
135:   Mat             A;
136: #endif

139:   DSGetLeadingDimension(eps->ds,&ld);
140:   EPSGetStartVector(eps,0,NULL);
141:   EPSGetLeftStartVector(eps,0,NULL);
142:   l = 0;
143:   PetscMalloc6(ncv*ncv,&pM,ncv,&eigr,ncv,&eigi,ncv,&idx,ncv,&idx2,ncv,&p);
144:   MatCreateSeqDense(PETSC_COMM_SELF,eps->ncv,eps->ncv,pM,&M);

146:   STGetOperator(eps->st,&Op);
147:   MatCreateHermitianTranspose(Op,&OpHT);

149:   /* Restart loop */
150:   while (eps->reason == EPS_CONVERGED_ITERATING) {
151:     eps->its++;

153:     /* Compute an nv-step Arnoldi factorization for Op */
154:     nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
155:     DSGetArray(eps->ds,DS_MAT_A,&S);
156:     BVMatArnoldi(eps->V,Op,S,ld,eps->nconv+l,&nv,&beta,&breakdown);
157:     DSRestoreArray(eps->ds,DS_MAT_A,&S);

159:     /* Compute an nv-step Arnoldi factorization for Op' */
160:     nvt = nv;
161:     DSGetArray(eps->dsts,DS_MAT_A,&T);
162:     BVMatArnoldi(eps->W,OpHT,T,ld,eps->nconv+l,&nvt,&betat,&breakdownt);
163:     DSRestoreArray(eps->dsts,DS_MAT_A,&T);

165:     /* Make sure both factorizations have the same length */
166:     nv = PetscMin(nv,nvt);
167:     DSSetDimensions(eps->ds,nv,0,eps->nconv,eps->nconv+l);
168:     DSSetDimensions(eps->dsts,nv,0,eps->nconv,eps->nconv+l);
169:     if (l==0) {
170:       DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
171:       DSSetState(eps->dsts,DS_STATE_INTERMEDIATE);
172:     } else {
173:       DSSetState(eps->ds,DS_STATE_RAW);
174:       DSSetState(eps->dsts,DS_STATE_RAW);
175:     }
176:     breakdown = (breakdown || breakdownt)? PETSC_TRUE: PETSC_FALSE;

178:     /* Update M, modify Rayleigh quotients S and T */
179:     BVSetActiveColumns(eps->V,eps->nconv+l,nv);
180:     BVSetActiveColumns(eps->W,eps->nconv+l,nv);
181:     BVMatProject(eps->V,NULL,eps->W,M);

183:     EPSTwoSidedRQUpdate1(eps,M,nv);

185:     /* Solve projected problem */
186:     DSSolve(eps->ds,eps->eigr,eps->eigi);
187:     DSSort(eps->ds,eps->eigr,eps->eigi,NULL,NULL,NULL);
188:     DSSynchronize(eps->ds,eps->eigr,eps->eigi);
189:     DSSolve(eps->dsts,eigr,eigi);
190: #if defined(PETSC_USE_COMPLEX)
191:     DSGetMat(eps->dsts,DS_MAT_A,&A);
192:     MatConjugate(A);
193:     DSRestoreMat(eps->dsts,DS_MAT_A,&A);
194:     DSGetMat(eps->dsts,DS_MAT_Q,&U);
195:     MatConjugate(U);
196:     DSRestoreMat(eps->dsts,DS_MAT_Q,&U);
197:     for (i=0;i<nv;i++) eigr[i] = PetscConj(eigr[i]);
198: #endif
199:     DSSort(eps->dsts,eigr,eigi,NULL,NULL,NULL);
200:     /* check correct eigenvalue correspondence */
201:     cont = 0;
202:     for (i=0;i<nv;i++) {
203:       if (SlepcAbsEigenvalue(eigr[i]-eps->eigr[i],eigi[i]-eps->eigi[i])>PETSC_SQRT_MACHINE_EPSILON) {idx2[cont] = i; idx[cont++] = i;}
204:       p[i] = -1;
205:     }
206:     if (cont) {
207:       for (i=0;i<cont;i++) {
208:         t = PETSC_MAX_REAL;
209:         for (j=0;j<cont;j++) if (idx2[j]!=-1 && (s=SlepcAbsEigenvalue(eigr[idx[j]]-eps->eigr[idx[i]],eigi[idx[j]]-eps->eigi[idx[i]]))<t) { id = j; t = s; }
210:         p[idx[i]] = idx[id];
211:         idx2[id] = -1;
212:       }
213:       for (i=0;i<nv;i++) if (p[i]==-1) p[i] = i;
214:       DSSortWithPermutation(eps->dsts,p,eigr,eigi);
215:     }
216: #if defined(PETSC_USE_COMPLEX)
217:     DSGetMat(eps->dsts,DS_MAT_A,&A);
218:     MatConjugate(A);
219:     DSRestoreMat(eps->dsts,DS_MAT_A,&A);
220:     DSGetMat(eps->dsts,DS_MAT_Q,&U);
221:     MatConjugate(U);
222:     DSRestoreMat(eps->dsts,DS_MAT_Q,&U);
223: #endif
224:     DSSynchronize(eps->dsts,eigr,eigi);

226:     /* Check convergence */
227:     BVNormColumn(eps->V,nv,NORM_2,&norm);
228:     BVNormColumn(eps->W,nv,NORM_2,&norm2);
229:     EPSKrylovConvergence(eps,PETSC_FALSE,eps->nconv,nv-eps->nconv,beta*norm,betat*norm2,1.0,&k);
230:     (*eps->stopping)(eps,eps->its,eps->max_it,k,eps->nev,&eps->reason,eps->stoppingctx);
231:     nconv = k;

233:     /* Update l */
234:     if (eps->reason != EPS_CONVERGED_ITERATING || breakdown || k==nv) l = 0;
235:     else {
236:       l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
237: #if !defined(PETSC_USE_COMPLEX)
238:       DSGetArray(eps->ds,DS_MAT_A,&S);
239:       if (S[k+l+(k+l-1)*ld] != 0.0) {
240:         if (k+l<nv-1) l = l+1;
241:         else l = l-1;
242:       }
243:       DSRestoreArray(eps->ds,DS_MAT_A,&S);
244: #endif
245:     }
246:     if (!ctx->lock && l>0) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

248:     /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
249:     BVSetActiveColumns(eps->V,eps->nconv,nv);
250:     BVSetActiveColumns(eps->W,eps->nconv,nv);
251:     DSGetMat(eps->ds,DS_MAT_Q,&U);
252:     BVMultInPlace(eps->V,U,eps->nconv,k+l);
253:     MatDestroy(&U);
254:     DSGetMat(eps->dsts,DS_MAT_Q,&U);
255:     BVMultInPlace(eps->W,U,eps->nconv,k+l);
256:     MatDestroy(&U);
257:     if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) {
258:       BVCopyColumn(eps->V,nv,k+l);
259:       BVCopyColumn(eps->W,nv,k+l);
260:     }

262:     if (eps->reason == EPS_CONVERGED_ITERATING) {
263:       if (breakdown || k==nv) {
264:         /* Start a new Arnoldi factorization */
265:         PetscInfo2(eps,"Breakdown in Krylov-Schur method (it=%D norm=%g)\n",eps->its,(double)beta);
266:         if (k<eps->nev) {
267:           EPSGetStartVector(eps,k,&breakdown);
268:           EPSGetLeftStartVector(eps,k,&breakdownl);
269:           if (breakdown || breakdownl) {
270:             eps->reason = EPS_DIVERGED_BREAKDOWN;
271:             PetscInfo(eps,"Unable to generate more start vectors\n");
272:           }
273:         }
274:       } else {
275:         /* Prepare the Rayleigh quotient for restart */
276:         DSGetArray(eps->ds,DS_MAT_A,&S);
277:         DSGetArray(eps->ds,DS_MAT_Q,&Q);
278:         for (i=k;i<k+l;i++) S[k+l+i*ld] = Q[nv-1+i*ld]*beta;
279:         DSRestoreArray(eps->ds,DS_MAT_A,&S);
280:         DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
281:         DSGetArray(eps->dsts,DS_MAT_A,&S);
282:         DSGetArray(eps->dsts,DS_MAT_Q,&Q);
283:         for (i=k;i<k+l;i++) S[k+l+i*ld] = Q[nv-1+i*ld]*betat;
284:         DSRestoreArray(eps->dsts,DS_MAT_A,&S);
285:         DSRestoreArray(eps->dsts,DS_MAT_Q,&Q);
286:       }
287:       EPSTwoSidedRQUpdate2(eps,M,k+l);
288:     }
289:     eps->nconv = k;
290:     EPSMonitor(eps,eps->its,nconv,eps->eigr,eps->eigi,eps->errest,nv);
291:   }

293:   STRestoreOperator(eps->st,&Op);
294:   MatDestroy(&OpHT);

296:   /* truncate Schur decomposition and change the state to raw so that
297:      DSVectors() computes eigenvectors from scratch */
298:   DSSetDimensions(eps->ds,eps->nconv,0,0,0);
299:   DSSetState(eps->ds,DS_STATE_RAW);
300:   DSSetDimensions(eps->dsts,eps->nconv,0,0,0);
301:   DSSetState(eps->dsts,DS_STATE_RAW);
302:   PetscFree6(pM,eigr,eigi,idx,idx2,p);
303:   MatDestroy(&M);
304:   return(0);
305: }