Actual source code: ks-slice.c

slepc-3.18.2 2023-01-26
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc eigensolver: "krylovschur"

 13:    Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems

 15:    References:

 17:        [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
 18:            solving sparse symmetric generalized eigenproblems", SIAM J.
 19:            Matrix Anal. Appl. 15(1):228-272, 1994.

 21:        [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
 22:            on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
 23:            2012.
 24: */

 26: #include <slepc/private/epsimpl.h>
 27: #include "krylovschur.h"

 29: static PetscBool  cited = PETSC_FALSE;
 30: static const char citation[] =
 31:   "@Article{slepc-slice,\n"
 32:   "   author = \"C. Campos and J. E. Roman\",\n"
 33:   "   title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
 34:   "   journal = \"Numer. Algorithms\",\n"
 35:   "   volume = \"60\",\n"
 36:   "   number = \"2\",\n"
 37:   "   pages = \"279--295\",\n"
 38:   "   year = \"2012,\"\n"
 39:   "   doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
 40:   "}\n";

 42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON

 44: static PetscErrorCode EPSSliceResetSR(EPS eps)
 45: {
 46:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
 47:   EPS_SR          sr=ctx->sr;
 48:   EPS_shift       s;

 50:   if (sr) {
 51:     if (ctx->npart>1) {
 52:       BVDestroy(&sr->V);
 53:       PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
 54:     }
 55:     /* Reviewing list of shifts to free memory */
 56:     s = sr->s0;
 57:     if (s) {
 58:       while (s->neighb[1]) {
 59:         s = s->neighb[1];
 60:         PetscFree(s->neighb[0]);
 61:       }
 62:       PetscFree(s);
 63:     }
 64:     PetscFree(sr);
 65:   }
 66:   ctx->sr = NULL;
 67:   return 0;
 68: }

 70: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
 71: {
 72:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

 74:   if (!ctx->global) return 0;
 75:   /* Reset auxiliary EPS */
 76:   EPSSliceResetSR(ctx->eps);
 77:   EPSReset(ctx->eps);
 78:   EPSSliceResetSR(eps);
 79:   PetscFree(ctx->inertias);
 80:   PetscFree(ctx->shifts);
 81:   return 0;
 82: }

 84: PetscErrorCode EPSDestroy_KrylovSchur_Slice(EPS eps)
 85: {
 86:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

 88:   if (!ctx->global) return 0;
 89:   /* Destroy auxiliary EPS */
 90:   EPSReset_KrylovSchur_Slice(eps);
 91:   EPSDestroy(&ctx->eps);
 92:   if (ctx->npart>1) {
 93:     PetscSubcommDestroy(&ctx->subc);
 94:     if (ctx->commset) {
 95:       MPI_Comm_free(&ctx->commrank);
 96:       ctx->commset = PETSC_FALSE;
 97:     }
 98:     ISDestroy(&ctx->isrow);
 99:     ISDestroy(&ctx->iscol);
100:     MatDestroyMatrices(1,&ctx->submata);
101:     MatDestroyMatrices(1,&ctx->submatb);
102:   }
103:   PetscFree(ctx->subintervals);
104:   PetscFree(ctx->nconv_loc);
105:   return 0;
106: }

108: /*
109:   EPSSliceAllocateSolution - Allocate memory storage for common variables such
110:   as eigenvalues and eigenvectors. The argument extra is used for methods
111:   that require a working basis slightly larger than ncv.
112: */
113: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
114: {
115:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data;
116:   PetscReal          eta;
117:   PetscInt           k;
118:   BVType             type;
119:   BVOrthogType       orthog_type;
120:   BVOrthogRefineType orthog_ref;
121:   BVOrthogBlockType  ob_type;
122:   Mat                matrix;
123:   Vec                t;
124:   EPS_SR             sr = ctx->sr;

126:   /* allocate space for eigenvalues and friends */
127:   k = PetscMax(1,sr->numEigs);
128:   PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
129:   PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);

131:   /* allocate sr->V and transfer options from eps->V */
132:   BVDestroy(&sr->V);
133:   BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
134:   if (!eps->V) EPSGetBV(eps,&eps->V);
135:   if (!((PetscObject)(eps->V))->type_name) BVSetType(sr->V,BVSVEC);
136:   else {
137:     BVGetType(eps->V,&type);
138:     BVSetType(sr->V,type);
139:   }
140:   STMatCreateVecsEmpty(eps->st,&t,NULL);
141:   BVSetSizesFromVec(sr->V,t,k);
142:   VecDestroy(&t);
143:   EPS_SetInnerProduct(eps);
144:   BVGetMatrix(eps->V,&matrix,NULL);
145:   BVSetMatrix(sr->V,matrix,PETSC_FALSE);
146:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
147:   BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
148:   return 0;
149: }

151: static PetscErrorCode EPSSliceGetEPS(EPS eps)
152: {
153:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
154:   BV                 V;
155:   BVType             type;
156:   PetscReal          eta;
157:   BVOrthogType       orthog_type;
158:   BVOrthogRefineType orthog_ref;
159:   BVOrthogBlockType  ob_type;
160:   PetscInt           i;
161:   PetscReal          h,a,b;
162:   PetscRandom        rand;
163:   EPS_SR             sr=ctx->sr;

165:   if (!ctx->eps) EPSKrylovSchurGetChildEPS(eps,&ctx->eps);

167:   /* Determine subintervals */
168:   if (ctx->npart==1) {
169:     a = eps->inta; b = eps->intb;
170:   } else {
171:     if (!ctx->subintset) { /* uniform distribution if no set by user */
173:       h = (eps->intb-eps->inta)/ctx->npart;
174:       a = eps->inta+ctx->subc->color*h;
175:       b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
176:       PetscFree(ctx->subintervals);
177:       PetscMalloc1(ctx->npart+1,&ctx->subintervals);
178:       for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
179:       ctx->subintervals[ctx->npart] = eps->intb;
180:     } else {
181:       a = ctx->subintervals[ctx->subc->color];
182:       b = ctx->subintervals[ctx->subc->color+1];
183:     }
184:   }
185:   EPSSetInterval(ctx->eps,a,b);
186:   EPSSetConvergenceTest(ctx->eps,eps->conv);
187:   EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
188:   EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);

190:   ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
191:   ctx_local->detect = ctx->detect;

193:   /* transfer options from eps->V */
194:   EPSGetBV(ctx->eps,&V);
195:   BVGetRandomContext(V,&rand);  /* make sure the random context is available when duplicating */
196:   if (!eps->V) EPSGetBV(eps,&eps->V);
197:   if (!((PetscObject)(eps->V))->type_name) BVSetType(V,BVSVEC);
198:   else {
199:     BVGetType(eps->V,&type);
200:     BVSetType(V,type);
201:   }
202:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
203:   BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);

205:   ctx->eps->which = eps->which;
206:   ctx->eps->max_it = eps->max_it;
207:   ctx->eps->tol = eps->tol;
208:   ctx->eps->purify = eps->purify;
209:   if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
210:   EPSSetProblemType(ctx->eps,eps->problem_type);
211:   EPSSetUp(ctx->eps);
212:   ctx->eps->nconv = 0;
213:   ctx->eps->its   = 0;
214:   for (i=0;i<ctx->eps->ncv;i++) {
215:     ctx->eps->eigr[i]   = 0.0;
216:     ctx->eps->eigi[i]   = 0.0;
217:     ctx->eps->errest[i] = 0.0;
218:   }
219:   return 0;
220: }

222: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
223: {
224:   KSP            ksp,kspr;
225:   PC             pc;
226:   Mat            F;
227:   PetscReal      nzshift=shift;
228:   PetscBool      flg;

230:   if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
231:     if (inertia) *inertia = eps->n;
232:   } else if (shift <= PETSC_MIN_REAL) {
233:     if (inertia) *inertia = 0;
234:     if (zeros) *zeros = 0;
235:   } else {
236:     /* If the shift is zero, perturb it to a very small positive value.
237:        The goal is that the nonzero pattern is the same in all cases and reuse
238:        the symbolic factorizations */
239:     nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
240:     STSetShift(eps->st,nzshift);
241:     STGetKSP(eps->st,&ksp);
242:     KSPGetPC(ksp,&pc);
243:     PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
244:     if (flg) {
245:       PCRedundantGetKSP(pc,&kspr);
246:       KSPGetPC(kspr,&pc);
247:     }
248:     PCFactorGetMatrix(pc,&F);
249:     MatGetInertia(F,inertia,zeros,NULL);
250:   }
251:   if (inertia) PetscInfo(eps,"Computed inertia at shift %g: %" PetscInt_FMT "\n",(double)nzshift,*inertia);
252:   return 0;
253: }

255: /*
256:    Dummy backtransform operation
257:  */
258: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
259: {
260:   return 0;
261: }

263: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
264: {
265:   EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
266:   EPS_SR          sr,sr_loc,sr_glob;
267:   PetscInt        nEigs,dssz=1,i,zeros=0,off=0,method,hiteig=0;
268:   PetscMPIInt     nproc,rank=0,aux;
269:   PetscReal       r;
270:   MPI_Request     req;
271:   Mat             A,B=NULL;
272:   DSParallelType  ptype;
273:   MPI_Comm        child;

275:   if (ctx->global) {
276:     EPSCheckHermitianDefiniteCondition(eps,PETSC_TRUE," with spectrum slicing");
277:     EPSCheckSinvertCayleyCondition(eps,PETSC_TRUE," with spectrum slicing");
281:     EPSCheckUnsupportedCondition(eps,EPS_FEATURE_ARBITRARY | EPS_FEATURE_REGION | EPS_FEATURE_STOPPING,PETSC_TRUE," with spectrum slicing");
282:     EPSCheckIgnoredCondition(eps,EPS_FEATURE_BALANCE,PETSC_TRUE," with spectrum slicing");
283:     if (eps->tol==PETSC_DEFAULT) {
284:  #if defined(PETSC_USE_REAL_SINGLE)
285:       eps->tol = SLEPC_DEFAULT_TOL;
286: #else
287:       /* use tighter tolerance */
288:       eps->tol = SLEPC_DEFAULT_TOL*1e-2;
289: #endif
290:     }
291:     if (eps->max_it==PETSC_DEFAULT) eps->max_it = 100;
292:     if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n);  /* nev not set, use default value */
294:   }
295:   eps->ops->backtransform = EPSBackTransform_Skip;

297:   /* create spectrum slicing context and initialize it */
298:   EPSSliceResetSR(eps);
299:   PetscNew(&sr);
300:   ctx->sr = sr;
301:   sr->itsKs = 0;
302:   sr->nleap = 0;
303:   sr->nMAXCompl = eps->nev/4;
304:   sr->iterCompl = eps->max_it/4;
305:   sr->sPres = NULL;
306:   sr->nS = 0;

308:   if (ctx->npart==1 || ctx->global) {
309:     /* check presence of ends and finding direction */
310:     if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
311:       sr->int0 = eps->inta;
312:       sr->int1 = eps->intb;
313:       sr->dir = 1;
314:       if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
315:         sr->hasEnd = PETSC_FALSE;
316:       } else sr->hasEnd = PETSC_TRUE;
317:     } else {
318:       sr->int0 = eps->intb;
319:       sr->int1 = eps->inta;
320:       sr->dir = -1;
321:       sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
322:     }
323:   }
324:   if (ctx->global) {
325:     EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
326:     /* create subintervals and initialize auxiliary eps for slicing runs */
327:     EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
328:     /* prevent computation of factorization in global eps */
329:     STSetTransform(eps->st,PETSC_FALSE);
330:     EPSSliceGetEPS(eps);
331:     sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
332:     if (ctx->npart>1) {
333:       PetscSubcommGetChild(ctx->subc,&child);
334:       if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
335:       MPI_Comm_rank(child,&rank);
336:       if (!rank) MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
337:       MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,child);
338:       PetscFree(ctx->nconv_loc);
339:       PetscMalloc1(ctx->npart,&ctx->nconv_loc);
340:       MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
341:       if (sr->dir<0) off = 1;
342:       if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
343:         PetscMPIIntCast(sr_loc->numEigs,&aux);
344:         MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
345:         MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
346:       } else {
347:         MPI_Comm_rank(child,&rank);
348:         if (!rank) {
349:           PetscMPIIntCast(sr_loc->numEigs,&aux);
350:           MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
351:           MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
352:         }
353:         PetscMPIIntCast(ctx->npart,&aux);
354:         MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,child);
355:         MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,child);
356:       }
357:       nEigs = 0;
358:       for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
359:     } else {
360:       nEigs = sr_loc->numEigs;
361:       sr->inertia0 = sr_loc->inertia0;
362:       sr->dir = sr_loc->dir;
363:     }
364:     sr->inertia1 = sr->inertia0+sr->dir*nEigs;
365:     sr->numEigs = nEigs;
366:     eps->nev = nEigs;
367:     eps->ncv = nEigs;
368:     eps->mpd = nEigs;
369:   } else {
370:     ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
371:     sr_glob = ctx_glob->sr;
372:     if (ctx->npart>1) {
373:       sr->dir = sr_glob->dir;
374:       sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
375:       sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
376:       if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
377:       else sr->hasEnd = PETSC_TRUE;
378:     }
379:     /* sets first shift */
380:     STSetShift(eps->st,(sr->int0==0.0)?10.0/PETSC_MAX_REAL:sr->int0);
381:     STSetUp(eps->st);

383:     /* compute inertia0 */
384:     EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
385:     /* undocumented option to control what to do when an eigenvalue is found:
386:        - error out if it's the endpoint of the user-provided interval (or sub-interval)
387:        - if it's an endpoint computed internally:
388:           + if hiteig=0 error out
389:           + else if hiteig=1 the subgroup that hit the eigenvalue does nothing
390:           + otherwise the subgroup that hit the eigenvalue perturbs the shift and recomputes inertia
391:     */
392:     PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&hiteig,NULL);
393:     if (zeros) { /* error in factorization */
396:       if (hiteig==1) { /* idle subgroup */
397:         sr->inertia0 = -1;
398:       } else { /* perturb shift */
399:         sr->int0 *= (1.0+SLICE_PTOL);
400:         EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
402:       }
403:     }
404:     if (ctx->npart>1) {
405:       PetscSubcommGetChild(ctx->subc,&child);
406:       /* inertia1 is received from neighbour */
407:       MPI_Comm_rank(child,&rank);
408:       if (!rank) {
409:         if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
410:           MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
411:           MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
412:         }
413:         if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
414:           MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
415:           MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
416:         }
417:         if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
418:           sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
419:           MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
420:           MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
421:         }
422:       }
423:       if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
424:         MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,child);
425:         MPI_Bcast(&sr->int1,1,MPIU_REAL,0,child);
426:       } else sr_glob->inertia1 = sr->inertia1;
427:     }

429:     /* last process in eps comm computes inertia1 */
430:     if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
431:       EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
433:       if (!rank && sr->inertia0==-1) {
434:         sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
435:         MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
436:         MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
437:       }
438:       if (sr->hasEnd) {
439:         sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
440:         i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
441:       }
442:     }

444:     /* number of eigenvalues in interval */
445:     sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
446:     if (ctx->npart>1) {
447:       /* memory allocate for subinterval eigenpairs */
448:       EPSSliceAllocateSolution(eps,1);
449:     }
450:     dssz = eps->ncv+1;
451:     DSGetParallel(ctx->eps->ds,&ptype);
452:     DSSetParallel(eps->ds,ptype);
453:     DSGetMethod(ctx->eps->ds,&method);
454:     DSSetMethod(eps->ds,method);
455:   }
456:   DSSetType(eps->ds,DSHEP);
457:   DSSetCompact(eps->ds,PETSC_TRUE);
458:   DSAllocate(eps->ds,dssz);
459:   /* keep state of subcomm matrices to check that the user does not modify them */
460:   EPSGetOperators(eps,&A,&B);
461:   PetscObjectStateGet((PetscObject)A,&ctx->Astate);
462:   PetscObjectGetId((PetscObject)A,&ctx->Aid);
463:   if (B) {
464:     PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
465:     PetscObjectGetId((PetscObject)B,&ctx->Bid);
466:   } else {
467:     ctx->Bstate=0;
468:     ctx->Bid=0;
469:   }
470:   return 0;
471: }

473: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
474: {
475:   Vec             v,vg,v_loc;
476:   IS              is1,is2;
477:   VecScatter      vec_sc;
478:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
479:   PetscInt        nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
480:   PetscScalar     *array;
481:   EPS_SR          sr_loc;
482:   BV              V_loc;

484:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
485:   V_loc = sr_loc->V;

487:   /* Gather parallel eigenvectors */
488:   BVGetColumn(eps->V,0,&v);
489:   VecGetOwnershipRange(v,&n0,&m0);
490:   BVRestoreColumn(eps->V,0,&v);
491:   BVGetColumn(ctx->eps->V,0,&v);
492:   VecGetLocalSize(v,&nloc);
493:   BVRestoreColumn(ctx->eps->V,0,&v);
494:   PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
495:   VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
496:   idx = -1;
497:   for (si=0;si<ctx->npart;si++) {
498:     j = 0;
499:     for (i=n0;i<m0;i++) {
500:       idx1[j]   = i;
501:       idx2[j++] = i+eps->n*si;
502:     }
503:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
504:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
505:     BVGetColumn(eps->V,0,&v);
506:     VecScatterCreate(v,is1,vg,is2,&vec_sc);
507:     BVRestoreColumn(eps->V,0,&v);
508:     ISDestroy(&is1);
509:     ISDestroy(&is2);
510:     for (i=0;i<ctx->nconv_loc[si];i++) {
511:       BVGetColumn(eps->V,++idx,&v);
512:       if (ctx->subc->color==si) {
513:         BVGetColumn(V_loc,i,&v_loc);
514:         VecGetArray(v_loc,&array);
515:         VecPlaceArray(vg,array);
516:       }
517:       VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
518:       VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
519:       if (ctx->subc->color==si) {
520:         VecResetArray(vg);
521:         VecRestoreArray(v_loc,&array);
522:         BVRestoreColumn(V_loc,i,&v_loc);
523:       }
524:       BVRestoreColumn(eps->V,idx,&v);
525:     }
526:     VecScatterDestroy(&vec_sc);
527:   }
528:   PetscFree2(idx1,idx2);
529:   VecDestroy(&vg);
530:   return 0;
531: }

533: /*
534:   EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
535:  */
536: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
537: {
538:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

540:   if (ctx->global && ctx->npart>1) {
541:     EPSComputeVectors(ctx->eps);
542:     EPSSliceGatherEigenVectors(eps);
543:   }
544:   return 0;
545: }

547: #define SWAP(a,b,t) {t=a;a=b;b=t;}

549: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
550: {
551:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
552:   PetscInt        i=0,j,tmpi;
553:   PetscReal       v,tmpr;
554:   EPS_shift       s;

558:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
559:     *n = 2;
560:   } else {
561:     *n = 1;
562:     s = ctx->sr->s0;
563:     while (s) {
564:       (*n)++;
565:       s = s->neighb[1];
566:     }
567:   }
568:   PetscMalloc1(*n,shifts);
569:   PetscMalloc1(*n,inertias);
570:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
571:     (*shifts)[0]   = ctx->sr->int0;
572:     (*shifts)[1]   = ctx->sr->int1;
573:     (*inertias)[0] = ctx->sr->inertia0;
574:     (*inertias)[1] = ctx->sr->inertia1;
575:   } else {
576:     s = ctx->sr->s0;
577:     while (s) {
578:       (*shifts)[i]     = s->value;
579:       (*inertias)[i++] = s->inertia;
580:       s = s->neighb[1];
581:     }
582:     (*shifts)[i]   = ctx->sr->int1;
583:     (*inertias)[i] = ctx->sr->inertia1;
584:   }
585:   /* remove possible duplicate in last position */
586:   if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
587:   /* sort result */
588:   for (i=0;i<*n;i++) {
589:     v = (*shifts)[i];
590:     for (j=i+1;j<*n;j++) {
591:       if (v > (*shifts)[j]) {
592:         SWAP((*shifts)[i],(*shifts)[j],tmpr);
593:         SWAP((*inertias)[i],(*inertias)[j],tmpi);
594:         v = (*shifts)[i];
595:       }
596:     }
597:   }
598:   return 0;
599: }

601: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
602: {
603:   PetscMPIInt     rank,nproc,*disp,*ns_loc,aux;
604:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
605:   PetscInt        i,idx,j,*perm_loc,off=0,*inertias_loc,ns;
606:   PetscScalar     *eigr_loc;
607:   EPS_SR          sr_loc;
608:   PetscReal       *shifts_loc;
609:   MPI_Comm        child;

611:   eps->nconv = 0;
612:   for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
613:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;

615:   /* Gather the shifts used and the inertias computed */
616:   EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
617:   if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
618:   if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
619:     ns--;
620:     for (i=0;i<ns;i++) {
621:       inertias_loc[i] = inertias_loc[i+1];
622:       shifts_loc[i] = shifts_loc[i+1];
623:     }
624:   }
625:   PetscMalloc1(ctx->npart,&ns_loc);
626:   PetscSubcommGetChild(ctx->subc,&child);
627:   MPI_Comm_rank(child,&rank);
628:   PetscMPIIntCast(ns,&aux);
629:   if (!rank) MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank);
630:   PetscMPIIntCast(ctx->npart,&aux);
631:   MPI_Bcast(ns_loc,aux,MPI_INT,0,child);
632:   ctx->nshifts = 0;
633:   for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
634:   PetscFree(ctx->inertias);
635:   PetscFree(ctx->shifts);
636:   PetscMalloc1(ctx->nshifts,&ctx->inertias);
637:   PetscMalloc1(ctx->nshifts,&ctx->shifts);

639:   /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
640:   eigr_loc = sr_loc->eigr;
641:   perm_loc = sr_loc->perm;
642:   MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
643:   PetscMalloc1(ctx->npart,&disp);
644:   disp[0] = 0;
645:   for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
646:   if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
647:     PetscMPIIntCast(sr_loc->numEigs,&aux);
648:     MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
649:     MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
650:     for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
651:     PetscMPIIntCast(ns,&aux);
652:     MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
653:     MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
654:     MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
655:   } else { /* subcommunicators with different size */
656:     if (!rank) {
657:       PetscMPIIntCast(sr_loc->numEigs,&aux);
658:       MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
659:       MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
660:       for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
661:       PetscMPIIntCast(ns,&aux);
662:       MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
663:       MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
664:       MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
665:     }
666:     PetscMPIIntCast(eps->nconv,&aux);
667:     MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,child);
668:     MPI_Bcast(eps->perm,aux,MPIU_INT,0,child);
669:     MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,child);
670:     PetscMPIIntCast(ctx->nshifts,&aux);
671:     MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,child);
672:     MPI_Bcast(&eps->its,1,MPIU_INT,0,child);
673:   }
674:   /* Update global array eps->perm */
675:   idx = ctx->nconv_loc[0];
676:   for (i=1;i<ctx->npart;i++) {
677:     off += ctx->nconv_loc[i-1];
678:     for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
679:   }

681:   /* Gather parallel eigenvectors */
682:   PetscFree(ns_loc);
683:   PetscFree(disp);
684:   PetscFree(shifts_loc);
685:   PetscFree(inertias_loc);
686:   return 0;
687: }

689: /*
690:    Fills the fields of a shift structure
691: */
692: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
693: {
694:   EPS_shift       s,*pending2;
695:   PetscInt        i;
696:   EPS_SR          sr;
697:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

699:   sr = ctx->sr;
700:   if ((neighb0 && val==neighb0->value) || (neighb1 && val==neighb1->value)) {
701:     sr->nPend++;
702:     return 0;
703:   }
704:   PetscNew(&s);
705:   s->value = val;
706:   s->neighb[0] = neighb0;
707:   if (neighb0) neighb0->neighb[1] = s;
708:   s->neighb[1] = neighb1;
709:   if (neighb1) neighb1->neighb[0] = s;
710:   s->comp[0] = PETSC_FALSE;
711:   s->comp[1] = PETSC_FALSE;
712:   s->index = -1;
713:   s->neigs = 0;
714:   s->nconv[0] = s->nconv[1] = 0;
715:   s->nsch[0] = s->nsch[1]=0;
716:   /* Inserts in the stack of pending shifts */
717:   /* If needed, the array is resized */
718:   if (sr->nPend >= sr->maxPend) {
719:     sr->maxPend *= 2;
720:     PetscMalloc1(sr->maxPend,&pending2);
721:     for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
722:     PetscFree(sr->pending);
723:     sr->pending = pending2;
724:   }
725:   sr->pending[sr->nPend++]=s;
726:   return 0;
727: }

729: /* Prepare for Rational Krylov update */
730: static PetscErrorCode EPSPrepareRational(EPS eps)
731: {
732:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
733:   PetscInt        dir,i,k,ld,nv;
734:   PetscScalar     *A;
735:   EPS_SR          sr = ctx->sr;
736:   Vec             v;

738:   DSGetLeadingDimension(eps->ds,&ld);
739:   dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
740:   dir*=sr->dir;
741:   k = 0;
742:   for (i=0;i<sr->nS;i++) {
743:     if (dir*PetscRealPart(sr->S[i])>0.0) {
744:       sr->S[k] = sr->S[i];
745:       sr->S[sr->nS+k] = sr->S[sr->nS+i];
746:       BVGetColumn(sr->Vnext,k,&v);
747:       BVCopyVec(eps->V,eps->nconv+i,v);
748:       BVRestoreColumn(sr->Vnext,k,&v);
749:       k++;
750:       if (k>=sr->nS/2)break;
751:     }
752:   }
753:   /* Copy to DS */
754:   DSGetArray(eps->ds,DS_MAT_A,&A);
755:   PetscArrayzero(A,ld*ld);
756:   for (i=0;i<k;i++) {
757:     A[i*(1+ld)] = sr->S[i];
758:     A[k+i*ld] = sr->S[sr->nS+i];
759:   }
760:   sr->nS = k;
761:   DSRestoreArray(eps->ds,DS_MAT_A,&A);
762:   DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL);
763:   DSSetDimensions(eps->ds,nv,0,k);
764:   /* Append u to V */
765:   BVGetColumn(sr->Vnext,sr->nS,&v);
766:   BVCopyVec(eps->V,sr->nv,v);
767:   BVRestoreColumn(sr->Vnext,sr->nS,&v);
768:   return 0;
769: }

771: /* Provides next shift to be computed */
772: static PetscErrorCode EPSExtractShift(EPS eps)
773: {
774:   PetscInt        iner,zeros=0;
775:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
776:   EPS_SR          sr;
777:   PetscReal       newShift,diam,ptol;
778:   EPS_shift       sPres;

780:   sr = ctx->sr;
781:   if (sr->nPend > 0) {
782:     if (sr->sPres==sr->pending[sr->nPend-1]) {
783:       eps->reason = EPS_CONVERGED_ITERATING;
784:       eps->its = 0;
785:       sr->nPend--;
786:       sr->sPres->rep = PETSC_TRUE;
787:       return 0;
788:     }
789:     sr->sPrev = sr->sPres;
790:     sr->sPres = sr->pending[--sr->nPend];
791:     sPres = sr->sPres;
792:     EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
793:     if (zeros) {
794:       diam = PetscMin(PetscAbsReal(sPres->neighb[0]->value-sPres->value),PetscAbsReal(sPres->neighb[1]->value-sPres->value));
795:       ptol = PetscMin(SLICE_PTOL,diam/2);
796:       newShift = sPres->value*(1.0+ptol);
797:       if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
798:       else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
799:       EPSSliceGetInertia(eps,newShift,&iner,&zeros);
801:       sPres->value = newShift;
802:     }
803:     sr->sPres->inertia = iner;
804:     eps->target = sr->sPres->value;
805:     eps->reason = EPS_CONVERGED_ITERATING;
806:     eps->its = 0;
807:   } else sr->sPres = NULL;
808:   return 0;
809: }

811: /*
812:    Symmetric KrylovSchur adapted to spectrum slicing:
813:    Allows searching an specific amount of eigenvalues in the subintervals left and right.
814:    Returns whether the search has succeeded
815: */
816: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
817: {
818:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
819:   PetscInt        i,k,l,ld,nv,*iwork,j,count0,count1,iterCompl=0,n0,n1;
820:   Mat             U,Op,T;
821:   PetscScalar     *Q,*A;
822:   PetscReal       *a,*b,beta,lambda;
823:   EPS_shift       sPres;
824:   PetscBool       breakdown,complIterating,sch0,sch1;
825:   EPS_SR          sr = ctx->sr;

827:   /* Spectrum slicing data */
828:   sPres = sr->sPres;
829:   complIterating =PETSC_FALSE;
830:   sch1 = sch0 = PETSC_TRUE;
831:   DSGetLeadingDimension(eps->ds,&ld);
832:   PetscMalloc1(2*ld,&iwork);
833:   count0=0;count1=0; /* Found on both sides */
834:   if (!sPres->rep && sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
835:     /* Rational Krylov */
836:     DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
837:     DSGetDimensions(eps->ds,NULL,NULL,&l,NULL);
838:     DSSetDimensions(eps->ds,l+1,0,0);
839:     BVSetActiveColumns(eps->V,0,l+1);
840:     DSGetMat(eps->ds,DS_MAT_Q,&U);
841:     BVMultInPlace(eps->V,U,0,l+1);
842:     DSRestoreMat(eps->ds,DS_MAT_Q,&U);
843:   } else {
844:     /* Get the starting Lanczos vector */
845:     EPSGetStartVector(eps,0,NULL);
846:     l = 0;
847:   }
848:   /* Restart loop */
849:   while (eps->reason == EPS_CONVERGED_ITERATING) {
850:     eps->its++; sr->itsKs++;
851:     /* Compute an nv-step Lanczos factorization */
852:     nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
853:     DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
854:     DSGetMat(eps->ds,DS_MAT_T,&T);
855:     STGetOperator(eps->st,&Op);
856:     BVMatLanczos(eps->V,Op,T,eps->nconv+l,&nv,&beta,&breakdown);
857:     STRestoreOperator(eps->st,&Op);
858:     sr->nv = nv;
859:     DSRestoreMat(eps->ds,DS_MAT_T,&T);
860:     DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
861:     if (l==0) DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
862:     else DSSetState(eps->ds,DS_STATE_RAW);
863:     BVSetActiveColumns(eps->V,eps->nconv,nv);

865:     /* Solve projected problem and compute residual norm estimates */
866:     if (eps->its == 1 && l > 0) {/* After rational update */
867:       DSGetArray(eps->ds,DS_MAT_A,&A);
868:       DSGetArrayReal(eps->ds,DS_MAT_T,&a);
869:       b = a + ld;
870:       k = eps->nconv+l;
871:       A[k*ld+k-1] = A[(k-1)*ld+k];
872:       A[k*ld+k] = a[k];
873:       for (j=k+1; j< nv; j++) {
874:         A[j*ld+j] = a[j];
875:         A[j*ld+j-1] = b[j-1] ;
876:         A[(j-1)*ld+j] = b[j-1];
877:       }
878:       DSRestoreArray(eps->ds,DS_MAT_A,&A);
879:       DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
880:       DSSolve(eps->ds,eps->eigr,NULL);
881:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
882:       DSSetCompact(eps->ds,PETSC_TRUE);
883:     } else { /* Restart */
884:       DSSolve(eps->ds,eps->eigr,NULL);
885:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
886:     }
887:     DSSynchronize(eps->ds,eps->eigr,NULL);

889:     /* Residual */
890:     EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
891:     /* Checking values obtained for completing */
892:     for (i=0;i<k;i++) {
893:       sr->back[i]=eps->eigr[i];
894:     }
895:     STBackTransform(eps->st,k,sr->back,eps->eigi);
896:     count0=count1=0;
897:     for (i=0;i<k;i++) {
898:       lambda = PetscRealPart(sr->back[i]);
899:       if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
900:       if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
901:     }
902:     if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
903:     else {
904:       /* Checks completion */
905:       if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
906:         eps->reason = EPS_CONVERGED_TOL;
907:       } else {
908:         if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
909:         if (complIterating) {
910:           if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
911:         } else if (k >= eps->nev) {
912:           n0 = sPres->nsch[0]-count0;
913:           n1 = sPres->nsch[1]-count1;
914:           if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
915:             /* Iterating for completion*/
916:             complIterating = PETSC_TRUE;
917:             if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
918:             if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
919:             iterCompl = sr->iterCompl;
920:           } else eps->reason = EPS_CONVERGED_TOL;
921:         }
922:       }
923:     }
924:     /* Update l */
925:     if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
926:     else l = nv-k;
927:     if (breakdown) l=0;
928:     if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

930:     if (eps->reason == EPS_CONVERGED_ITERATING) {
931:       if (breakdown) {
932:         /* Start a new Lanczos factorization */
933:         PetscInfo(eps,"Breakdown in Krylov-Schur method (it=%" PetscInt_FMT " norm=%g)\n",eps->its,(double)beta);
934:         EPSGetStartVector(eps,k,&breakdown);
935:         if (breakdown) {
936:           eps->reason = EPS_DIVERGED_BREAKDOWN;
937:           PetscInfo(eps,"Unable to generate more start vectors\n");
938:         }
939:       } else {
940:         /* Prepare the Rayleigh quotient for restart */
941:         DSGetArrayReal(eps->ds,DS_MAT_T,&a);
942:         DSGetArray(eps->ds,DS_MAT_Q,&Q);
943:         b = a + ld;
944:         for (i=k;i<k+l;i++) {
945:           a[i] = PetscRealPart(eps->eigr[i]);
946:           b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
947:         }
948:         DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
949:         DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
950:       }
951:     }
952:     /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
953:     DSGetMat(eps->ds,DS_MAT_Q,&U);
954:     BVMultInPlace(eps->V,U,eps->nconv,k+l);
955:     DSRestoreMat(eps->ds,DS_MAT_Q,&U);

957:     /* Normalize u and append it to V */
958:     if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) BVCopyColumn(eps->V,nv,k+l);
959:     eps->nconv = k;
960:     if (eps->reason != EPS_CONVERGED_ITERATING) {
961:       /* Store approximated values for next shift */
962:       DSGetArray(eps->ds,DS_MAT_Q,&Q);
963:       sr->nS = l;
964:       for (i=0;i<l;i++) {
965:         sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
966:         sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
967:       }
968:       DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
969:     }
970:   }
971:   /* Check for completion */
972:   for (i=0;i< eps->nconv; i++) {
973:     if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
974:     else sPres->nconv[0]++;
975:   }
976:   sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
977:   sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
978:   PetscInfo(eps,"Lanczos: %" PetscInt_FMT " evals in [%g,%g]%s and %" PetscInt_FMT " evals in [%g,%g]%s\n",count0,(double)(sr->dir==1?sPres->ext[0]:sPres->value),(double)(sr->dir==1?sPres->value:sPres->ext[0]),sPres->comp[0]?"*":"",count1,(double)(sr->dir==1?sPres->value:sPres->ext[1]),(double)(sr->dir==1?sPres->ext[1]:sPres->value),sPres->comp[1]?"*":"");
980:   PetscFree(iwork);
981:   return 0;
982: }

984: /*
985:   Obtains value of subsequent shift
986: */
987: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
988: {
989:   PetscReal       lambda,d_prev;
990:   PetscInt        i,idxP;
991:   EPS_SR          sr;
992:   EPS_shift       sPres,s;
993:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

995:   sr = ctx->sr;
996:   sPres = sr->sPres;
997:   if (sPres->neighb[side]) {
998:     /* Completing a previous interval */
999:     *newS = (sPres->value + sPres->neighb[side]->value)/2;
1000:     if (PetscAbsReal(sPres->value - *newS)/PetscAbsReal(sPres->value)<=100*PETSC_SQRT_MACHINE_EPSILON) *newS = sPres->value;
1001:   } else { /* (Only for side=1). Creating a new interval. */
1002:     if (sPres->neigs==0) {/* No value has been accepted*/
1003:       if (sPres->neighb[0]) {
1004:         /* Multiplying by 10 the previous distance */
1005:         *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1006:         sr->nleap++;
1007:         /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1009:       } else { /* First shift */
1011:         /* Unaccepted values give information for next shift */
1012:         idxP=0;/* Number of values left from shift */
1013:         for (i=0;i<eps->nconv;i++) {
1014:           lambda = PetscRealPart(eps->eigr[i]);
1015:           if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1016:           else break;
1017:         }
1018:         /* Avoiding subtraction of eigenvalues (might be the same).*/
1019:         if (idxP>0) {
1020:           d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1021:         } else {
1022:           d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1023:         }
1024:         *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1025:       }
1026:     } else { /* Accepted values found */
1027:       sr->nleap = 0;
1028:       /* Average distance of values in previous subinterval */
1029:       s = sPres->neighb[0];
1030:       while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1031:         s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1032:       }
1033:       if (s) {
1034:         d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1035:       } else { /* First shift. Average distance obtained with values in this shift */
1036:         /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1037:         if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1038:           d_prev =  PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1039:         } else {
1040:           d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1041:         }
1042:       }
1043:       /* Average distance is used for next shift by adding it to value on the right or to shift */
1044:       if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1045:         *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1046:       } else { /* Last accepted value is on the left of shift. Adding to shift */
1047:         *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1048:       }
1049:     }
1050:     /* End of interval can not be surpassed */
1051:     if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1052:   }/* of neighb[side]==null */
1053:   return 0;
1054: }

1056: /*
1057:   Function for sorting an array of real values
1058: */
1059: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1060: {
1061:   PetscReal re;
1062:   PetscInt  i,j,tmp;

1064:   if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1065:   /* Insertion sort */
1066:   for (i=1;i<nr;i++) {
1067:     re = PetscRealPart(r[perm[i]]);
1068:     j = i-1;
1069:     while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1070:       tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1071:     }
1072:   }
1073:   return 0;
1074: }

1076: /* Stores the pairs obtained since the last shift in the global arrays */
1077: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1078: {
1079:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1080:   PetscReal       lambda,err,norm;
1081:   PetscInt        i,count;
1082:   PetscBool       iscayley;
1083:   EPS_SR          sr = ctx->sr;
1084:   EPS_shift       sPres;
1085:   Vec             v,w;

1087:   sPres = sr->sPres;
1088:   sPres->index = sr->indexEig;
1089:   count = sr->indexEig;
1090:   /* Back-transform */
1091:   STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1092:   PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1093:   /* Sort eigenvalues */
1094:   sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1095:   /* Values stored in global array */
1096:   for (i=0;i<eps->nconv;i++) {
1097:     lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1098:     err = eps->errest[eps->perm[i]];

1100:     if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1102:       sr->eigr[count] = lambda;
1103:       sr->errest[count] = err;
1104:       /* Explicit purification */
1105:       BVGetColumn(eps->V,eps->perm[i],&w);
1106:       if (eps->purify) {
1107:         BVGetColumn(sr->V,count,&v);
1108:         STApply(eps->st,w,v);
1109:         BVRestoreColumn(sr->V,count,&v);
1110:       } else BVInsertVec(sr->V,count,w);
1111:       BVRestoreColumn(eps->V,eps->perm[i],&w);
1112:       BVNormColumn(sr->V,count,NORM_2,&norm);
1113:       BVScaleColumn(sr->V,count,1.0/norm);
1114:       count++;
1115:     }
1116:   }
1117:   sPres->neigs = count - sr->indexEig;
1118:   sr->indexEig = count;
1119:   /* Global ordering array updating */
1120:   sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1121:   return 0;
1122: }

1124: static PetscErrorCode EPSLookForDeflation(EPS eps)
1125: {
1126:   PetscReal       val;
1127:   PetscInt        i,count0=0,count1=0;
1128:   EPS_shift       sPres;
1129:   PetscInt        ini,fin,k,idx0,idx1;
1130:   EPS_SR          sr;
1131:   Vec             v;
1132:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

1134:   sr = ctx->sr;
1135:   sPres = sr->sPres;

1137:   if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1138:   else ini = 0;
1139:   fin = sr->indexEig;
1140:   /* Selection of ends for searching new values */
1141:   if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1142:   else sPres->ext[0] = sPres->neighb[0]->value;
1143:   if (!sPres->neighb[1]) {
1144:     if (sr->hasEnd) sPres->ext[1] = sr->int1;
1145:     else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1146:   } else sPres->ext[1] = sPres->neighb[1]->value;
1147:   /* Selection of values between right and left ends */
1148:   for (i=ini;i<fin;i++) {
1149:     val=PetscRealPart(sr->eigr[sr->perm[i]]);
1150:     /* Values to the right of left shift */
1151:     if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1152:       if ((sr->dir)*(val - sPres->value) < 0) count0++;
1153:       else count1++;
1154:     } else break;
1155:   }
1156:   /* The number of values on each side are found */
1157:   if (sPres->neighb[0]) {
1158:     sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1160:   } else sPres->nsch[0] = 0;

1162:   if (sPres->neighb[1]) {
1163:     sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1165:   } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);

1167:   /* Completing vector of indexes for deflation */
1168:   idx0 = ini;
1169:   idx1 = ini+count0+count1;
1170:   k=0;
1171:   for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1172:   BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1173:   BVSetNumConstraints(sr->Vnext,k);
1174:   for (i=0;i<k;i++) {
1175:     BVGetColumn(sr->Vnext,-i-1,&v);
1176:     BVCopyVec(sr->V,sr->idxDef[i],v);
1177:     BVRestoreColumn(sr->Vnext,-i-1,&v);
1178:   }

1180:   /* For rational Krylov */
1181:   if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) EPSPrepareRational(eps);
1182:   eps->nconv = 0;
1183:   /* Get rid of temporary Vnext */
1184:   BVDestroy(&eps->V);
1185:   eps->V = sr->Vnext;
1186:   sr->Vnext = NULL;
1187:   return 0;
1188: }

1190: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1191: {
1192:   PetscInt         i,lds,ti;
1193:   PetscReal        newS;
1194:   EPS_KRYLOVSCHUR  *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1195:   EPS_SR           sr=ctx->sr;
1196:   Mat              A,B=NULL;
1197:   PetscObjectState Astate,Bstate=0;
1198:   PetscObjectId    Aid,Bid=0;

1200:   PetscCitationsRegister(citation,&cited);
1201:   if (ctx->global) {
1202:     EPSSolve_KrylovSchur_Slice(ctx->eps);
1203:     ctx->eps->state = EPS_STATE_SOLVED;
1204:     eps->reason = EPS_CONVERGED_TOL;
1205:     if (ctx->npart>1) {
1206:       /* Gather solution from subsolvers */
1207:       EPSSliceGatherSolution(eps);
1208:     } else {
1209:       eps->nconv = sr->numEigs;
1210:       eps->its   = ctx->eps->its;
1211:       PetscFree(ctx->inertias);
1212:       PetscFree(ctx->shifts);
1213:       EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1214:     }
1215:   } else {
1216:     if (ctx->npart==1) {
1217:       sr->eigr   = ctx->eps->eigr;
1218:       sr->eigi   = ctx->eps->eigi;
1219:       sr->perm   = ctx->eps->perm;
1220:       sr->errest = ctx->eps->errest;
1221:       sr->V      = ctx->eps->V;
1222:     }
1223:     /* Check that the user did not modify subcomm matrices */
1224:     EPSGetOperators(eps,&A,&B);
1225:     PetscObjectStateGet((PetscObject)A,&Astate);
1226:     PetscObjectGetId((PetscObject)A,&Aid);
1227:     if (B) {
1228:       PetscObjectStateGet((PetscObject)B,&Bstate);
1229:       PetscObjectGetId((PetscObject)B,&Bid);
1230:     }
1232:     /* Only with eigenvalues present in the interval ...*/
1233:     if (sr->numEigs==0) {
1234:       eps->reason = EPS_CONVERGED_TOL;
1235:       return 0;
1236:     }
1237:     /* Array of pending shifts */
1238:     sr->maxPend = 100; /* Initial size */
1239:     sr->nPend = 0;
1240:     PetscMalloc1(sr->maxPend,&sr->pending);
1241:     EPSCreateShift(eps,sr->int0,NULL,NULL);
1242:     /* extract first shift */
1243:     sr->sPrev = NULL;
1244:     sr->sPres = sr->pending[--sr->nPend];
1245:     sr->sPres->inertia = sr->inertia0;
1246:     eps->target = sr->sPres->value;
1247:     sr->s0 = sr->sPres;
1248:     sr->indexEig = 0;
1249:     /* Memory reservation for auxiliary variables */
1250:     lds = PetscMin(eps->mpd,eps->ncv);
1251:     PetscCalloc1(lds*lds,&sr->S);
1252:     PetscMalloc1(eps->ncv,&sr->back);
1253:     for (i=0;i<sr->numEigs;i++) {
1254:       sr->eigr[i]   = 0.0;
1255:       sr->eigi[i]   = 0.0;
1256:       sr->errest[i] = 0.0;
1257:       sr->perm[i]   = i;
1258:     }
1259:     /* Vectors for deflation */
1260:     PetscMalloc1(sr->numEigs,&sr->idxDef);
1261:     sr->indexEig = 0;
1262:     /* Main loop */
1263:     while (sr->sPres) {
1264:       /* Search for deflation */
1265:       EPSLookForDeflation(eps);
1266:       /* KrylovSchur */
1267:       EPSKrylovSchur_Slice(eps);

1269:       EPSStoreEigenpairs(eps);
1270:       /* Select new shift */
1271:       if (!sr->sPres->comp[1]) {
1272:         EPSGetNewShiftValue(eps,1,&newS);
1273:         EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1274:       }
1275:       if (!sr->sPres->comp[0]) {
1276:         /* Completing earlier interval */
1277:         EPSGetNewShiftValue(eps,0,&newS);
1278:         EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1279:       }
1280:       /* Preparing for a new search of values */
1281:       EPSExtractShift(eps);
1282:     }

1284:     /* Updating eps values prior to exit */
1285:     PetscFree(sr->S);
1286:     PetscFree(sr->idxDef);
1287:     PetscFree(sr->pending);
1288:     PetscFree(sr->back);
1289:     BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1290:     BVSetNumConstraints(sr->Vnext,0);
1291:     BVDestroy(&eps->V);
1292:     eps->V      = sr->Vnext;
1293:     eps->nconv  = sr->indexEig;
1294:     eps->reason = EPS_CONVERGED_TOL;
1295:     eps->its    = sr->itsKs;
1296:     eps->nds    = 0;
1297:     if (sr->dir<0) {
1298:       for (i=0;i<eps->nconv/2;i++) {
1299:         ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1300:       }
1301:     }
1302:   }
1303:   return 0;
1304: }