Actual source code: dsnhep.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   PetscFree(ds->perm);
 22:   PetscMalloc1(ld,&ds->perm);
 23:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 24:   return(0);
 25: }

 27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
 28: {
 29:   PetscErrorCode    ierr;
 30:   PetscViewerFormat format;

 33:   PetscViewerGetFormat(viewer,&format);
 34:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
 35:   DSViewMat(ds,viewer,DS_MAT_A);
 36:   if (ds->state>DS_STATE_INTERMEDIATE) {
 37:     DSViewMat(ds,viewer,DS_MAT_Q);
 38:   }
 39:   if (ds->mat[DS_MAT_X]) {
 40:     DSViewMat(ds,viewer,DS_MAT_X);
 41:   }
 42:   if (ds->mat[DS_MAT_Y]) {
 43:     DSViewMat(ds,viewer,DS_MAT_Y);
 44:   }
 45:   return(0);
 46: }

 48: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
 49: {
 50: #if defined(PETSC_MISSING_LAPACK_GESVD)
 52:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
 53: #else
 55:   PetscInt       i,j;
 56:   PetscBLASInt   info,ld,n,n1,lwork,inc=1;
 57:   PetscScalar    sdummy,done=1.0,zero=0.0;
 58:   PetscReal      *sigma;
 59:   PetscBool      iscomplex = PETSC_FALSE;
 60:   PetscScalar    *A = ds->mat[DS_MAT_A];
 61:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
 62:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
 63:   PetscScalar    *W;

 66:   if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
 67:   PetscBLASIntCast(ds->n,&n);
 68:   PetscBLASIntCast(ds->ld,&ld);
 69:   n1 = n+1;
 70:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
 71:   if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
 72:   DSAllocateWork_Private(ds,5*ld,6*ld,0);
 73:   DSAllocateMat_Private(ds,DS_MAT_W);
 74:   W = ds->mat[DS_MAT_W];
 75:   lwork = 5*ld;
 76:   sigma = ds->rwork+5*ld;

 78:   /* build A-w*I in W */
 79:   for (j=0;j<n;j++)
 80:     for (i=0;i<=n;i++)
 81:       W[i+j*ld] = A[i+j*ld];
 82:   for (i=0;i<n;i++)
 83:     W[i+i*ld] -= A[(*k)+(*k)*ld];

 85:   /* compute SVD of W */
 86: #if !defined(PETSC_USE_COMPLEX)
 87:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
 88: #else
 89:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
 90: #endif
 91:   SlepcCheckLapackInfo("gesvd",info);

 93:   /* the smallest singular value is the new error estimate */
 94:   if (rnorm) *rnorm = sigma[n-1];

 96:   /* update vector with right singular vector associated to smallest singular value,
 97:      accumulating the transformation matrix Q */
 98:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
 99:   return(0);
100: #endif
101: }

103: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
104: {
106:   PetscInt       i;

109:   for (i=0;i<ds->n;i++) {
110:     DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
111:   }
112:   return(0);
113: }

115: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
116: {
117: #if defined(SLEPC_MISSING_LAPACK_TREVC)
119:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
120: #else
122:   PetscInt       i;
123:   PetscBLASInt   mm=1,mout,info,ld,n,*select,inc = 1;
124:   PetscScalar    tmp,done=1.0,zero=0.0;
125:   PetscReal      norm;
126:   PetscBool      iscomplex = PETSC_FALSE;
127:   PetscScalar    *A = ds->mat[DS_MAT_A];
128:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
129:   PetscScalar    *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
130:   PetscScalar    *Y;

133:   PetscBLASIntCast(ds->n,&n);
134:   PetscBLASIntCast(ds->ld,&ld);
135:   DSAllocateWork_Private(ds,0,0,ld);
136:   select = ds->iwork;
137:   for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;

139:   /* compute k-th eigenvector Y of A */
140:   Y = X+(*k)*ld;
141:   select[*k] = (PetscBLASInt)PETSC_TRUE;
142: #if !defined(PETSC_USE_COMPLEX)
143:   if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
144:   mm = iscomplex? 2: 1;
145:   if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
146:   DSAllocateWork_Private(ds,3*ld,0,0);
147:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
148: #else
149:   DSAllocateWork_Private(ds,2*ld,ld,0);
150:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
151: #endif
152:   SlepcCheckLapackInfo("trevc",info);
153:   if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");

155:   /* accumulate and normalize eigenvectors */
156:   if (ds->state>=DS_STATE_CONDENSED) {
157:     PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
158:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
159: #if !defined(PETSC_USE_COMPLEX)
160:     if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
161: #endif
162:     norm = BLASnrm2_(&n,Y,&inc);
163: #if !defined(PETSC_USE_COMPLEX)
164:     if (iscomplex) {
165:       tmp = BLASnrm2_(&n,Y+ld,&inc);
166:       norm = SlepcAbsEigenvalue(norm,tmp);
167:     }
168: #endif
169:     tmp = 1.0 / norm;
170:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
171: #if !defined(PETSC_USE_COMPLEX)
172:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
173: #endif
174:   }

176:   /* set output arguments */
177:   if (iscomplex) (*k)++;
178:   if (rnorm) {
179:     if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
180:     else *rnorm = PetscAbsScalar(Y[n-1]);
181:   }
182:   return(0);
183: #endif
184: }

186: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
187: {
188: #if defined(SLEPC_MISSING_LAPACK_TREVC)
190:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
191: #else
193:   PetscInt       i;
194:   PetscBLASInt   n,ld,mout,info,inc = 1;
195:   PetscBool      iscomplex;
196:   PetscScalar    *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
197:   PetscReal      norm;
198:   const char     *side,*back;

201:   PetscBLASIntCast(ds->n,&n);
202:   PetscBLASIntCast(ds->ld,&ld);
203:   if (left) {
204:     X = NULL;
205:     Y = ds->mat[DS_MAT_Y];
206:     side = "L";
207:   } else {
208:     X = ds->mat[DS_MAT_X];
209:     Y = NULL;
210:     side = "R";
211:   }
212:   Z = left? Y: X;
213:   if (ds->state>=DS_STATE_CONDENSED) {
214:     /* DSSolve() has been called, backtransform with matrix Q */
215:     back = "B";
216:     PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
217:   } else back = "A";
218: #if !defined(PETSC_USE_COMPLEX)
219:   DSAllocateWork_Private(ds,3*ld,0,0);
220:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
221: #else
222:   DSAllocateWork_Private(ds,2*ld,ld,0);
223:   PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
224: #endif
225:   SlepcCheckLapackInfo("trevc",info);

227:   /* normalize eigenvectors */
228:   for (i=0;i<n;i++) {
229:     iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
230:     norm = BLASnrm2_(&n,Z+i*ld,&inc);
231: #if !defined(PETSC_USE_COMPLEX)
232:     if (iscomplex) {
233:       tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
234:       norm = SlepcAbsEigenvalue(norm,tmp);
235:     }
236: #endif
237:     tmp = 1.0 / norm;
238:     PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
239: #if !defined(PETSC_USE_COMPLEX)
240:     if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
241: #endif
242:     if (iscomplex) i++;
243:   }
244:   return(0);
245: #endif
246: }

248: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
249: {

253:   switch (mat) {
254:     case DS_MAT_X:
255:       if (ds->refined) {
256:         if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
257:         if (j) {
258:           DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
259:         } else {
260:           DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
261:         }
262:       } else {
263:         if (j) {
264:           DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
265:         } else {
266:           DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
267:         }
268:       }
269:       break;
270:     case DS_MAT_Y:
271:       if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
272:       if (j) {
273:         DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
274:       } else {
275:         DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
276:       }
277:       break;
278:     case DS_MAT_U:
279:     case DS_MAT_VT:
280:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
281:       break;
282:     default:
283:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
284:   }
285:   if (ds->state < DS_STATE_CONDENSED) {
286:     DSSetState(ds,DS_STATE_CONDENSED);
287:   }
288:   return(0);
289: }

291: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
292: {
293: #if defined(PETSC_MISSING_LAPACK_TRSEN)
295:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
296: #else
298:   PetscInt       i;
299:   PetscBLASInt   info,n,ld,mout,lwork,*selection;
300:   PetscScalar    *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
301: #if !defined(PETSC_USE_COMPLEX)
302:   PetscBLASInt   *iwork,liwork;
303: #endif

306:   if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
307:   PetscBLASIntCast(ds->n,&n);
308:   PetscBLASIntCast(ds->ld,&ld);
309: #if !defined(PETSC_USE_COMPLEX)
310:   lwork = n;
311:   liwork = 1;
312:   DSAllocateWork_Private(ds,lwork,0,liwork+n);
313:   work = ds->work;
314:   lwork = ds->lwork;
315:   selection = ds->iwork;
316:   iwork = ds->iwork + n;
317:   liwork = ds->liwork - n;
318: #else
319:   lwork = 1;
320:   DSAllocateWork_Private(ds,lwork,0,n);
321:   work = ds->work;
322:   selection = ds->iwork;
323: #endif
324:   /* Compute the selected eigenvalue to be in the leading position */
325:   DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
326:   PetscMemzero(selection,n*sizeof(PetscBLASInt));
327:   for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
328: #if !defined(PETSC_USE_COMPLEX)
329:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
330: #else
331:   PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
332: #endif
333:   SlepcCheckLapackInfo("trsen",info);
334:   *k = mout;
335:   return(0);
336: #endif
337: }

339: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
340: {
341: #if defined(SLEPC_MISSING_LAPACK_TREXC)
343:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
344: #else
346:   PetscScalar    re;
347:   PetscInt       i,j,pos,result;
348:   PetscBLASInt   ifst,ilst,info,n,ld;
349:   PetscScalar    *T = ds->mat[DS_MAT_A];
350:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
351: #if !defined(PETSC_USE_COMPLEX)
352:   PetscScalar    *work,im;
353: #endif

356:   PetscBLASIntCast(ds->n,&n);
357:   PetscBLASIntCast(ds->ld,&ld);
358: #if !defined(PETSC_USE_COMPLEX)
359:   DSAllocateWork_Private(ds,ld,0,0);
360:   work = ds->work;
361: #endif
362:   /* selection sort */
363:   for (i=ds->l;i<n-1;i++) {
364:     re = wr[i];
365: #if !defined(PETSC_USE_COMPLEX)
366:     im = wi[i];
367: #endif
368:     pos = 0;
369:     j=i+1; /* j points to the next eigenvalue */
370: #if !defined(PETSC_USE_COMPLEX)
371:     if (im != 0) j=i+2;
372: #endif
373:     /* find minimum eigenvalue */
374:     for (;j<n;j++) {
375: #if !defined(PETSC_USE_COMPLEX)
376:       SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
377: #else
378:       SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
379: #endif
380:       if (result > 0) {
381:         re = wr[j];
382: #if !defined(PETSC_USE_COMPLEX)
383:         im = wi[j];
384: #endif
385:         pos = j;
386:       }
387: #if !defined(PETSC_USE_COMPLEX)
388:       if (wi[j] != 0) j++;
389: #endif
390:     }
391:     if (pos) {
392:       /* interchange blocks */
393:       PetscBLASIntCast(pos+1,&ifst);
394:       PetscBLASIntCast(i+1,&ilst);
395: #if !defined(PETSC_USE_COMPLEX)
396:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
397: #else
398:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
399: #endif
400:       SlepcCheckLapackInfo("trexc",info);
401:       /* recover original eigenvalues from T matrix */
402:       for (j=i;j<n;j++) {
403:         wr[j] = T[j+j*ld];
404: #if !defined(PETSC_USE_COMPLEX)
405:         if (j<n-1 && T[j+1+j*ld] != 0.0) {
406:           /* complex conjugate eigenvalue */
407:           wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
408:                   PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
409:           wr[j+1] = wr[j];
410:           wi[j+1] = -wi[j];
411:           j++;
412:         } else {
413:           wi[j] = 0.0;
414:         }
415: #endif
416:       }
417:     }
418: #if !defined(PETSC_USE_COMPLEX)
419:     if (wi[i] != 0) i++;
420: #endif
421:   }
422:   return(0);
423: #endif
424: }

426: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
427: {

431:   if (!rr || wr == rr) {
432:     DSSort_NHEP_Total(ds,wr,wi);
433:   } else {
434:     DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
435:   }
436:   return(0);
437: }

439: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
440: {
441: #if defined(SLEPC_MISSING_LAPACK_TREXC)
443:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
444: #else
446:   PetscInt       i,j,pos,inc=1;
447:   PetscBLASInt   ifst,ilst,info,n,ld;
448:   PetscScalar    *T = ds->mat[DS_MAT_A];
449:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
450: #if !defined(PETSC_USE_COMPLEX)
451:   PetscScalar    *work;
452: #endif

455:   PetscBLASIntCast(ds->n,&n);
456:   PetscBLASIntCast(ds->ld,&ld);
457: #if !defined(PETSC_USE_COMPLEX)
458:   DSAllocateWork_Private(ds,ld,0,0);
459:   work = ds->work;
460: #endif
461:   for (i=ds->l;i<n-1;i++) {
462:     pos = perm[i];
463: #if !defined(PETSC_USE_COMPLEX)
464:     inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
465: #endif
466:     if (pos!=i) {
467: #if !defined(PETSC_USE_COMPLEX)
468:       if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1)) 
469:  SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
470: #endif

472:       /* interchange blocks */
473:       PetscBLASIntCast(pos+1,&ifst);
474:       PetscBLASIntCast(i+1,&ilst);
475: #if !defined(PETSC_USE_COMPLEX)
476:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
477: #else
478:       PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
479: #endif
480:       SlepcCheckLapackInfo("trexc",info);
481:       for (j=i+1;j<n;j++) {
482:         if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
483:       }
484:       perm[i] = i;
485:       if (inc==2) perm[i+1] = i+1;
486:     }
487:     if (inc==2) i++;
488:   }
489:   /* recover original eigenvalues from T matrix */
490:   for (j=ds->l;j<n;j++) {
491:     wr[j] = T[j+j*ld];
492: #if !defined(PETSC_USE_COMPLEX)
493:     if (j<n-1 && T[j+1+j*ld] != 0.0) {
494:       /* complex conjugate eigenvalue */
495:       wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
496:       wr[j+1] = wr[j];
497:       wi[j+1] = -wi[j];
498:       j++;
499:     } else {
500:       wi[j] = 0.0;
501:     }
502: #endif
503:   }
504:   return(0);
505: #endif
506: }

508: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
509: {
511:   PetscInt       i;
512:   PetscBLASInt   n,ld,incx=1;
513:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;

516:   PetscBLASIntCast(ds->n,&n);
517:   PetscBLASIntCast(ds->ld,&ld);
518:   A  = ds->mat[DS_MAT_A];
519:   Q  = ds->mat[DS_MAT_Q];
520:   DSAllocateWork_Private(ds,2*ld,0,0);
521:   x = ds->work;
522:   y = ds->work+ld;
523:   for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
524:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
525:   for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
526:   ds->k = n;
527:   return(0);
528: }

530: /*
531:    Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
532:    The result overwrites A. Matrix A has the form

534:          [ S | * ]
535:      A = [-------]
536:          [ R | H ]

538:   where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
539:   matrix of order n-k, and R is all zeros except the first row (the arrow).
540:   The algorithm uses elementary reflectors to annihilate entries in the arrow, and
541:   then proceeds upwards. 
542:   If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
543:   hence the first ilo-1 rows and columns of Q are set to the identity matrix.

545:   Required workspace is 2*n.
546: */
547: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
548: {
549: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
551:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
552: #else
553:   PetscBLASInt i,j,n0,m,inc=1,incn=-1;
554:   PetscScalar  t,*v=work,*w=work+n,tau,tauc;

557:   m = n-ilo+1;
558:   for (i=k;i>ilo;i--) {
559:     for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda];  /* _larfg does not allow negative inc, so use buffer */
560:     n0 = i-ilo+1;
561:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
562:     for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
563:     tauc = PetscConj(tau);
564:     A[i+(i-1)*lda] = v[0];
565:     v[0] = 1.0;
566:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
567:     for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
568:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
569:     PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
570:   }
571:   /* trivial in-place transposition of Q */
572:   for (j=ilo-1;j<k;j++) {
573:     for (i=j;i<k;i++) {
574:       t = Q[i+j*ldq];
575:       if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
576:       Q[j+i*ldq] = PetscConj(t);
577:     }
578:   }
579:   return(0);
580: #endif
581: }

583: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
584: {
585: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
587:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
588: #else
590:   PetscScalar    *work,*tau;
591:   PetscInt       i,j;
592:   PetscBLASInt   ilo,lwork,info,n,k,ld;
593:   PetscScalar    *A = ds->mat[DS_MAT_A];
594:   PetscScalar    *Q = ds->mat[DS_MAT_Q];

597: #if !defined(PETSC_USE_COMPLEX)
599: #endif
600:   PetscBLASIntCast(ds->n,&n);
601:   PetscBLASIntCast(ds->ld,&ld);
602:   PetscBLASIntCast(ds->l+1,&ilo);
603:   PetscBLASIntCast(ds->k,&k);
604:   DSAllocateWork_Private(ds,ld+6*ld,0,0);
605:   tau  = ds->work;
606:   work = ds->work+ld;
607:   lwork = 6*ld;

609:   /* initialize orthogonal matrix */
610:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
611:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
612:   if (n==1) { /* quick return */
613:     wr[0] = A[0];
614:     wi[0] = 0.0;
615:     return(0);
616:   }

618:   /* reduce to upper Hessenberg form */
619:   if (ds->state<DS_STATE_INTERMEDIATE) {
620:     if (PETSC_FALSE && k>0) {
621:       ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
622:     } else {
623:       PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
624:       SlepcCheckLapackInfo("gehrd",info);
625:       for (j=0;j<n-1;j++) {
626:         for (i=j+2;i<n;i++) {
627:           Q[i+j*ld] = A[i+j*ld];
628:           A[i+j*ld] = 0.0;
629:         }
630:       }
631:       PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
632:       SlepcCheckLapackInfo("orghr",info);
633:     }
634:   }

636:   /* compute the (real) Schur form */
637: #if !defined(PETSC_USE_COMPLEX)
638:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
639:   for (j=0;j<ds->l;j++) {
640:     if (j==n-1 || A[j+1+j*ld] == 0.0) {
641:       /* real eigenvalue */
642:       wr[j] = A[j+j*ld];
643:       wi[j] = 0.0;
644:     } else {
645:       /* complex eigenvalue */
646:       wr[j] = A[j+j*ld];
647:       wr[j+1] = A[j+j*ld];
648:       wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
649:               PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
650:       wi[j+1] = -wi[j];
651:       j++;
652:     }
653:   }
654: #else
655:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
656:   if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
657: #endif
658:   SlepcCheckLapackInfo("hseqr",info);
659:   return(0);
660: #endif
661: }

663: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
664: {
666:   PetscInt       ld=ds->ld,l=ds->l,k;
667:   PetscMPIInt    n,rank,off=0,size,ldn;

670:   k = (ds->n-l)*ld;
671:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
672:   if (eigr) k += ds->n-l; 
673:   if (eigi) k += ds->n-l; 
674:   DSAllocateWork_Private(ds,k,0,0);
675:   PetscMPIIntCast(k*sizeof(PetscScalar),&size);
676:   PetscMPIIntCast(ds->n-l,&n);
677:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
678:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
679:   if (!rank) {
680:     MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
681:     if (ds->state>DS_STATE_RAW) {
682:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683:     }
684:     if (eigr) {
685:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
686:     }
687:     if (eigi) {
688:       MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
689:     }
690:   }
691:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
692:   if (rank) {
693:     MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
694:     if (ds->state>DS_STATE_RAW) {
695:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696:     }
697:     if (eigr) {
698:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
699:     }
700:     if (eigi) {
701:       MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
702:     }
703:   }
704:   return(0);
705: }

707: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
708: {
709:   PetscInt    i,newn,ld=ds->ld,l=ds->l;
710:   PetscScalar *A;

713:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
714:   A = ds->mat[DS_MAT_A];
715:   /* be careful not to break a diagonal 2x2 block */
716:   if (A[n+(n-1)*ld]==0.0) newn = n;
717:   else {
718:     if (n<ds->n-1) newn = n+1;
719:     else newn = n-1;
720:   }
721:   if (ds->extrarow && ds->k==ds->n) {
722:     /* copy entries of extra row to the new position, then clean last row */
723:     for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
724:     for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
725:   }
726:   ds->k = 0;
727:   ds->n = newn;
728:   return(0);
729: }

731: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
732: {
733: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
735:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
736: #else
738:   PetscScalar    *work;
739:   PetscReal      *rwork;
740:   PetscBLASInt   *ipiv;
741:   PetscBLASInt   lwork,info,n,ld;
742:   PetscReal      hn,hin;
743:   PetscScalar    *A;

746:   PetscBLASIntCast(ds->n,&n);
747:   PetscBLASIntCast(ds->ld,&ld);
748:   lwork = 8*ld;
749:   DSAllocateWork_Private(ds,lwork,ld,ld);
750:   work  = ds->work;
751:   rwork = ds->rwork;
752:   ipiv  = ds->iwork;

754:   /* use workspace matrix W to avoid overwriting A */
755:   DSAllocateMat_Private(ds,DS_MAT_W);
756:   A = ds->mat[DS_MAT_W];
757:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

759:   /* norm of A */
760:   if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
761:   else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);

763:   /* norm of inv(A) */
764:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
765:   SlepcCheckLapackInfo("getrf",info);
766:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
767:   SlepcCheckLapackInfo("getri",info);
768:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

770:   *cond = hn*hin;
771:   return(0);
772: #endif
773: }

775: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
776: {
777: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
779:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
780: #else
782:   PetscInt       i,j;
783:   PetscBLASInt   *ipiv,info,n,ld,one=1,ncol;
784:   PetscScalar    *A,*B,*Q,*g=gin,*ghat;
785:   PetscScalar    done=1.0,dmone=-1.0,dzero=0.0;
786:   PetscReal      gnorm;

789:   PetscBLASIntCast(ds->n,&n);
790:   PetscBLASIntCast(ds->ld,&ld);
791:   A  = ds->mat[DS_MAT_A];

793:   if (!recover) {

795:     DSAllocateWork_Private(ds,0,0,ld);
796:     ipiv = ds->iwork;
797:     if (!g) {
798:       DSAllocateWork_Private(ds,ld,0,0);
799:       g = ds->work;
800:     }
801:     /* use workspace matrix W to factor A-tau*eye(n) */
802:     DSAllocateMat_Private(ds,DS_MAT_W);
803:     B = ds->mat[DS_MAT_W];
804:     PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);

806:     /* Vector g initialy stores b = beta*e_n^T */
807:     PetscMemzero(g,n*sizeof(PetscScalar));
808:     g[n-1] = beta;

810:     /* g = (A-tau*eye(n))'\b */
811:     for (i=0;i<n;i++) B[i+i*ld] -= tau;
812:     PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
813:     SlepcCheckLapackInfo("getrf",info);
814:     PetscLogFlops(2.0*n*n*n/3.0);
815:     PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
816:     SlepcCheckLapackInfo("getrs",info);
817:     PetscLogFlops(2.0*n*n-n);

819:     /* A = A + g*b' */
820:     for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;

822:   } else { /* recover */

825:     DSAllocateWork_Private(ds,ld,0,0);
826:     ghat = ds->work;
827:     Q    = ds->mat[DS_MAT_Q];

829:     /* g^ = -Q(:,idx)'*g */
830:     PetscBLASIntCast(ds->l+ds->k,&ncol);
831:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));

833:     /* A = A + g^*b' */
834:     for (i=0;i<ds->l+ds->k;i++)
835:       for (j=ds->l;j<ds->l+ds->k;j++)
836:         A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;

838:     /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
839:     PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
840:   }

842:   /* Compute gamma factor */
843:   if (gamma) {
844:     gnorm = 0.0;
845:     for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
846:     *gamma = PetscSqrtReal(1.0+gnorm);
847:   }
848:   return(0);
849: #endif
850: }

852: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
853: {
855:   ds->ops->allocate      = DSAllocate_NHEP;
856:   ds->ops->view          = DSView_NHEP;
857:   ds->ops->vectors       = DSVectors_NHEP;
858:   ds->ops->solve[0]      = DSSolve_NHEP;
859:   ds->ops->sort          = DSSort_NHEP;
860:   ds->ops->sortperm      = DSSortWithPermutation_NHEP;
861:   ds->ops->synchronize   = DSSynchronize_NHEP;
862:   ds->ops->truncate      = DSTruncate_NHEP;
863:   ds->ops->update        = DSUpdateExtraRow_NHEP;
864:   ds->ops->cond          = DSCond_NHEP;
865:   ds->ops->transharm     = DSTranslateHarmonic_NHEP;
866:   return(0);
867: }