Actual source code: dsnhep.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_NHEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: PetscFree(ds->perm);
22: PetscMalloc1(ld,&ds->perm);
23: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
24: return(0);
25: }
27: PetscErrorCode DSView_NHEP(DS ds,PetscViewer viewer)
28: {
29: PetscErrorCode ierr;
30: PetscViewerFormat format;
33: PetscViewerGetFormat(viewer,&format);
34: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) return(0);
35: DSViewMat(ds,viewer,DS_MAT_A);
36: if (ds->state>DS_STATE_INTERMEDIATE) {
37: DSViewMat(ds,viewer,DS_MAT_Q);
38: }
39: if (ds->mat[DS_MAT_X]) {
40: DSViewMat(ds,viewer,DS_MAT_X);
41: }
42: if (ds->mat[DS_MAT_Y]) {
43: DSViewMat(ds,viewer,DS_MAT_Y);
44: }
45: return(0);
46: }
48: static PetscErrorCode DSVectors_NHEP_Refined_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
49: {
50: #if defined(PETSC_MISSING_LAPACK_GESVD)
52: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
53: #else
55: PetscInt i,j;
56: PetscBLASInt info,ld,n,n1,lwork,inc=1;
57: PetscScalar sdummy,done=1.0,zero=0.0;
58: PetscReal *sigma;
59: PetscBool iscomplex = PETSC_FALSE;
60: PetscScalar *A = ds->mat[DS_MAT_A];
61: PetscScalar *Q = ds->mat[DS_MAT_Q];
62: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
63: PetscScalar *W;
66: if (left) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for left vectors");
67: PetscBLASIntCast(ds->n,&n);
68: PetscBLASIntCast(ds->ld,&ld);
69: n1 = n+1;
70: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
71: if (iscomplex) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complex eigenvalues yet");
72: DSAllocateWork_Private(ds,5*ld,6*ld,0);
73: DSAllocateMat_Private(ds,DS_MAT_W);
74: W = ds->mat[DS_MAT_W];
75: lwork = 5*ld;
76: sigma = ds->rwork+5*ld;
78: /* build A-w*I in W */
79: for (j=0;j<n;j++)
80: for (i=0;i<=n;i++)
81: W[i+j*ld] = A[i+j*ld];
82: for (i=0;i<n;i++)
83: W[i+i*ld] -= A[(*k)+(*k)*ld];
85: /* compute SVD of W */
86: #if !defined(PETSC_USE_COMPLEX)
87: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,&info));
88: #else
89: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("N","O",&n1,&n,W,&ld,sigma,&sdummy,&ld,&sdummy,&ld,ds->work,&lwork,ds->rwork,&info));
90: #endif
91: SlepcCheckLapackInfo("gesvd",info);
93: /* the smallest singular value is the new error estimate */
94: if (rnorm) *rnorm = sigma[n-1];
96: /* update vector with right singular vector associated to smallest singular value,
97: accumulating the transformation matrix Q */
98: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,W+n-1,&ld,&zero,X+(*k)*ld,&inc));
99: return(0);
100: #endif
101: }
103: static PetscErrorCode DSVectors_NHEP_Refined_All(DS ds,PetscBool left)
104: {
106: PetscInt i;
109: for (i=0;i<ds->n;i++) {
110: DSVectors_NHEP_Refined_Some(ds,&i,NULL,left);
111: }
112: return(0);
113: }
115: static PetscErrorCode DSVectors_NHEP_Eigen_Some(DS ds,PetscInt *k,PetscReal *rnorm,PetscBool left)
116: {
117: #if defined(SLEPC_MISSING_LAPACK_TREVC)
119: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
120: #else
122: PetscInt i;
123: PetscBLASInt mm=1,mout,info,ld,n,*select,inc = 1;
124: PetscScalar tmp,done=1.0,zero=0.0;
125: PetscReal norm;
126: PetscBool iscomplex = PETSC_FALSE;
127: PetscScalar *A = ds->mat[DS_MAT_A];
128: PetscScalar *Q = ds->mat[DS_MAT_Q];
129: PetscScalar *X = ds->mat[left?DS_MAT_Y:DS_MAT_X];
130: PetscScalar *Y;
133: PetscBLASIntCast(ds->n,&n);
134: PetscBLASIntCast(ds->ld,&ld);
135: DSAllocateWork_Private(ds,0,0,ld);
136: select = ds->iwork;
137: for (i=0;i<n;i++) select[i] = (PetscBLASInt)PETSC_FALSE;
139: /* compute k-th eigenvector Y of A */
140: Y = X+(*k)*ld;
141: select[*k] = (PetscBLASInt)PETSC_TRUE;
142: #if !defined(PETSC_USE_COMPLEX)
143: if ((*k)<n-1 && A[(*k)+1+(*k)*ld]!=0.0) iscomplex = PETSC_TRUE;
144: mm = iscomplex? 2: 1;
145: if (iscomplex) select[(*k)+1] = (PetscBLASInt)PETSC_TRUE;
146: DSAllocateWork_Private(ds,3*ld,0,0);
147: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,&info));
148: #else
149: DSAllocateWork_Private(ds,2*ld,ld,0);
150: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(left?"L":"R","S",select,&n,A,&ld,Y,&ld,Y,&ld,&mm,&mout,ds->work,ds->rwork,&info));
151: #endif
152: SlepcCheckLapackInfo("trevc",info);
153: if (mout != mm) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Inconsistent arguments");
155: /* accumulate and normalize eigenvectors */
156: if (ds->state>=DS_STATE_CONDENSED) {
157: PetscMemcpy(ds->work,Y,mout*ld*sizeof(PetscScalar));
158: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work,&inc,&zero,Y,&inc));
159: #if !defined(PETSC_USE_COMPLEX)
160: if (iscomplex) PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,Q,&ld,ds->work+ld,&inc,&zero,Y+ld,&inc));
161: #endif
162: norm = BLASnrm2_(&n,Y,&inc);
163: #if !defined(PETSC_USE_COMPLEX)
164: if (iscomplex) {
165: tmp = BLASnrm2_(&n,Y+ld,&inc);
166: norm = SlepcAbsEigenvalue(norm,tmp);
167: }
168: #endif
169: tmp = 1.0 / norm;
170: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y,&inc));
171: #if !defined(PETSC_USE_COMPLEX)
172: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Y+ld,&inc));
173: #endif
174: }
176: /* set output arguments */
177: if (iscomplex) (*k)++;
178: if (rnorm) {
179: if (iscomplex) *rnorm = SlepcAbsEigenvalue(Y[n-1],Y[n-1+ld]);
180: else *rnorm = PetscAbsScalar(Y[n-1]);
181: }
182: return(0);
183: #endif
184: }
186: static PetscErrorCode DSVectors_NHEP_Eigen_All(DS ds,PetscBool left)
187: {
188: #if defined(SLEPC_MISSING_LAPACK_TREVC)
190: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREVC - Lapack routine is unavailable");
191: #else
193: PetscInt i;
194: PetscBLASInt n,ld,mout,info,inc = 1;
195: PetscBool iscomplex;
196: PetscScalar *X,*Y,*Z,*A = ds->mat[DS_MAT_A],tmp;
197: PetscReal norm;
198: const char *side,*back;
201: PetscBLASIntCast(ds->n,&n);
202: PetscBLASIntCast(ds->ld,&ld);
203: if (left) {
204: X = NULL;
205: Y = ds->mat[DS_MAT_Y];
206: side = "L";
207: } else {
208: X = ds->mat[DS_MAT_X];
209: Y = NULL;
210: side = "R";
211: }
212: Z = left? Y: X;
213: if (ds->state>=DS_STATE_CONDENSED) {
214: /* DSSolve() has been called, backtransform with matrix Q */
215: back = "B";
216: PetscMemcpy(Z,ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
217: } else back = "A";
218: #if !defined(PETSC_USE_COMPLEX)
219: DSAllocateWork_Private(ds,3*ld,0,0);
220: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,&info));
221: #else
222: DSAllocateWork_Private(ds,2*ld,ld,0);
223: PetscStackCallBLAS("LAPACKtrevc",LAPACKtrevc_(side,back,NULL,&n,A,&ld,Y,&ld,X,&ld,&n,&mout,ds->work,ds->rwork,&info));
224: #endif
225: SlepcCheckLapackInfo("trevc",info);
227: /* normalize eigenvectors */
228: for (i=0;i<n;i++) {
229: iscomplex = (i<n-1 && A[i+1+i*ld]!=0.0)? PETSC_TRUE: PETSC_FALSE;
230: norm = BLASnrm2_(&n,Z+i*ld,&inc);
231: #if !defined(PETSC_USE_COMPLEX)
232: if (iscomplex) {
233: tmp = BLASnrm2_(&n,Z+(i+1)*ld,&inc);
234: norm = SlepcAbsEigenvalue(norm,tmp);
235: }
236: #endif
237: tmp = 1.0 / norm;
238: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+i*ld,&inc));
239: #if !defined(PETSC_USE_COMPLEX)
240: if (iscomplex) PetscStackCallBLAS("BLASscal",BLASscal_(&n,&tmp,Z+(i+1)*ld,&inc));
241: #endif
242: if (iscomplex) i++;
243: }
244: return(0);
245: #endif
246: }
248: PetscErrorCode DSVectors_NHEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
249: {
253: switch (mat) {
254: case DS_MAT_X:
255: if (ds->refined) {
256: if (!ds->extrarow) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Refined vectors require activating the extra row");
257: if (j) {
258: DSVectors_NHEP_Refined_Some(ds,j,rnorm,PETSC_FALSE);
259: } else {
260: DSVectors_NHEP_Refined_All(ds,PETSC_FALSE);
261: }
262: } else {
263: if (j) {
264: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_FALSE);
265: } else {
266: DSVectors_NHEP_Eigen_All(ds,PETSC_FALSE);
267: }
268: }
269: break;
270: case DS_MAT_Y:
271: if (ds->refined) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
272: if (j) {
273: DSVectors_NHEP_Eigen_Some(ds,j,rnorm,PETSC_TRUE);
274: } else {
275: DSVectors_NHEP_Eigen_All(ds,PETSC_TRUE);
276: }
277: break;
278: case DS_MAT_U:
279: case DS_MAT_VT:
280: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
281: break;
282: default:
283: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
284: }
285: if (ds->state < DS_STATE_CONDENSED) {
286: DSSetState(ds,DS_STATE_CONDENSED);
287: }
288: return(0);
289: }
291: static PetscErrorCode DSSort_NHEP_Arbitrary(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
292: {
293: #if defined(PETSC_MISSING_LAPACK_TRSEN)
295: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRSEN - Lapack routine is unavailable");
296: #else
298: PetscInt i;
299: PetscBLASInt info,n,ld,mout,lwork,*selection;
300: PetscScalar *T = ds->mat[DS_MAT_A],*Q = ds->mat[DS_MAT_Q],*work;
301: #if !defined(PETSC_USE_COMPLEX)
302: PetscBLASInt *iwork,liwork;
303: #endif
306: if (!k) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_WRONG,"Must supply argument k");
307: PetscBLASIntCast(ds->n,&n);
308: PetscBLASIntCast(ds->ld,&ld);
309: #if !defined(PETSC_USE_COMPLEX)
310: lwork = n;
311: liwork = 1;
312: DSAllocateWork_Private(ds,lwork,0,liwork+n);
313: work = ds->work;
314: lwork = ds->lwork;
315: selection = ds->iwork;
316: iwork = ds->iwork + n;
317: liwork = ds->liwork - n;
318: #else
319: lwork = 1;
320: DSAllocateWork_Private(ds,lwork,0,n);
321: work = ds->work;
322: selection = ds->iwork;
323: #endif
324: /* Compute the selected eigenvalue to be in the leading position */
325: DSSortEigenvalues_Private(ds,rr,ri,ds->perm,PETSC_FALSE);
326: PetscMemzero(selection,n*sizeof(PetscBLASInt));
327: for (i=0;i<*k;i++) selection[ds->perm[i]] = 1;
328: #if !defined(PETSC_USE_COMPLEX)
329: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,wi,&mout,NULL,NULL,work,&lwork,iwork,&liwork,&info));
330: #else
331: PetscStackCallBLAS("LAPACKtrsen",LAPACKtrsen_("N","V",selection,&n,T,&ld,Q,&ld,wr,&mout,NULL,NULL,work,&lwork,&info));
332: #endif
333: SlepcCheckLapackInfo("trsen",info);
334: *k = mout;
335: return(0);
336: #endif
337: }
339: static PetscErrorCode DSSort_NHEP_Total(DS ds,PetscScalar *wr,PetscScalar *wi)
340: {
341: #if defined(SLEPC_MISSING_LAPACK_TREXC)
343: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
344: #else
346: PetscScalar re;
347: PetscInt i,j,pos,result;
348: PetscBLASInt ifst,ilst,info,n,ld;
349: PetscScalar *T = ds->mat[DS_MAT_A];
350: PetscScalar *Q = ds->mat[DS_MAT_Q];
351: #if !defined(PETSC_USE_COMPLEX)
352: PetscScalar *work,im;
353: #endif
356: PetscBLASIntCast(ds->n,&n);
357: PetscBLASIntCast(ds->ld,&ld);
358: #if !defined(PETSC_USE_COMPLEX)
359: DSAllocateWork_Private(ds,ld,0,0);
360: work = ds->work;
361: #endif
362: /* selection sort */
363: for (i=ds->l;i<n-1;i++) {
364: re = wr[i];
365: #if !defined(PETSC_USE_COMPLEX)
366: im = wi[i];
367: #endif
368: pos = 0;
369: j=i+1; /* j points to the next eigenvalue */
370: #if !defined(PETSC_USE_COMPLEX)
371: if (im != 0) j=i+2;
372: #endif
373: /* find minimum eigenvalue */
374: for (;j<n;j++) {
375: #if !defined(PETSC_USE_COMPLEX)
376: SlepcSCCompare(ds->sc,re,im,wr[j],wi[j],&result);
377: #else
378: SlepcSCCompare(ds->sc,re,0.0,wr[j],0.0,&result);
379: #endif
380: if (result > 0) {
381: re = wr[j];
382: #if !defined(PETSC_USE_COMPLEX)
383: im = wi[j];
384: #endif
385: pos = j;
386: }
387: #if !defined(PETSC_USE_COMPLEX)
388: if (wi[j] != 0) j++;
389: #endif
390: }
391: if (pos) {
392: /* interchange blocks */
393: PetscBLASIntCast(pos+1,&ifst);
394: PetscBLASIntCast(i+1,&ilst);
395: #if !defined(PETSC_USE_COMPLEX)
396: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
397: #else
398: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
399: #endif
400: SlepcCheckLapackInfo("trexc",info);
401: /* recover original eigenvalues from T matrix */
402: for (j=i;j<n;j++) {
403: wr[j] = T[j+j*ld];
404: #if !defined(PETSC_USE_COMPLEX)
405: if (j<n-1 && T[j+1+j*ld] != 0.0) {
406: /* complex conjugate eigenvalue */
407: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) *
408: PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
409: wr[j+1] = wr[j];
410: wi[j+1] = -wi[j];
411: j++;
412: } else {
413: wi[j] = 0.0;
414: }
415: #endif
416: }
417: }
418: #if !defined(PETSC_USE_COMPLEX)
419: if (wi[i] != 0) i++;
420: #endif
421: }
422: return(0);
423: #endif
424: }
426: PetscErrorCode DSSort_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
427: {
431: if (!rr || wr == rr) {
432: DSSort_NHEP_Total(ds,wr,wi);
433: } else {
434: DSSort_NHEP_Arbitrary(ds,wr,wi,rr,ri,k);
435: }
436: return(0);
437: }
439: static PetscErrorCode DSSortWithPermutation_NHEP(DS ds,PetscInt *perm,PetscScalar *wr,PetscScalar *wi)
440: {
441: #if defined(SLEPC_MISSING_LAPACK_TREXC)
443: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TREXC - Lapack routine is unavailable");
444: #else
446: PetscInt i,j,pos,inc=1;
447: PetscBLASInt ifst,ilst,info,n,ld;
448: PetscScalar *T = ds->mat[DS_MAT_A];
449: PetscScalar *Q = ds->mat[DS_MAT_Q];
450: #if !defined(PETSC_USE_COMPLEX)
451: PetscScalar *work;
452: #endif
455: PetscBLASIntCast(ds->n,&n);
456: PetscBLASIntCast(ds->ld,&ld);
457: #if !defined(PETSC_USE_COMPLEX)
458: DSAllocateWork_Private(ds,ld,0,0);
459: work = ds->work;
460: #endif
461: for (i=ds->l;i<n-1;i++) {
462: pos = perm[i];
463: #if !defined(PETSC_USE_COMPLEX)
464: inc = (pos<n-1 && T[pos+1+pos*ld] != 0.0)? 2: 1;
465: #endif
466: if (pos!=i) {
467: #if !defined(PETSC_USE_COMPLEX)
468: if ((T[pos+(pos-1)*ld] != 0.0 && perm[i+1]!=pos-1) || (T[pos+1+pos*ld] != 0.0 && perm[i+1]!=pos+1))
469: SETERRQ1(PETSC_COMM_SELF,1,"Invalid permutation due to a 2x2 block at position %D",pos);
470: #endif
472: /* interchange blocks */
473: PetscBLASIntCast(pos+1,&ifst);
474: PetscBLASIntCast(i+1,&ilst);
475: #if !defined(PETSC_USE_COMPLEX)
476: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,work,&info));
477: #else
478: PetscStackCallBLAS("LAPACKtrexc",LAPACKtrexc_("V",&n,T,&ld,Q,&ld,&ifst,&ilst,&info));
479: #endif
480: SlepcCheckLapackInfo("trexc",info);
481: for (j=i+1;j<n;j++) {
482: if (perm[j]>=i && perm[j]<pos) perm[j]+=inc;
483: }
484: perm[i] = i;
485: if (inc==2) perm[i+1] = i+1;
486: }
487: if (inc==2) i++;
488: }
489: /* recover original eigenvalues from T matrix */
490: for (j=ds->l;j<n;j++) {
491: wr[j] = T[j+j*ld];
492: #if !defined(PETSC_USE_COMPLEX)
493: if (j<n-1 && T[j+1+j*ld] != 0.0) {
494: /* complex conjugate eigenvalue */
495: wi[j] = PetscSqrtReal(PetscAbsReal(T[j+1+j*ld])) * PetscSqrtReal(PetscAbsReal(T[j+(j+1)*ld]));
496: wr[j+1] = wr[j];
497: wi[j+1] = -wi[j];
498: j++;
499: } else {
500: wi[j] = 0.0;
501: }
502: #endif
503: }
504: return(0);
505: #endif
506: }
508: PetscErrorCode DSUpdateExtraRow_NHEP(DS ds)
509: {
511: PetscInt i;
512: PetscBLASInt n,ld,incx=1;
513: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
516: PetscBLASIntCast(ds->n,&n);
517: PetscBLASIntCast(ds->ld,&ld);
518: A = ds->mat[DS_MAT_A];
519: Q = ds->mat[DS_MAT_Q];
520: DSAllocateWork_Private(ds,2*ld,0,0);
521: x = ds->work;
522: y = ds->work+ld;
523: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
524: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
525: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
526: ds->k = n;
527: return(0);
528: }
530: /*
531: Reduce a matrix A to upper Hessenberg form Q'*A*Q, where Q is an orthogonal matrix.
532: The result overwrites A. Matrix A has the form
534: [ S | * ]
535: A = [-------]
536: [ R | H ]
538: where S is an upper (quasi-)triangular matrix of order k, H is an upper Hessenberg
539: matrix of order n-k, and R is all zeros except the first row (the arrow).
540: The algorithm uses elementary reflectors to annihilate entries in the arrow, and
541: then proceeds upwards.
542: If ilo>1, then it is assumed that the first ilo-1 entries of the arrow are zero, and
543: hence the first ilo-1 rows and columns of Q are set to the identity matrix.
545: Required workspace is 2*n.
546: */
547: static PetscErrorCode ArrowHessenberg(PetscBLASInt n,PetscBLASInt k,PetscBLASInt ilo,PetscScalar *A,PetscBLASInt lda,PetscScalar *Q,PetscBLASInt ldq,PetscScalar *work)
548: {
549: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
551: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
552: #else
553: PetscBLASInt i,j,n0,m,inc=1,incn=-1;
554: PetscScalar t,*v=work,*w=work+n,tau,tauc;
557: m = n-ilo+1;
558: for (i=k;i>ilo;i--) {
559: for (j=0;j<=i-ilo;j++) v[j] = A[i+(i-j-1)*lda]; /* _larfg does not allow negative inc, so use buffer */
560: n0 = i-ilo+1;
561: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,v,v+1,&inc,&tau));
562: for (j=1;j<=i-ilo;j++) v[j] = PetscConj(v[j]);
563: tauc = PetscConj(tau);
564: A[i+(i-1)*lda] = v[0];
565: v[0] = 1.0;
566: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&i,&n0,v,&incn,&tauc,A+(ilo-1)*lda,&lda,w));
567: for (j=1;j<=i-ilo;j++) A[i+(i-j-1)*lda] = 0.0;
568: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,A+ilo-1+(ilo-1)*lda,&lda,w));
569: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,v,&incn,&tau,Q+ilo-1+(ilo-1)*ldq,&ldq,w));
570: }
571: /* trivial in-place transposition of Q */
572: for (j=ilo-1;j<k;j++) {
573: for (i=j;i<k;i++) {
574: t = Q[i+j*ldq];
575: if (i!=j) Q[i+j*ldq] = PetscConj(Q[j+i*ldq]);
576: Q[j+i*ldq] = PetscConj(t);
577: }
578: }
579: return(0);
580: #endif
581: }
583: PetscErrorCode DSSolve_NHEP(DS ds,PetscScalar *wr,PetscScalar *wi)
584: {
585: #if defined(SLEPC_MISSING_LAPACK_GEHRD) || defined(SLEPC_MISSING_LAPACK_ORGHR) || defined(PETSC_MISSING_LAPACK_HSEQR)
587: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEHRD/ORGHR/HSEQR - Lapack routines are unavailable");
588: #else
590: PetscScalar *work,*tau;
591: PetscInt i,j;
592: PetscBLASInt ilo,lwork,info,n,k,ld;
593: PetscScalar *A = ds->mat[DS_MAT_A];
594: PetscScalar *Q = ds->mat[DS_MAT_Q];
597: #if !defined(PETSC_USE_COMPLEX)
599: #endif
600: PetscBLASIntCast(ds->n,&n);
601: PetscBLASIntCast(ds->ld,&ld);
602: PetscBLASIntCast(ds->l+1,&ilo);
603: PetscBLASIntCast(ds->k,&k);
604: DSAllocateWork_Private(ds,ld+6*ld,0,0);
605: tau = ds->work;
606: work = ds->work+ld;
607: lwork = 6*ld;
609: /* initialize orthogonal matrix */
610: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
611: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
612: if (n==1) { /* quick return */
613: wr[0] = A[0];
614: wi[0] = 0.0;
615: return(0);
616: }
618: /* reduce to upper Hessenberg form */
619: if (ds->state<DS_STATE_INTERMEDIATE) {
620: if (PETSC_FALSE && k>0) {
621: ArrowHessenberg(n,k,ilo,A,ld,Q,ld,work);
622: } else {
623: PetscStackCallBLAS("LAPACKgehrd",LAPACKgehrd_(&n,&ilo,&n,A,&ld,tau,work,&lwork,&info));
624: SlepcCheckLapackInfo("gehrd",info);
625: for (j=0;j<n-1;j++) {
626: for (i=j+2;i<n;i++) {
627: Q[i+j*ld] = A[i+j*ld];
628: A[i+j*ld] = 0.0;
629: }
630: }
631: PetscStackCallBLAS("LAPACKorghr",LAPACKorghr_(&n,&ilo,&n,Q,&ld,tau,work,&lwork,&info));
632: SlepcCheckLapackInfo("orghr",info);
633: }
634: }
636: /* compute the (real) Schur form */
637: #if !defined(PETSC_USE_COMPLEX)
638: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,wi,Q,&ld,work,&lwork,&info));
639: for (j=0;j<ds->l;j++) {
640: if (j==n-1 || A[j+1+j*ld] == 0.0) {
641: /* real eigenvalue */
642: wr[j] = A[j+j*ld];
643: wi[j] = 0.0;
644: } else {
645: /* complex eigenvalue */
646: wr[j] = A[j+j*ld];
647: wr[j+1] = A[j+j*ld];
648: wi[j] = PetscSqrtReal(PetscAbsReal(A[j+1+j*ld])) *
649: PetscSqrtReal(PetscAbsReal(A[j+(j+1)*ld]));
650: wi[j+1] = -wi[j];
651: j++;
652: }
653: }
654: #else
655: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","V",&n,&ilo,&n,A,&ld,wr,Q,&ld,work,&lwork,&info));
656: if (wi) for (i=ds->l;i<n;i++) wi[i] = 0.0;
657: #endif
658: SlepcCheckLapackInfo("hseqr",info);
659: return(0);
660: #endif
661: }
663: PetscErrorCode DSSynchronize_NHEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
664: {
666: PetscInt ld=ds->ld,l=ds->l,k;
667: PetscMPIInt n,rank,off=0,size,ldn;
670: k = (ds->n-l)*ld;
671: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
672: if (eigr) k += ds->n-l;
673: if (eigi) k += ds->n-l;
674: DSAllocateWork_Private(ds,k,0,0);
675: PetscMPIIntCast(k*sizeof(PetscScalar),&size);
676: PetscMPIIntCast(ds->n-l,&n);
677: PetscMPIIntCast(ld*(ds->n-l),&ldn);
678: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
679: if (!rank) {
680: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
681: if (ds->state>DS_STATE_RAW) {
682: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
683: }
684: if (eigr) {
685: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
686: }
687: if (eigi) {
688: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
689: }
690: }
691: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
692: if (rank) {
693: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
694: if (ds->state>DS_STATE_RAW) {
695: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
696: }
697: if (eigr) {
698: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
699: }
700: if (eigi) {
701: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
702: }
703: }
704: return(0);
705: }
707: PetscErrorCode DSTruncate_NHEP(DS ds,PetscInt n)
708: {
709: PetscInt i,newn,ld=ds->ld,l=ds->l;
710: PetscScalar *A;
713: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
714: A = ds->mat[DS_MAT_A];
715: /* be careful not to break a diagonal 2x2 block */
716: if (A[n+(n-1)*ld]==0.0) newn = n;
717: else {
718: if (n<ds->n-1) newn = n+1;
719: else newn = n-1;
720: }
721: if (ds->extrarow && ds->k==ds->n) {
722: /* copy entries of extra row to the new position, then clean last row */
723: for (i=l;i<newn;i++) A[newn+i*ld] = A[ds->n+i*ld];
724: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
725: }
726: ds->k = 0;
727: ds->n = newn;
728: return(0);
729: }
731: PetscErrorCode DSCond_NHEP(DS ds,PetscReal *cond)
732: {
733: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE) || defined(SLEPC_MISSING_LAPACK_LANHS)
735: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE/LANHS - Lapack routines are unavailable");
736: #else
738: PetscScalar *work;
739: PetscReal *rwork;
740: PetscBLASInt *ipiv;
741: PetscBLASInt lwork,info,n,ld;
742: PetscReal hn,hin;
743: PetscScalar *A;
746: PetscBLASIntCast(ds->n,&n);
747: PetscBLASIntCast(ds->ld,&ld);
748: lwork = 8*ld;
749: DSAllocateWork_Private(ds,lwork,ld,ld);
750: work = ds->work;
751: rwork = ds->rwork;
752: ipiv = ds->iwork;
754: /* use workspace matrix W to avoid overwriting A */
755: DSAllocateMat_Private(ds,DS_MAT_W);
756: A = ds->mat[DS_MAT_W];
757: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
759: /* norm of A */
760: if (ds->state<DS_STATE_INTERMEDIATE) hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
761: else hn = LAPACKlanhs_("I",&n,A,&ld,rwork);
763: /* norm of inv(A) */
764: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
765: SlepcCheckLapackInfo("getrf",info);
766: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
767: SlepcCheckLapackInfo("getri",info);
768: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
770: *cond = hn*hin;
771: return(0);
772: #endif
773: }
775: PetscErrorCode DSTranslateHarmonic_NHEP(DS ds,PetscScalar tau,PetscReal beta,PetscBool recover,PetscScalar *gin,PetscReal *gamma)
776: {
777: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRS)
779: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRS - Lapack routines are unavailable");
780: #else
782: PetscInt i,j;
783: PetscBLASInt *ipiv,info,n,ld,one=1,ncol;
784: PetscScalar *A,*B,*Q,*g=gin,*ghat;
785: PetscScalar done=1.0,dmone=-1.0,dzero=0.0;
786: PetscReal gnorm;
789: PetscBLASIntCast(ds->n,&n);
790: PetscBLASIntCast(ds->ld,&ld);
791: A = ds->mat[DS_MAT_A];
793: if (!recover) {
795: DSAllocateWork_Private(ds,0,0,ld);
796: ipiv = ds->iwork;
797: if (!g) {
798: DSAllocateWork_Private(ds,ld,0,0);
799: g = ds->work;
800: }
801: /* use workspace matrix W to factor A-tau*eye(n) */
802: DSAllocateMat_Private(ds,DS_MAT_W);
803: B = ds->mat[DS_MAT_W];
804: PetscMemcpy(B,A,sizeof(PetscScalar)*ld*ld);
806: /* Vector g initialy stores b = beta*e_n^T */
807: PetscMemzero(g,n*sizeof(PetscScalar));
808: g[n-1] = beta;
810: /* g = (A-tau*eye(n))'\b */
811: for (i=0;i<n;i++) B[i+i*ld] -= tau;
812: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,B,&ld,ipiv,&info));
813: SlepcCheckLapackInfo("getrf",info);
814: PetscLogFlops(2.0*n*n*n/3.0);
815: PetscStackCallBLAS("LAPACKgetrs",LAPACKgetrs_("C",&n,&one,B,&ld,ipiv,g,&ld,&info));
816: SlepcCheckLapackInfo("getrs",info);
817: PetscLogFlops(2.0*n*n-n);
819: /* A = A + g*b' */
820: for (i=0;i<n;i++) A[i+(n-1)*ld] += g[i]*beta;
822: } else { /* recover */
825: DSAllocateWork_Private(ds,ld,0,0);
826: ghat = ds->work;
827: Q = ds->mat[DS_MAT_Q];
829: /* g^ = -Q(:,idx)'*g */
830: PetscBLASIntCast(ds->l+ds->k,&ncol);
831: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&ncol,&dmone,Q,&ld,g,&one,&dzero,ghat,&one));
833: /* A = A + g^*b' */
834: for (i=0;i<ds->l+ds->k;i++)
835: for (j=ds->l;j<ds->l+ds->k;j++)
836: A[i+j*ld] += ghat[i]*Q[n-1+j*ld]*beta;
838: /* g~ = (I-Q(:,idx)*Q(:,idx)')*g = g+Q(:,idx)*g^ */
839: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&ncol,&done,Q,&ld,ghat,&one,&done,g,&one));
840: }
842: /* Compute gamma factor */
843: if (gamma) {
844: gnorm = 0.0;
845: for (i=0;i<n;i++) gnorm = gnorm + PetscRealPart(g[i]*PetscConj(g[i]));
846: *gamma = PetscSqrtReal(1.0+gnorm);
847: }
848: return(0);
849: #endif
850: }
852: SLEPC_EXTERN PetscErrorCode DSCreate_NHEP(DS ds)
853: {
855: ds->ops->allocate = DSAllocate_NHEP;
856: ds->ops->view = DSView_NHEP;
857: ds->ops->vectors = DSVectors_NHEP;
858: ds->ops->solve[0] = DSSolve_NHEP;
859: ds->ops->sort = DSSort_NHEP;
860: ds->ops->sortperm = DSSortWithPermutation_NHEP;
861: ds->ops->synchronize = DSSynchronize_NHEP;
862: ds->ops->truncate = DSTruncate_NHEP;
863: ds->ops->update = DSUpdateExtraRow_NHEP;
864: ds->ops->cond = DSCond_NHEP;
865: ds->ops->transharm = DSTranslateHarmonic_NHEP;
866: return(0);
867: }