Actual source code: dshep.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
15: {
19: DSAllocateMat_Private(ds,DS_MAT_A);
20: DSAllocateMat_Private(ds,DS_MAT_Q);
21: DSAllocateMatReal_Private(ds,DS_MAT_T);
22: PetscFree(ds->perm);
23: PetscMalloc1(ld,&ds->perm);
24: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
25: return(0);
26: }
28: /* 0 l k n-1
29: -----------------------------------------
30: |* . . |
31: | * . . |
32: | * . . |
33: | * . . |
34: |. . . . o o |
35: | o o |
36: | o o |
37: | o o |
38: | o o |
39: | o o |
40: |. . . . o o o o o o o x |
41: | x x x |
42: | x x x |
43: | x x x |
44: | x x x |
45: | x x x |
46: | x x x |
47: | x x x |
48: | x x x|
49: | x x|
50: -----------------------------------------
51: */
53: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
54: {
56: PetscReal *T = ds->rmat[DS_MAT_T];
57: PetscScalar *A = ds->mat[DS_MAT_A];
58: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
61: if (tocompact) { /* switch from dense (arrow) to compact storage */
62: PetscMemzero(T,3*ld*sizeof(PetscReal));
63: for (i=0;i<k;i++) {
64: T[i] = PetscRealPart(A[i+i*ld]);
65: T[i+ld] = PetscRealPart(A[k+i*ld]);
66: }
67: for (i=k;i<n-1;i++) {
68: T[i] = PetscRealPart(A[i+i*ld]);
69: T[i+ld] = PetscRealPart(A[i+1+i*ld]);
70: }
71: T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
72: if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
73: } else { /* switch from compact (arrow) to dense storage */
74: PetscMemzero(A,ld*ld*sizeof(PetscScalar));
75: for (i=0;i<k;i++) {
76: A[i+i*ld] = T[i];
77: A[k+i*ld] = T[i+ld];
78: A[i+k*ld] = T[i+ld];
79: }
80: A[k+k*ld] = T[k];
81: for (i=k+1;i<n;i++) {
82: A[i+i*ld] = T[i];
83: A[i-1+i*ld] = T[i-1+ld];
84: A[i+(i-1)*ld] = T[i-1+ld];
85: }
86: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
87: }
88: return(0);
89: }
91: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
92: {
93: PetscErrorCode ierr;
94: PetscViewerFormat format;
95: PetscInt i,j,r,c,rows;
96: PetscReal value;
97: const char *methodname[] = {
98: "Implicit QR method (_steqr)",
99: "Relatively Robust Representations (_stevr)",
100: "Divide and Conquer method (_stedc)",
101: "Block Divide and Conquer method (dsbtdc)"
102: };
103: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
106: PetscViewerGetFormat(viewer,&format);
107: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
108: if (ds->bs>1) {
109: PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
110: }
111: if (ds->method<nmeth) {
112: PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
113: }
114: return(0);
115: }
116: if (ds->compact) {
117: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
118: rows = ds->extrarow? ds->n+1: ds->n;
119: if (format == PETSC_VIEWER_ASCII_MATLAB) {
120: PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
121: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
122: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
123: for (i=0;i<ds->n;i++) {
124: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
125: }
126: for (i=0;i<rows-1;i++) {
127: r = PetscMax(i+2,ds->k+1);
128: c = i+1;
129: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
130: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
131: PetscViewerASCIIPrintf(viewer,"%D %D %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
132: }
133: }
134: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
135: } else {
136: for (i=0;i<rows;i++) {
137: for (j=0;j<ds->n;j++) {
138: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
139: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
140: else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
141: else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
142: else value = 0.0;
143: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144: }
145: PetscViewerASCIIPrintf(viewer,"\n");
146: }
147: }
148: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
149: PetscViewerFlush(viewer);
150: } else {
151: DSViewMat(ds,viewer,DS_MAT_A);
152: }
153: if (ds->state>DS_STATE_INTERMEDIATE) {
154: DSViewMat(ds,viewer,DS_MAT_Q);
155: }
156: return(0);
157: }
159: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
160: {
161: PetscScalar *Q = ds->mat[DS_MAT_Q];
162: PetscInt ld = ds->ld,i;
166: switch (mat) {
167: case DS_MAT_X:
168: case DS_MAT_Y:
169: if (j) {
170: if (ds->state>=DS_STATE_CONDENSED) {
171: PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
172: } else {
173: PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
174: *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
175: }
176: } else {
177: if (ds->state>=DS_STATE_CONDENSED) {
178: PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
179: } else {
180: PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
181: for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
182: }
183: }
184: if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
185: break;
186: case DS_MAT_U:
187: case DS_MAT_VT:
188: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
189: break;
190: default:
191: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
192: }
193: return(0);
194: }
196: /*
197: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
199: [ d 0 0 0 e ]
200: [ 0 d 0 0 e ]
201: A = [ 0 0 d 0 e ]
202: [ 0 0 0 d e ]
203: [ e e e e d ]
205: to tridiagonal form
207: [ d e 0 0 0 ]
208: [ e d e 0 0 ]
209: T = Q'*A*Q = [ 0 e d e 0 ],
210: [ 0 0 e d e ]
211: [ 0 0 0 e d ]
213: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
214: perform the reduction, which requires O(n**2) flops. The accumulation
215: of the orthogonal factor Q, however, requires O(n**3) flops.
217: Arguments
218: =========
220: N (input) INTEGER
221: The order of the matrix A. N >= 0.
223: D (input/output) DOUBLE PRECISION array, dimension (N)
224: On entry, the diagonal entries of the matrix A to be
225: reduced.
226: On exit, the diagonal entries of the reduced matrix T.
228: E (input/output) DOUBLE PRECISION array, dimension (N-1)
229: On entry, the off-diagonal entries of the matrix A to be
230: reduced.
231: On exit, the subdiagonal entries of the reduced matrix T.
233: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
234: On exit, the orthogonal matrix Q.
236: LDQ (input) INTEGER
237: The leading dimension of the array Q.
239: Note
240: ====
241: Based on Fortran code contributed by Daniel Kressner
242: */
243: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
244: {
245: #if defined(SLEPC_MISSING_LAPACK_LARTG)
247: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
248: #else
249: PetscBLASInt i,j,j2,one=1;
250: PetscReal c,s,p,off,temp;
253: if (n<=2) return(0);
255: for (j=0;j<n-2;j++) {
257: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
258: temp = e[j+1];
259: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
260: s = -s;
262: /* Apply rotation to diagonal elements */
263: temp = d[j+1];
264: e[j] = c*s*(temp-d[j]);
265: d[j+1] = s*s*d[j] + c*c*temp;
266: d[j] = c*c*d[j] + s*s*temp;
268: /* Apply rotation to Q */
269: j2 = j+2;
270: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
272: /* Chase newly introduced off-diagonal entry to the top left corner */
273: for (i=j-1;i>=0;i--) {
274: off = -s*e[i];
275: e[i] = c*e[i];
276: temp = e[i+1];
277: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
278: s = -s;
279: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
280: p = s*temp;
281: d[i+1] += p;
282: d[i] -= p;
283: e[i] = -e[i] - c*temp;
284: j2 = j+2;
285: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
286: }
287: }
288: return(0);
289: #endif
290: }
292: /*
293: Reduce to tridiagonal form by means of ArrowTridiag.
294: */
295: static PetscErrorCode DSIntermediate_HEP(DS ds)
296: {
297: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
299: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
300: #else
302: PetscInt i;
303: PetscBLASInt n1,n2,n3,lwork,info,l,n,ld,off;
304: PetscScalar *A,*Q,*work,*tau;
305: PetscReal *d,*e;
308: PetscBLASIntCast(ds->n,&n);
309: PetscBLASIntCast(ds->l,&l);
310: PetscBLASIntCast(ds->ld,&ld);
311: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
312: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
313: n3 = n1+n2;
314: off = l+l*ld;
315: A = ds->mat[DS_MAT_A];
316: Q = ds->mat[DS_MAT_Q];
317: d = ds->rmat[DS_MAT_T];
318: e = ds->rmat[DS_MAT_T]+ld;
319: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
320: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
322: if (ds->compact) {
324: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
326: } else {
328: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
330: if (ds->state<DS_STATE_INTERMEDIATE) {
331: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
332: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
333: tau = ds->work;
334: work = ds->work+ld;
335: lwork = ld*ld;
336: PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
337: SlepcCheckLapackInfo("sytrd",info);
338: PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
339: SlepcCheckLapackInfo("orgtr",info);
340: } else {
341: /* copy tridiagonal to d,e */
342: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
343: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
344: }
345: }
346: return(0);
347: #endif
348: }
350: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
351: {
353: PetscInt n,l,i,*perm,ld=ds->ld;
354: PetscScalar *A;
355: PetscReal *d;
358: if (!ds->sc) return(0);
359: n = ds->n;
360: l = ds->l;
361: A = ds->mat[DS_MAT_A];
362: d = ds->rmat[DS_MAT_T];
363: perm = ds->perm;
364: if (!rr) {
365: DSSortEigenvaluesReal_Private(ds,d,perm);
366: } else {
367: DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
368: }
369: for (i=l;i<n;i++) wr[i] = d[perm[i]];
370: DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
371: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
372: if (!ds->compact) {
373: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
374: }
375: return(0);
376: }
378: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
379: {
381: PetscInt i;
382: PetscBLASInt n,ld,incx=1;
383: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
384: PetscReal *e,beta;
387: PetscBLASIntCast(ds->n,&n);
388: PetscBLASIntCast(ds->ld,&ld);
389: A = ds->mat[DS_MAT_A];
390: Q = ds->mat[DS_MAT_Q];
391: e = ds->rmat[DS_MAT_T]+ld;
393: if (ds->compact) {
394: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
395: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
396: ds->k = n;
397: } else {
398: DSAllocateWork_Private(ds,2*ld,0,0);
399: x = ds->work;
400: y = ds->work+ld;
401: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
402: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
403: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
404: ds->k = n;
405: }
406: return(0);
407: }
409: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
410: {
411: #if defined(PETSC_MISSING_LAPACK_STEQR)
413: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
414: #else
416: PetscInt i;
417: PetscBLASInt n1,n2,n3,info,l,n,ld,off;
418: PetscScalar *Q,*A;
419: PetscReal *d,*e;
422: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
423: PetscBLASIntCast(ds->n,&n);
424: PetscBLASIntCast(ds->l,&l);
425: PetscBLASIntCast(ds->ld,&ld);
426: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
427: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
428: n3 = n1+n2;
429: off = l+l*ld;
430: Q = ds->mat[DS_MAT_Q];
431: A = ds->mat[DS_MAT_A];
432: d = ds->rmat[DS_MAT_T];
433: e = ds->rmat[DS_MAT_T]+ld;
435: /* Reduce to tridiagonal form */
436: DSIntermediate_HEP(ds);
438: /* Solve the tridiagonal eigenproblem */
439: for (i=0;i<l;i++) wr[i] = d[i];
441: DSAllocateWork_Private(ds,0,2*ld,0);
442: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
443: SlepcCheckLapackInfo("steqr",info);
444: for (i=l;i<n;i++) wr[i] = d[i];
446: /* Create diagonal matrix as a result */
447: if (ds->compact) {
448: PetscMemzero(e,(n-1)*sizeof(PetscReal));
449: } else {
450: for (i=l;i<n;i++) {
451: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
452: }
453: for (i=l;i<n;i++) A[i+i*ld] = d[i];
454: }
456: /* Set zero wi */
457: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
458: return(0);
459: #endif
460: }
462: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
463: {
464: #if defined(SLEPC_MISSING_LAPACK_STEVR)
466: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
467: #else
469: PetscInt i;
470: PetscBLASInt n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
471: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
472: PetscReal *d,*e,abstol=0.0,vl,vu;
473: #if defined(PETSC_USE_COMPLEX)
474: PetscInt j;
475: PetscReal *ritz;
476: #endif
479: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
480: PetscBLASIntCast(ds->n,&n);
481: PetscBLASIntCast(ds->l,&l);
482: PetscBLASIntCast(ds->ld,&ld);
483: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
484: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
485: n3 = n1+n2;
486: off = l+l*ld;
487: A = ds->mat[DS_MAT_A];
488: Q = ds->mat[DS_MAT_Q];
489: d = ds->rmat[DS_MAT_T];
490: e = ds->rmat[DS_MAT_T]+ld;
492: /* Reduce to tridiagonal form */
493: DSIntermediate_HEP(ds);
495: /* Solve the tridiagonal eigenproblem */
496: for (i=0;i<l;i++) wr[i] = d[i];
498: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
499: DSAllocateMat_Private(ds,DS_MAT_W);
500: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
501: W = ds->mat[DS_MAT_W];
502: }
503: #if defined(PETSC_USE_COMPLEX)
504: DSAllocateMatReal_Private(ds,DS_MAT_Q);
505: #endif
506: lwork = 20*ld;
507: liwork = 10*ld;
508: DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
509: isuppz = ds->iwork+liwork;
510: #if defined(PETSC_USE_COMPLEX)
511: ritz = ds->rwork+lwork;
512: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
513: for (i=l;i<n;i++) wr[i] = ritz[i];
514: #else
515: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
516: #endif
517: SlepcCheckLapackInfo("stevr",info);
518: #if defined(PETSC_USE_COMPLEX)
519: for (i=l;i<n;i++)
520: for (j=l;j<n;j++)
521: Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
522: #endif
523: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
524: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
525: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
526: }
527: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
529: /* Create diagonal matrix as a result */
530: if (ds->compact) {
531: PetscMemzero(e,(n-1)*sizeof(PetscReal));
532: } else {
533: for (i=l;i<n;i++) {
534: PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
535: }
536: for (i=l;i<n;i++) A[i+i*ld] = d[i];
537: }
539: /* Set zero wi */
540: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
541: return(0);
542: #endif
543: }
545: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
546: {
547: #if defined(SLEPC_MISSING_LAPACK_STEDC)
549: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
550: #else
552: PetscInt i;
553: PetscBLASInt n1,info,l,ld,off,lrwork,liwork;
554: PetscScalar *Q,*A;
555: PetscReal *d,*e;
556: #if defined(PETSC_USE_COMPLEX)
557: PetscBLASInt lwork;
558: PetscInt j;
559: #endif
562: if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
563: PetscBLASIntCast(ds->l,&l);
564: PetscBLASIntCast(ds->ld,&ld);
565: PetscBLASIntCast(ds->n-ds->l,&n1);
566: off = l+l*ld;
567: Q = ds->mat[DS_MAT_Q];
568: A = ds->mat[DS_MAT_A];
569: d = ds->rmat[DS_MAT_T];
570: e = ds->rmat[DS_MAT_T]+ld;
572: /* Reduce to tridiagonal form */
573: DSIntermediate_HEP(ds);
575: /* Solve the tridiagonal eigenproblem */
576: for (i=0;i<l;i++) wr[i] = d[i];
578: lrwork = 5*n1*n1+3*n1+1;
579: liwork = 5*n1*n1+6*n1+6;
580: #if !defined(PETSC_USE_COMPLEX)
581: DSAllocateWork_Private(ds,0,lrwork,liwork);
582: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
583: #else
584: lwork = ld*ld;
585: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
586: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
587: /* Fixing Lapack bug*/
588: for (j=ds->l;j<ds->n;j++)
589: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
590: #endif
591: SlepcCheckLapackInfo("stedc",info);
592: for (i=l;i<ds->n;i++) wr[i] = d[i];
594: /* Create diagonal matrix as a result */
595: if (ds->compact) {
596: PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
597: } else {
598: for (i=l;i<ds->n;i++) {
599: PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
600: }
601: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
602: }
604: /* Set zero wi */
605: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
606: return(0);
607: #endif
608: }
610: #if !defined(PETSC_USE_COMPLEX)
611: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
612: {
614: PetscBLASInt i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
615: PetscScalar *Q,*A;
616: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
619: if (ds->l>0) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for l>1");
620: if (ds->compact) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for compact storage");
621: PetscBLASIntCast(ds->ld,&ld);
622: PetscBLASIntCast(ds->bs,&bs);
623: PetscBLASIntCast(ds->n,&n);
624: nblks = n/bs;
625: Q = ds->mat[DS_MAT_Q];
626: A = ds->mat[DS_MAT_A];
627: d = ds->rmat[DS_MAT_T];
628: e = ds->rmat[DS_MAT_T]+ld;
629: lrwork = 4*n*n+60*n+1;
630: liwork = 5*n+5*nblks-1;
631: lde = 2*bs+1;
632: DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
633: D = ds->work;
634: E = ds->work+bs*n;
635: rwork = ds->rwork;
636: ksizes = ds->iwork;
637: iwork = ds->iwork+nblks;
638: PetscMemzero(iwork,liwork*sizeof(PetscBLASInt));
640: /* Copy matrix to block tridiagonal format */
641: j=0;
642: for (i=0;i<nblks;i++) {
643: ksizes[i]=bs;
644: for (k=0;k<bs;k++)
645: for (m=0;m<bs;m++)
646: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
647: j = j + bs;
648: }
649: j=0;
650: for (i=0;i<nblks-1;i++) {
651: for (k=0;k<bs;k++)
652: for (m=0;m<bs;m++)
653: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
654: j = j + bs;
655: }
657: /* Solve the block tridiagonal eigenproblem */
658: BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
659: Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
660: for (i=0;i<ds->n;i++) wr[i] = d[i];
662: /* Create diagonal matrix as a result */
663: if (ds->compact) {
664: PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
665: } else {
666: for (i=0;i<ds->n;i++) {
667: PetscMemzero(A+i*ld,ds->n*sizeof(PetscScalar));
668: }
669: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
670: }
672: /* Set zero wi */
673: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
674: return(0);
675: }
676: #endif
678: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
679: {
680: PetscInt i,ld=ds->ld,l=ds->l;
681: PetscScalar *A;
684: if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
685: A = ds->mat[DS_MAT_A];
686: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
687: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
688: }
689: if (ds->extrarow) ds->k = n;
690: else ds->k = 0;
691: ds->n = n;
692: return(0);
693: }
695: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
696: {
698: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
699: PetscMPIInt n,rank,off=0,size,ldn,ld3;
702: if (ds->compact) kr = 3*ld;
703: else k = (ds->n-l)*ld;
704: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
705: if (eigr) k += (ds->n-l);
706: if (eigi) k += (ds->n-l);
707: DSAllocateWork_Private(ds,k+kr,0,0);
708: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
709: PetscMPIIntCast(ds->n-l,&n);
710: PetscMPIIntCast(ld*(ds->n-l),&ldn);
711: PetscMPIIntCast(ld*3,&ld3);
712: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
713: if (!rank) {
714: if (ds->compact) {
715: MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
716: } else {
717: MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
718: }
719: if (ds->state>DS_STATE_RAW) {
720: MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
721: }
722: if (eigr) {
723: MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
724: }
725: if (eigi) {
726: MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
727: }
728: }
729: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
730: if (rank) {
731: if (ds->compact) {
732: MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
733: } else {
734: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
735: }
736: if (ds->state>DS_STATE_RAW) {
737: MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
738: }
739: if (eigr) {
740: MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
741: }
742: if (eigi) {
743: MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
744: }
745: }
746: return(0);
747: }
749: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
750: {
751: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
753: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
754: #else
756: PetscScalar *work;
757: PetscReal *rwork;
758: PetscBLASInt *ipiv;
759: PetscBLASInt lwork,info,n,ld;
760: PetscReal hn,hin;
761: PetscScalar *A;
764: PetscBLASIntCast(ds->n,&n);
765: PetscBLASIntCast(ds->ld,&ld);
766: lwork = 8*ld;
767: DSAllocateWork_Private(ds,lwork,ld,ld);
768: work = ds->work;
769: rwork = ds->rwork;
770: ipiv = ds->iwork;
771: DSSwitchFormat_HEP(ds,PETSC_FALSE);
773: /* use workspace matrix W to avoid overwriting A */
774: DSAllocateMat_Private(ds,DS_MAT_W);
775: A = ds->mat[DS_MAT_W];
776: PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);
778: /* norm of A */
779: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
781: /* norm of inv(A) */
782: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
783: SlepcCheckLapackInfo("getrf",info);
784: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
785: SlepcCheckLapackInfo("getri",info);
786: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
788: *cond = hn*hin;
789: return(0);
790: #endif
791: }
793: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
794: {
795: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
797: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
798: #else
800: PetscInt i,j,k=ds->k;
801: PetscScalar *Q,*A,*R,*tau,*work;
802: PetscBLASInt ld,n1,n0,lwork,info;
805: PetscBLASIntCast(ds->ld,&ld);
806: DSAllocateWork_Private(ds,ld*ld,0,0);
807: tau = ds->work;
808: work = ds->work+ld;
809: PetscBLASIntCast(ld*(ld-1),&lwork);
810: DSAllocateMat_Private(ds,DS_MAT_W);
811: A = ds->mat[DS_MAT_A];
812: Q = ds->mat[DS_MAT_Q];
813: R = ds->mat[DS_MAT_W];
815: /* copy I+alpha*A */
816: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
817: PetscMemzero(R,ld*ld*sizeof(PetscScalar));
818: for (i=0;i<k;i++) {
819: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
820: Q[k+i*ld] = alpha*A[k+i*ld];
821: }
823: /* compute qr */
824: PetscBLASIntCast(k+1,&n1);
825: PetscBLASIntCast(k,&n0);
826: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
827: SlepcCheckLapackInfo("geqrf",info);
829: /* copy R from Q */
830: for (j=0;j<k;j++)
831: for (i=0;i<=j;i++)
832: R[i+j*ld] = Q[i+j*ld];
834: /* compute orthogonal matrix in Q */
835: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
836: SlepcCheckLapackInfo("orgqr",info);
838: /* compute the updated matrix of projected problem */
839: for (j=0;j<k;j++)
840: for (i=0;i<k+1;i++)
841: A[j*ld+i] = Q[i*ld+j];
842: alpha = -1.0/alpha;
843: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
844: for (i=0;i<k;i++)
845: A[ld*i+i] -= alpha;
846: return(0);
847: #endif
848: }
850: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
851: {
853: if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
854: else *flg = PETSC_FALSE;
855: return(0);
856: }
858: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
859: {
861: ds->ops->allocate = DSAllocate_HEP;
862: ds->ops->view = DSView_HEP;
863: ds->ops->vectors = DSVectors_HEP;
864: ds->ops->solve[0] = DSSolve_HEP_QR;
865: ds->ops->solve[1] = DSSolve_HEP_MRRR;
866: ds->ops->solve[2] = DSSolve_HEP_DC;
867: #if !defined(PETSC_USE_COMPLEX)
868: ds->ops->solve[3] = DSSolve_HEP_BDC;
869: #endif
870: ds->ops->sort = DSSort_HEP;
871: ds->ops->synchronize = DSSynchronize_HEP;
872: ds->ops->truncate = DSTruncate_HEP;
873: ds->ops->update = DSUpdateExtraRow_HEP;
874: ds->ops->cond = DSCond_HEP;
875: ds->ops->transrks = DSTranslateRKS_HEP;
876: ds->ops->hermitian = DSHermitian_HEP;
877: return(0);
878: }