Actual source code: dshep.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
 15: {

 19:   DSAllocateMat_Private(ds,DS_MAT_A);
 20:   DSAllocateMat_Private(ds,DS_MAT_Q);
 21:   DSAllocateMatReal_Private(ds,DS_MAT_T);
 22:   PetscFree(ds->perm);
 23:   PetscMalloc1(ld,&ds->perm);
 24:   PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
 25:   return(0);
 26: }

 28: /*   0       l           k                 n-1
 29:     -----------------------------------------
 30:     |*       .           .                  |
 31:     |  *     .           .                  |
 32:     |    *   .           .                  |
 33:     |      * .           .                  |
 34:     |. . . . o           o                  |
 35:     |          o         o                  |
 36:     |            o       o                  |
 37:     |              o     o                  |
 38:     |                o   o                  |
 39:     |                  o o                  |
 40:     |. . . . o o o o o o o x                |
 41:     |                    x x x              |
 42:     |                      x x x            |
 43:     |                        x x x          |
 44:     |                          x x x        |
 45:     |                            x x x      |
 46:     |                              x x x    |
 47:     |                                x x x  |
 48:     |                                  x x x|
 49:     |                                    x x|
 50:     -----------------------------------------
 51: */

 53: static PetscErrorCode DSSwitchFormat_HEP(DS ds,PetscBool tocompact)
 54: {
 56:   PetscReal      *T = ds->rmat[DS_MAT_T];
 57:   PetscScalar    *A = ds->mat[DS_MAT_A];
 58:   PetscInt       i,n=ds->n,k=ds->k,ld=ds->ld;

 61:   if (tocompact) { /* switch from dense (arrow) to compact storage */
 62:     PetscMemzero(T,3*ld*sizeof(PetscReal));
 63:     for (i=0;i<k;i++) {
 64:       T[i] = PetscRealPart(A[i+i*ld]);
 65:       T[i+ld] = PetscRealPart(A[k+i*ld]);
 66:     }
 67:     for (i=k;i<n-1;i++) {
 68:       T[i]    = PetscRealPart(A[i+i*ld]);
 69:       T[i+ld] = PetscRealPart(A[i+1+i*ld]);
 70:     }
 71:     T[n-1] = PetscRealPart(A[n-1+(n-1)*ld]);
 72:     if (ds->extrarow) T[n-1+ld] = PetscRealPart(A[n+(n-1)*ld]);
 73:   } else { /* switch from compact (arrow) to dense storage */
 74:     PetscMemzero(A,ld*ld*sizeof(PetscScalar));
 75:     for (i=0;i<k;i++) {
 76:       A[i+i*ld] = T[i];
 77:       A[k+i*ld] = T[i+ld];
 78:       A[i+k*ld] = T[i+ld];
 79:     }
 80:     A[k+k*ld] = T[k];
 81:     for (i=k+1;i<n;i++) {
 82:       A[i+i*ld]     = T[i];
 83:       A[i-1+i*ld]   = T[i-1+ld];
 84:       A[i+(i-1)*ld] = T[i-1+ld];
 85:     }
 86:     if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
 87:   }
 88:   return(0);
 89: }

 91: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
 92: {
 93:   PetscErrorCode    ierr;
 94:   PetscViewerFormat format;
 95:   PetscInt          i,j,r,c,rows;
 96:   PetscReal         value;
 97:   const char        *methodname[] = {
 98:                      "Implicit QR method (_steqr)",
 99:                      "Relatively Robust Representations (_stevr)",
100:                      "Divide and Conquer method (_stedc)",
101:                      "Block Divide and Conquer method (dsbtdc)"
102:   };
103:   const int         nmeth=sizeof(methodname)/sizeof(methodname[0]);

106:   PetscViewerGetFormat(viewer,&format);
107:   if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
108:     if (ds->bs>1) {
109:       PetscViewerASCIIPrintf(viewer,"block size: %D\n",ds->bs);
110:     }
111:     if (ds->method<nmeth) {
112:       PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
113:     }
114:     return(0);
115:   }
116:   if (ds->compact) {
117:     PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
118:     rows = ds->extrarow? ds->n+1: ds->n;
119:     if (format == PETSC_VIEWER_ASCII_MATLAB) {
120:       PetscViewerASCIIPrintf(viewer,"%% Size = %D %D\n",rows,ds->n);
121:       PetscViewerASCIIPrintf(viewer,"zzz = zeros(%D,3);\n",3*ds->n);
122:       PetscViewerASCIIPrintf(viewer,"zzz = [\n");
123:       for (i=0;i<ds->n;i++) {
124:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
125:       }
126:       for (i=0;i<rows-1;i++) {
127:         r = PetscMax(i+2,ds->k+1);
128:         c = i+1;
129:         PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
130:         if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
131:           PetscViewerASCIIPrintf(viewer,"%D %D  %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
132:         }
133:       }
134:       PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
135:     } else {
136:       for (i=0;i<rows;i++) {
137:         for (j=0;j<ds->n;j++) {
138:           if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
139:           else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
140:           else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
141:           else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
142:           else value = 0.0;
143:           PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
144:         }
145:         PetscViewerASCIIPrintf(viewer,"\n");
146:       }
147:     }
148:     PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
149:     PetscViewerFlush(viewer);
150:   } else {
151:     DSViewMat(ds,viewer,DS_MAT_A);
152:   }
153:   if (ds->state>DS_STATE_INTERMEDIATE) {
154:     DSViewMat(ds,viewer,DS_MAT_Q);
155:   }
156:   return(0);
157: }

159: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
160: {
161:   PetscScalar    *Q = ds->mat[DS_MAT_Q];
162:   PetscInt       ld = ds->ld,i;

166:   switch (mat) {
167:     case DS_MAT_X:
168:     case DS_MAT_Y:
169:       if (j) {
170:         if (ds->state>=DS_STATE_CONDENSED) {
171:           PetscMemcpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld*sizeof(PetscScalar));
172:         } else {
173:           PetscMemzero(ds->mat[mat]+(*j)*ld,ld*sizeof(PetscScalar));
174:           *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
175:         }
176:       } else {
177:         if (ds->state>=DS_STATE_CONDENSED) {
178:           PetscMemcpy(ds->mat[mat],Q,ld*ld*sizeof(PetscScalar));
179:         } else {
180:           PetscMemzero(ds->mat[mat],ld*ld*sizeof(PetscScalar));
181:           for (i=0;i<ds->n;i++) *(ds->mat[mat]+i+i*ld) = 1.0;
182:         }
183:       }
184:       if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
185:       break;
186:     case DS_MAT_U:
187:     case DS_MAT_VT:
188:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
189:       break;
190:     default:
191:       SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
192:   }
193:   return(0);
194: }

196: /*
197:   ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form

199:                 [ d 0 0 0 e ]
200:                 [ 0 d 0 0 e ]
201:             A = [ 0 0 d 0 e ]
202:                 [ 0 0 0 d e ]
203:                 [ e e e e d ]

205:   to tridiagonal form

207:                 [ d e 0 0 0 ]
208:                 [ e d e 0 0 ]
209:    T = Q'*A*Q = [ 0 e d e 0 ],
210:                 [ 0 0 e d e ]
211:                 [ 0 0 0 e d ]

213:   where Q is an orthogonal matrix. Rutishauser's algorithm is used to
214:   perform the reduction, which requires O(n**2) flops. The accumulation
215:   of the orthogonal factor Q, however, requires O(n**3) flops.

217:   Arguments
218:   =========

220:   N       (input) INTEGER
221:           The order of the matrix A.  N >= 0.

223:   D       (input/output) DOUBLE PRECISION array, dimension (N)
224:           On entry, the diagonal entries of the matrix A to be
225:           reduced.
226:           On exit, the diagonal entries of the reduced matrix T.

228:   E       (input/output) DOUBLE PRECISION array, dimension (N-1)
229:           On entry, the off-diagonal entries of the matrix A to be
230:           reduced.
231:           On exit, the subdiagonal entries of the reduced matrix T.

233:   Q       (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
234:           On exit, the orthogonal matrix Q.

236:   LDQ     (input) INTEGER
237:           The leading dimension of the array Q.

239:   Note
240:   ====
241:   Based on Fortran code contributed by Daniel Kressner
242: */
243: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
244: {
245: #if defined(SLEPC_MISSING_LAPACK_LARTG)
247:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARTG - Lapack routine is unavailable");
248: #else
249:   PetscBLASInt i,j,j2,one=1;
250:   PetscReal    c,s,p,off,temp;

253:   if (n<=2) return(0);

255:   for (j=0;j<n-2;j++) {

257:     /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
258:     temp = e[j+1];
259:     PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
260:     s = -s;

262:     /* Apply rotation to diagonal elements */
263:     temp   = d[j+1];
264:     e[j]   = c*s*(temp-d[j]);
265:     d[j+1] = s*s*d[j] + c*c*temp;
266:     d[j]   = c*c*d[j] + s*s*temp;

268:     /* Apply rotation to Q */
269:     j2 = j+2;
270:     PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));

272:     /* Chase newly introduced off-diagonal entry to the top left corner */
273:     for (i=j-1;i>=0;i--) {
274:       off  = -s*e[i];
275:       e[i] = c*e[i];
276:       temp = e[i+1];
277:       PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
278:       s = -s;
279:       temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
280:       p = s*temp;
281:       d[i+1] += p;
282:       d[i] -= p;
283:       e[i] = -e[i] - c*temp;
284:       j2 = j+2;
285:       PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
286:     }
287:   }
288:   return(0);
289: #endif
290: }

292: /*
293:    Reduce to tridiagonal form by means of ArrowTridiag.
294: */
295: static PetscErrorCode DSIntermediate_HEP(DS ds)
296: {
297: #if defined(SLEPC_MISSING_LAPACK_SYTRD) || defined(SLEPC_MISSING_LAPACK_ORGTR)
299:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYTRD/ORGTR - Lapack routine is unavailable");
300: #else
302:   PetscInt       i;
303:   PetscBLASInt   n1,n2,n3,lwork,info,l,n,ld,off;
304:   PetscScalar    *A,*Q,*work,*tau;
305:   PetscReal      *d,*e;

308:   PetscBLASIntCast(ds->n,&n);
309:   PetscBLASIntCast(ds->l,&l);
310:   PetscBLASIntCast(ds->ld,&ld);
311:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
312:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
313:   n3 = n1+n2;
314:   off = l+l*ld;
315:   A  = ds->mat[DS_MAT_A];
316:   Q  = ds->mat[DS_MAT_Q];
317:   d  = ds->rmat[DS_MAT_T];
318:   e  = ds->rmat[DS_MAT_T]+ld;
319:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
320:   for (i=0;i<n;i++) Q[i+i*ld] = 1.0;

322:   if (ds->compact) {

324:     if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);

326:   } else {

328:     for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }

330:     if (ds->state<DS_STATE_INTERMEDIATE) {
331:       DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
332:       DSAllocateWork_Private(ds,ld+ld*ld,0,0);
333:       tau  = ds->work;
334:       work = ds->work+ld;
335:       lwork = ld*ld;
336:       PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n3,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
337:       SlepcCheckLapackInfo("sytrd",info);
338:       PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n3,Q+off,&ld,tau,work,&lwork,&info));
339:       SlepcCheckLapackInfo("orgtr",info);
340:     } else {
341:       /* copy tridiagonal to d,e */
342:       for (i=l;i<n;i++)   d[i] = PetscRealPart(A[i+i*ld]);
343:       for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
344:     }
345:   }
346:   return(0);
347: #endif
348: }

350: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
351: {
353:   PetscInt       n,l,i,*perm,ld=ds->ld;
354:   PetscScalar    *A;
355:   PetscReal      *d;

358:   if (!ds->sc) return(0);
359:   n = ds->n;
360:   l = ds->l;
361:   A = ds->mat[DS_MAT_A];
362:   d = ds->rmat[DS_MAT_T];
363:   perm = ds->perm;
364:   if (!rr) {
365:     DSSortEigenvaluesReal_Private(ds,d,perm);
366:   } else {
367:     DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
368:   }
369:   for (i=l;i<n;i++) wr[i] = d[perm[i]];
370:   DSPermuteColumns_Private(ds,l,n,DS_MAT_Q,perm);
371:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
372:   if (!ds->compact) {
373:     for (i=l;i<n;i++) A[i+i*ld] = wr[i];
374:   }
375:   return(0);
376: }

378: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
379: {
381:   PetscInt       i;
382:   PetscBLASInt   n,ld,incx=1;
383:   PetscScalar    *A,*Q,*x,*y,one=1.0,zero=0.0;
384:   PetscReal      *e,beta;

387:   PetscBLASIntCast(ds->n,&n);
388:   PetscBLASIntCast(ds->ld,&ld);
389:   A  = ds->mat[DS_MAT_A];
390:   Q  = ds->mat[DS_MAT_Q];
391:   e  = ds->rmat[DS_MAT_T]+ld;

393:   if (ds->compact) {
394:     beta = e[n-1];   /* in compact, we assume all entries are zero except the last one */
395:     for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
396:     ds->k = n;
397:   } else {
398:     DSAllocateWork_Private(ds,2*ld,0,0);
399:     x = ds->work;
400:     y = ds->work+ld;
401:     for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
402:     PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
403:     for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
404:     ds->k = n;
405:   }
406:   return(0);
407: }

409: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
410: {
411: #if defined(PETSC_MISSING_LAPACK_STEQR)
413:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEQR - Lapack routine is unavailable");
414: #else
416:   PetscInt       i;
417:   PetscBLASInt   n1,n2,n3,info,l,n,ld,off;
418:   PetscScalar    *Q,*A;
419:   PetscReal      *d,*e;

422:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
423:   PetscBLASIntCast(ds->n,&n);
424:   PetscBLASIntCast(ds->l,&l);
425:   PetscBLASIntCast(ds->ld,&ld);
426:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
427:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
428:   n3 = n1+n2;
429:   off = l+l*ld;
430:   Q  = ds->mat[DS_MAT_Q];
431:   A  = ds->mat[DS_MAT_A];
432:   d  = ds->rmat[DS_MAT_T];
433:   e  = ds->rmat[DS_MAT_T]+ld;

435:   /* Reduce to tridiagonal form */
436:   DSIntermediate_HEP(ds);

438:   /* Solve the tridiagonal eigenproblem */
439:   for (i=0;i<l;i++) wr[i] = d[i];

441:   DSAllocateWork_Private(ds,0,2*ld,0);
442:   PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n3,d+l,e+l,Q+off,&ld,ds->rwork,&info));
443:   SlepcCheckLapackInfo("steqr",info);
444:   for (i=l;i<n;i++) wr[i] = d[i];

446:   /* Create diagonal matrix as a result */
447:   if (ds->compact) {
448:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
449:   } else {
450:     for (i=l;i<n;i++) {
451:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
452:     }
453:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
454:   }

456:   /* Set zero wi */
457:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
458:   return(0);
459: #endif
460: }

462: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
463: {
464: #if defined(SLEPC_MISSING_LAPACK_STEVR)
466:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEVR - Lapack routine is unavailable");
467: #else
469:   PetscInt       i;
470:   PetscBLASInt   n1,n2,n3,lwork,liwork,info,l,n,m,ld,off,il,iu,*isuppz;
471:   PetscScalar    *A,*Q,*W=NULL,one=1.0,zero=0.0;
472:   PetscReal      *d,*e,abstol=0.0,vl,vu;
473: #if defined(PETSC_USE_COMPLEX)
474:   PetscInt       j;
475:   PetscReal      *ritz;
476: #endif

479:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
480:   PetscBLASIntCast(ds->n,&n);
481:   PetscBLASIntCast(ds->l,&l);
482:   PetscBLASIntCast(ds->ld,&ld);
483:   PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
484:   PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
485:   n3 = n1+n2;
486:   off = l+l*ld;
487:   A  = ds->mat[DS_MAT_A];
488:   Q  = ds->mat[DS_MAT_Q];
489:   d  = ds->rmat[DS_MAT_T];
490:   e  = ds->rmat[DS_MAT_T]+ld;

492:   /* Reduce to tridiagonal form */
493:   DSIntermediate_HEP(ds);

495:   /* Solve the tridiagonal eigenproblem */
496:   for (i=0;i<l;i++) wr[i] = d[i];

498:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* Q contains useful info */
499:     DSAllocateMat_Private(ds,DS_MAT_W);
500:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
501:     W = ds->mat[DS_MAT_W];
502:   }
503: #if defined(PETSC_USE_COMPLEX)
504:   DSAllocateMatReal_Private(ds,DS_MAT_Q);
505: #endif
506:   lwork = 20*ld;
507:   liwork = 10*ld;
508:   DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
509:   isuppz = ds->iwork+liwork;
510: #if defined(PETSC_USE_COMPLEX)
511:   ritz = ds->rwork+lwork;
512:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
513:   for (i=l;i<n;i++) wr[i] = ritz[i];
514: #else
515:   PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
516: #endif
517:   SlepcCheckLapackInfo("stevr",info);
518: #if defined(PETSC_USE_COMPLEX)
519:   for (i=l;i<n;i++)
520:     for (j=l;j<n;j++)
521:       Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
522: #endif
523:   if (ds->state<DS_STATE_INTERMEDIATE) {  /* accumulate previous Q */
524:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
525:     DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
526:   }
527:   for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);

529:   /* Create diagonal matrix as a result */
530:   if (ds->compact) {
531:     PetscMemzero(e,(n-1)*sizeof(PetscReal));
532:   } else {
533:     for (i=l;i<n;i++) {
534:       PetscMemzero(A+l+i*ld,(n-l)*sizeof(PetscScalar));
535:     }
536:     for (i=l;i<n;i++) A[i+i*ld] = d[i];
537:   }

539:   /* Set zero wi */
540:   if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
541:   return(0);
542: #endif
543: }

545: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
546: {
547: #if defined(SLEPC_MISSING_LAPACK_STEDC)
549:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"STEDC - Lapack routine is unavailable");
550: #else
552:   PetscInt       i;
553:   PetscBLASInt   n1,info,l,ld,off,lrwork,liwork;
554:   PetscScalar    *Q,*A;
555:   PetscReal      *d,*e;
556: #if defined(PETSC_USE_COMPLEX)
557:   PetscBLASInt   lwork;
558:   PetscInt       j;
559: #endif

562:   if (ds->bs>1) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for bs>1");
563:   PetscBLASIntCast(ds->l,&l);
564:   PetscBLASIntCast(ds->ld,&ld);
565:   PetscBLASIntCast(ds->n-ds->l,&n1);
566:   off = l+l*ld;
567:   Q  = ds->mat[DS_MAT_Q];
568:   A  = ds->mat[DS_MAT_A];
569:   d  = ds->rmat[DS_MAT_T];
570:   e  = ds->rmat[DS_MAT_T]+ld;

572:   /* Reduce to tridiagonal form */
573:   DSIntermediate_HEP(ds);

575:   /* Solve the tridiagonal eigenproblem */
576:   for (i=0;i<l;i++) wr[i] = d[i];

578:   lrwork = 5*n1*n1+3*n1+1;
579:   liwork = 5*n1*n1+6*n1+6;
580: #if !defined(PETSC_USE_COMPLEX)
581:   DSAllocateWork_Private(ds,0,lrwork,liwork);
582:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
583: #else
584:   lwork = ld*ld;
585:   DSAllocateWork_Private(ds,lwork,lrwork,liwork);
586:   PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
587:   /* Fixing Lapack bug*/
588:   for (j=ds->l;j<ds->n;j++)
589:     for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
590: #endif
591:   SlepcCheckLapackInfo("stedc",info);
592:   for (i=l;i<ds->n;i++) wr[i] = d[i];

594:   /* Create diagonal matrix as a result */
595:   if (ds->compact) {
596:     PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
597:   } else {
598:     for (i=l;i<ds->n;i++) {
599:       PetscMemzero(A+l+i*ld,(ds->n-l)*sizeof(PetscScalar));
600:     }
601:     for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
602:   }

604:   /* Set zero wi */
605:   if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
606:   return(0);
607: #endif
608: }

610: #if !defined(PETSC_USE_COMPLEX)
611: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
612: {
614:   PetscBLASInt   i,j,k,m,n,info,nblks,bs,ld,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
615:   PetscScalar    *Q,*A;
616:   PetscReal      *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;

619:   if (ds->l>0) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"This method is not prepared for l>1");
620:   if (ds->compact) SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented for compact storage");
621:   PetscBLASIntCast(ds->ld,&ld);
622:   PetscBLASIntCast(ds->bs,&bs);
623:   PetscBLASIntCast(ds->n,&n);
624:   nblks = n/bs;
625:   Q  = ds->mat[DS_MAT_Q];
626:   A  = ds->mat[DS_MAT_A];
627:   d  = ds->rmat[DS_MAT_T];
628:   e  = ds->rmat[DS_MAT_T]+ld;
629:   lrwork = 4*n*n+60*n+1;
630:   liwork = 5*n+5*nblks-1;
631:   lde = 2*bs+1;
632:   DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
633:   D      = ds->work;
634:   E      = ds->work+bs*n;
635:   rwork  = ds->rwork;
636:   ksizes = ds->iwork;
637:   iwork  = ds->iwork+nblks;
638:   PetscMemzero(iwork,liwork*sizeof(PetscBLASInt));

640:   /* Copy matrix to block tridiagonal format */
641:   j=0;
642:   for (i=0;i<nblks;i++) {
643:     ksizes[i]=bs;
644:     for (k=0;k<bs;k++)
645:       for (m=0;m<bs;m++)
646:         D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
647:     j = j + bs;
648:   }
649:   j=0;
650:   for (i=0;i<nblks-1;i++) {
651:     for (k=0;k<bs;k++)
652:       for (m=0;m<bs;m++)
653:         E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
654:     j = j + bs;
655:   }

657:   /* Solve the block tridiagonal eigenproblem */
658:   BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
659:            Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
660:   for (i=0;i<ds->n;i++) wr[i] = d[i];

662:   /* Create diagonal matrix as a result */
663:   if (ds->compact) {
664:     PetscMemzero(e,(ds->n-1)*sizeof(PetscReal));
665:   } else {
666:     for (i=0;i<ds->n;i++) {
667:       PetscMemzero(A+i*ld,ds->n*sizeof(PetscScalar));
668:     }
669:     for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
670:   }

672:   /* Set zero wi */
673:   if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
674:   return(0);
675: }
676: #endif

678: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n)
679: {
680:   PetscInt    i,ld=ds->ld,l=ds->l;
681:   PetscScalar *A;

684:   if (ds->state==DS_STATE_CONDENSED) ds->t = ds->n;
685:   A = ds->mat[DS_MAT_A];
686:   if (!ds->compact && ds->extrarow && ds->k==ds->n) {
687:     for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
688:   }
689:   if (ds->extrarow) ds->k = n;
690:   else ds->k = 0;
691:   ds->n = n;
692:   return(0);
693: }

695: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
696: {
698:   PetscInt       ld=ds->ld,l=ds->l,k=0,kr=0;
699:   PetscMPIInt    n,rank,off=0,size,ldn,ld3;

702:   if (ds->compact) kr = 3*ld;
703:   else k = (ds->n-l)*ld;
704:   if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
705:   if (eigr) k += (ds->n-l); 
706:   if (eigi) k += (ds->n-l); 
707:   DSAllocateWork_Private(ds,k+kr,0,0);
708:   PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
709:   PetscMPIIntCast(ds->n-l,&n);
710:   PetscMPIIntCast(ld*(ds->n-l),&ldn);
711:   PetscMPIIntCast(ld*3,&ld3);
712:   MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
713:   if (!rank) {
714:     if (ds->compact) {
715:       MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
716:     } else {
717:       MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
718:     }
719:     if (ds->state>DS_STATE_RAW) {
720:       MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
721:     }
722:     if (eigr) {
723:       MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
724:     }
725:     if (eigi) {
726:       MPI_Pack(eigi+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
727:     }
728:   }
729:   MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
730:   if (rank) {
731:     if (ds->compact) {
732:       MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
733:     } else {
734:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
735:     }
736:     if (ds->state>DS_STATE_RAW) {
737:       MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
738:     }
739:     if (eigr) {
740:       MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
741:     }
742:     if (eigi) {
743:       MPI_Unpack(ds->work,size,&off,eigi+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
744:     }
745:   }
746:   return(0);
747: }

749: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
750: {
751: #if defined(PETSC_MISSING_LAPACK_GETRF) || defined(PETSC_MISSING_LAPACK_GETRI) || defined(SLEPC_MISSING_LAPACK_LANGE)
753:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GETRF/GETRI/LANGE - Lapack routines are unavailable");
754: #else
756:   PetscScalar    *work;
757:   PetscReal      *rwork;
758:   PetscBLASInt   *ipiv;
759:   PetscBLASInt   lwork,info,n,ld;
760:   PetscReal      hn,hin;
761:   PetscScalar    *A;

764:   PetscBLASIntCast(ds->n,&n);
765:   PetscBLASIntCast(ds->ld,&ld);
766:   lwork = 8*ld;
767:   DSAllocateWork_Private(ds,lwork,ld,ld);
768:   work  = ds->work;
769:   rwork = ds->rwork;
770:   ipiv  = ds->iwork;
771:   DSSwitchFormat_HEP(ds,PETSC_FALSE);

773:   /* use workspace matrix W to avoid overwriting A */
774:   DSAllocateMat_Private(ds,DS_MAT_W);
775:   A = ds->mat[DS_MAT_W];
776:   PetscMemcpy(A,ds->mat[DS_MAT_A],sizeof(PetscScalar)*ds->ld*ds->ld);

778:   /* norm of A */
779:   hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);

781:   /* norm of inv(A) */
782:   PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
783:   SlepcCheckLapackInfo("getrf",info);
784:   PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
785:   SlepcCheckLapackInfo("getri",info);
786:   hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);

788:   *cond = hn*hin;
789:   return(0);
790: #endif
791: }

793: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
794: {
795: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
797:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
798: #else
800:   PetscInt       i,j,k=ds->k;
801:   PetscScalar    *Q,*A,*R,*tau,*work;
802:   PetscBLASInt   ld,n1,n0,lwork,info;

805:   PetscBLASIntCast(ds->ld,&ld);
806:   DSAllocateWork_Private(ds,ld*ld,0,0);
807:   tau = ds->work;
808:   work = ds->work+ld;
809:   PetscBLASIntCast(ld*(ld-1),&lwork);
810:   DSAllocateMat_Private(ds,DS_MAT_W);
811:   A  = ds->mat[DS_MAT_A];
812:   Q  = ds->mat[DS_MAT_Q];
813:   R  = ds->mat[DS_MAT_W];

815:   /* copy I+alpha*A */
816:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
817:   PetscMemzero(R,ld*ld*sizeof(PetscScalar));
818:   for (i=0;i<k;i++) {
819:     Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
820:     Q[k+i*ld] = alpha*A[k+i*ld];
821:   }

823:   /* compute qr */
824:   PetscBLASIntCast(k+1,&n1);
825:   PetscBLASIntCast(k,&n0);
826:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
827:   SlepcCheckLapackInfo("geqrf",info);

829:   /* copy R from Q */
830:   for (j=0;j<k;j++)
831:     for (i=0;i<=j;i++)
832:       R[i+j*ld] = Q[i+j*ld];

834:   /* compute orthogonal matrix in Q */
835:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
836:   SlepcCheckLapackInfo("orgqr",info);

838:   /* compute the updated matrix of projected problem */
839:   for (j=0;j<k;j++)
840:     for (i=0;i<k+1;i++)
841:       A[j*ld+i] = Q[i*ld+j];
842:   alpha = -1.0/alpha;
843:   PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
844:   for (i=0;i<k;i++)
845:     A[ld*i+i] -= alpha;
846:   return(0);
847: #endif
848: }

850: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
851: {
853:   if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
854:   else *flg = PETSC_FALSE;
855:   return(0);
856: }

858: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
859: {
861:   ds->ops->allocate      = DSAllocate_HEP;
862:   ds->ops->view          = DSView_HEP;
863:   ds->ops->vectors       = DSVectors_HEP;
864:   ds->ops->solve[0]      = DSSolve_HEP_QR;
865:   ds->ops->solve[1]      = DSSolve_HEP_MRRR;
866:   ds->ops->solve[2]      = DSSolve_HEP_DC;
867: #if !defined(PETSC_USE_COMPLEX)
868:   ds->ops->solve[3]      = DSSolve_HEP_BDC;
869: #endif
870:   ds->ops->sort          = DSSort_HEP;
871:   ds->ops->synchronize   = DSSynchronize_HEP;
872:   ds->ops->truncate      = DSTruncate_HEP;
873:   ds->ops->update        = DSUpdateExtraRow_HEP;
874:   ds->ops->cond          = DSCond_HEP;
875:   ds->ops->transrks      = DSTranslateRKS_HEP;
876:   ds->ops->hermitian     = DSHermitian_HEP;
877:   return(0);
878: }