Actual source code: lmedense.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    Routines for solving dense matrix equations, in some cases calling SLICOT
 12: */

 14: #include <slepc/private/lmeimpl.h>     /*I "slepclme.h" I*/
 15: #include <slepcblaslapack.h>

 17: /*
 18:    LMERankSVD - given a square matrix L, compute its SVD U*S*V', and determine the
 19:    numerical rank. On exit, U contains U*S and L is overwritten with V'
 20: */
 21: PetscErrorCode LMERankSVD(LME lme,PetscInt n,PetscScalar *L,PetscScalar *U,PetscInt *rank)
 22: {
 23: #if defined(PETSC_MISSING_LAPACK_GESVD)
 25:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
 26: #else
 28:   PetscInt       i,j,rk=0;
 29:   PetscScalar    *work;
 30:   PetscReal      tol,*sg,*rwork;
 31:   PetscBLASInt   n_,info,lw_;

 34:   PetscCalloc3(n,&sg,10*n,&work,5*n,&rwork);
 35:   PetscBLASIntCast(n,&n_);
 36:   lw_ = 10*n_;
 37: #if !defined (PETSC_USE_COMPLEX)
 38:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,L,&n_,sg,U,&n_,NULL,&n_,work,&lw_,&info));
 39: #else
 40:   PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,L,&n_,sg,U,&n_,NULL,&n_,work,&lw_,rwork,&info));
 41: #endif
 42:   SlepcCheckLapackInfo("gesvd",info);
 43:   tol = 10*PETSC_MACHINE_EPSILON*n*sg[0];
 44:   for (j=0;j<n;j++) {
 45:     if (sg[j]>tol) {
 46:       for (i=0;i<n;i++) U[i+j*n] *= sg[j];
 47:       rk++;
 48:     } else break;
 49:   }
 50:   *rank = rk;
 51:   PetscFree3(sg,work,rwork);
 52:   return(0);
 53: #endif
 54: }

 56: #if defined(PETSC_USE_INFO)
 57: /*
 58:    LyapunovResidual - compute the residual norm ||H*L*L'+L*L'*H'+r*r'||
 59: */
 60: static PetscErrorCode LyapunovResidual(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
 61: {
 63:   PetscBLASInt   n,ld;
 64:   PetscInt       i,j;
 65:   PetscScalar    *M,*R,zero=0.0,done=1.0;

 68:   *res = 0;
 69:   PetscBLASIntCast(ldh,&ld);
 70:   PetscBLASIntCast(m,&n);
 71:   PetscMalloc2(m*m,&M,m*m,&R);

 73:   /* R = r*r' */
 74:   for (i=0;i<m;i++) {
 75:     for (j=0;j<m;j++) R[i+j*m] = r[i]*r[j];
 76:   }
 77:   /* M = H*L */
 78:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,H,&ld,L,&n,&zero,M,&n));
 79:   /* R = R+M*L' */
 80:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,M,&n,L,&n,&done,R,&n));
 81:   /* R = R+L*M' */
 82:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,L,&n,M,&n,&done,R,&n));
 83:   
 84:   *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
 85:   PetscFree2(M,R);
 86:   return(0);
 87: }
 88: #endif

 90: #if defined(SLEPC_HAVE_SLICOT)
 91: /*
 92:    LyapunovFact_SLICOT - implementation used when SLICOT is available
 93: */
 94: static PetscErrorCode LyapunovChol_SLICOT(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
 95: {
 96: #if defined(PETSC_MISSING_LAPACK_HSEQR)
 98:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
 99: #else
101:   PetscBLASInt   ilo=1,lwork,info,n,ld,ld1,ione=1;
102:   PetscInt       i,j;
103:   PetscReal      scal;
104:   PetscScalar    *Q,*W,*z,*wr,*work,zero=0.0,done=1.0,alpha,beta;
105: #if !defined(PETSC_USE_COMPLEX)
106:   PetscScalar    *wi;
107: #endif

110:   PetscBLASIntCast(ldh,&ld);
111:   PetscBLASIntCast(ldl,&ld1);
112:   PetscBLASIntCast(m,&n);
113:   PetscBLASIntCast(5*m,&lwork);

115:   /* compute the (real) Schur form of H */
116: #if !defined(PETSC_USE_COMPLEX)
117:   PetscCalloc6(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&wi,5*m,&work);
118:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,wi,Q,&n,work,&lwork,&info));
119: #else
120:   PetscCalloc5(m*m,&Q,m*m,&W,m,&z,m,&wr,5*m,&work);
121:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,Q,&n,work,&lwork,&info));
122: #endif
123:   SlepcCheckLapackInfo("hseqr",info);
124: #if defined(PETSC_USE_DEBUG)
125:   for (i=0;i<m;i++) if (PetscRealPart(wr[i])>0.0) SETERRQ(PETSC_COMM_SELF,1,"Positive eigenvalue found, the coefficient matrix is not stable");
126: #endif

128:   /* copy r into first column of W */
129:   PetscMemcpy(W,r,m*sizeof(PetscScalar));

131:   /* solve Lyapunov equation (Hammarling) */
132:   PetscStackCallBLAS("SLICOTsb03od",SLICOTsb03od_("C","F","N",&n,&ione,H,&ld,Q,&n,W,&n,&scal,wr,wi,work,&lwork,&info));
133:   if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SLICOT subroutine SB03OD %d",(int)info);
134:   if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);

136:   /* Tranpose L = W' */
137:   for (j=0;j<m;j++) {
138:     for (i=j;i<m;i++) L[i+j*ldl] = W[j+i*m];
139:   }

141:   /* resnorm = norm(H(m+1,:)*L*L'), use z = L*L(m,:)' */
142:   PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,L,&ld1,W+(m-1)*m,&ione,&zero,z,&ione));
143:   *res = 0.0;
144:   beta = H[m+(m-1)*ldh];
145:   for (j=0;j<m;j++) {
146:     alpha = beta*z[j];
147:     *res += alpha*alpha;
148:   }
149:   *res = PetscSqrtReal(*res);

151: #if !defined(PETSC_USE_COMPLEX)
152:   PetscFree6(Q,W,z,wr,wi,work);
153: #else
154:   PetscFree5(Q,W,z,wr,work);
155: #endif
156:   return(0);
157: #endif
158: }

160: #else

162: #if 0
163: /*
164:    AbsEig - given a matrix A that may be slightly indefinite (hence Cholesky fails)
165:    modify it by taking the absolute value of the eigenvalues: [U,S] = eig(A); A = U*abs(S)*U';
166: */
167: static PetscErrorCode AbsEig(PetscScalar *A,PetscInt m)
168: {
169: #if defined(PETSC_MISSING_LAPACK_SYEV) || defined(SLEPC_MISSING_LAPACK_LACPY)
171:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEV/LACPY - Lapack routines are unavailable");
172: #else
174:   PetscInt       i,j;
175:   PetscBLASInt   n,ld,lwork,info;
176:   PetscScalar    *Q,*W,*work,a,one=1.0,zero=0.0;
177:   PetscReal      *eig,dummy;
178: #if defined(PETSC_USE_COMPLEX)
179:   PetscReal      *rwork,rdummy;
180: #endif

183:   PetscBLASIntCast(m,&n);
184:   ld = n;

186:   /* workspace query and memory allocation */
187:   lwork = -1;
188: #if defined(PETSC_USE_COMPLEX)
189:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,A,&ld,&dummy,&a,&lwork,&rdummy,&info));
190:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
191:   PetscMalloc5(m,&eig,m*m,&Q,m*n,&W,lwork,&work,PetscMax(1,3*m-2),&rwork);
192: #else
193:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,A,&ld,&dummy,&a,&lwork,&info));
194:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
195:   PetscMalloc4(m,&eig,m*m,&Q,m*n,&W,lwork,&work);
196: #endif

198:   /* compute eigendecomposition */
199:   PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L",&n,&n,A,&ld,Q,&ld));
200: #if defined(PETSC_USE_COMPLEX)
201:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
202: #else
203:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
204: #endif
205:   SlepcCheckLapackInfo("syev",info);

207:   /* W = f(Lambda)*Q' */
208:   for (i=0;i<n;i++) {
209:     for (j=0;j<n;j++) W[i+j*ld] = Q[j+i*ld]*PetscAbsScalar(eig[i]);
210:   }
211:   /* A = Q*W */
212:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,W,&ld,&zero,A,&ld));
213: #if defined(PETSC_USE_COMPLEX)
214:   PetscFree5(eig,Q,W,work,rwork);
215: #else
216:   PetscFree4(eig,Q,W,work);
217: #endif
218:   return(0);
219: #endif
220: }
221: #endif

223: /*
224:    Compute the lower Cholesky factor of A
225:  */
226: static PetscErrorCode CholeskyFactor(PetscScalar *A,PetscInt m,PetscInt lda)
227: {
228: #if defined(PETSC_MISSING_LAPACK_POTRF)
230:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable");
231: #else
233:   PetscInt       i;
234:   PetscScalar    *S;
235:   PetscBLASInt   info,n,ld;

238:   PetscBLASIntCast(m,&n);
239:   PetscBLASIntCast(lda,&ld);
240:   PetscMalloc1(m*m,&S);

242:   /* save a copy of matrix in S */
243:   for (i=0;i<m;i++) {
244:     PetscMemcpy(S+i*m,A+i*lda,m*sizeof(PetscScalar));
245:   }

247:   /* compute upper Cholesky factor in R */
248:   PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,A,&ld,&info));
249:   PetscLogFlops((1.0*n*n*n)/3.0);

251:   if (info) {  /* LAPACKpotrf failed, retry on diagonally perturbed matrix */
252:     for (i=0;i<m;i++) {
253:       PetscMemcpy(A+i*lda,S+i*m,m*sizeof(PetscScalar));
254:       A[i+i*lda] += 50.0*PETSC_MACHINE_EPSILON;
255:     }
256:     PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,A,&ld,&info));
257:     SlepcCheckLapackInfo("potrf",info);
258:     PetscLogFlops((1.0*n*n*n)/3.0);
259:   }

261:   /* Zero out entries above the diagonal */
262:   for (i=1;i<m;i++) {
263:     PetscMemzero(A+i*lda,i*sizeof(PetscScalar));
264:   }
265:   PetscFree(S);
266:   return(0);
267: #endif
268: }

270: /*
271:    LyapunovFact_LAPACK - alternative implementation when SLICOT is not available
272: */
273: static PetscErrorCode LyapunovChol_LAPACK(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
274: {
275: #if defined(PETSC_MISSING_LAPACK_HSEQR) || defined(SLEPC_MISSING_LAPACK_TRSYL)
277:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR/TRSYL - Lapack routines are unavailable");
278: #else
280:   PetscBLASInt   ilo=1,lwork,info,n,ld,ld1,ione=1;
281:   PetscInt       i,j;
282:   PetscReal      scal,beta;
283:   PetscScalar    *Q,*C,*W,*z,*wr,*work,zero=0.0,done=1.0;
284: #if !defined(PETSC_USE_COMPLEX)
285:   PetscScalar    *wi;
286: #endif

289:   PetscBLASIntCast(ldh,&ld);
290:   PetscBLASIntCast(ldl,&ld1);
291:   PetscBLASIntCast(m,&n);
292:   lwork = n;
293:   C = L;

295:   /* compute the (real) Schur form of H */
296: #if !defined(PETSC_USE_COMPLEX)
297:   PetscMalloc6(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&wi,m,&work);
298:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,wi,Q,&n,work,&lwork,&info));
299: #else
300:   PetscMalloc5(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&work);
301:   PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,Q,&n,work,&lwork,&info));
302: #endif
303:   SlepcCheckLapackInfo("hseqr",info);
304: #if defined(PETSC_USE_DEBUG)
305:   for (i=0;i<m;i++) if (PetscRealPart(wr[i])>0.0) SETERRQ(PETSC_COMM_SELF,1,"Positive eigenvalue found, the coefficient matrix is not stable");
306: #endif

308:   /* C = z*z', z = Q'*r */
309:   PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&done,Q,&n,r,&ione,&zero,z,&ione));
310:   for (i=0;i<m;i++) {
311:     for (j=0;j<m;j++) C[i+j*ldl] = -z[i]*PetscConj(z[j]);
312:   }

314:   /* solve triangular Sylvester equation */
315:   PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,H,&ld,H,&ld,C,&ld1,&scal,&info));
316:   SlepcCheckLapackInfo("trsyl",info);
317:   if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
318:   
319:   /* back-transform C = Q*C*Q' */
320:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,C,&n,&zero,W,&n));
321:   PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,C,&ld1));

323:   /* resnorm = norm(H(m+1,:)*Y) */
324:   beta = PetscAbsScalar(H[m+(m-1)*ldh]);
325:   *res = beta*BLASnrm2_(&n,C+m-1,&n);

327: #if 0
328:   /* avoid problems due to (small) negative eigenvalues */
329:   AbsEig(C,m);
330: #endif

332:   /* L = chol(C,'lower') */
333:   CholeskyFactor(C,m,ldl);

335: #if !defined(PETSC_USE_COMPLEX)
336:   PetscFree6(Q,W,z,wr,wi,work);
337: #else
338:   PetscFree5(Q,W,z,wr,work);
339: #endif
340:   return(0);
341: #endif
342: }

344: #endif /* SLEPC_HAVE_SLICOT */

346: /*@C
347:    LMEDenseLyapunovFact - Computes the Cholesky factor of the solution of a
348:    dense Lyapunov equation with rank-1 right-hand side.

350:    Logically Collective on LME

352:    Input Parameters:
353: +  lme - linear matrix equation solver context
354: .  H   - coefficient matrix
355: .  m   - problem size
356: .  ldh - leading dimension of H
357: .  r   - right-hand side vector
358: -  ldl - leading dimension of L

360:    Output Parameter:
361: +  L   - Cholesky factor of the solution
362: -  res - residual norm

364:    Note:
365:    The Lyapunov equation has the form H*X + X*H' = -r*r', where H represents
366:    the leading mxm submatrix of argument H, and the solution X = L*L'.

368:    H is assumed to be in upper Hessenberg form, with dimensions (m+1)xm.
369:    The last row is used to compute the residual norm, assuming H and r come
370:    from the projection onto an Arnoldi basis.

372:    Level: developer

374: .seealso: LMESolve()
375: @*/
376: PetscErrorCode LMEDenseLyapunovChol(LME lme,PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
377: {
379: #if defined(PETSC_USE_INFO)
380:   PetscInt       i;
381:   PetscScalar    *Hcopy;
382:   PetscReal      error;
383: #endif

386: #if defined(PETSC_USE_INFO)
387:   if (PetscLogPrintInfo) {
388:     PetscMalloc1(m*m,&Hcopy);
389:     for (i=0;i<m;i++) {
390:       PetscMemcpy(Hcopy+i*m,H+i*ldh,m*sizeof(PetscScalar));
391:     }
392:   }
393: #endif

395: #if defined(SLEPC_HAVE_SLICOT)
396:   LyapunovChol_SLICOT(H,m,ldh,r,L,ldl,res);
397: #else
398:   LyapunovChol_LAPACK(H,m,ldh,r,L,ldl,res);
399: #endif

401: #if defined(PETSC_USE_INFO)
402:   if (PetscLogPrintInfo) {
403:     LyapunovResidual(Hcopy,m,m,r,L,ldl,&error);
404:     PetscInfo1(lme,"Residual norm of dense Lyapunov equation = %g\n",error);
405:     PetscFree(Hcopy);
406:   }
407: #endif
408:   return(0);
409: }