Actual source code: lmedense.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Routines for solving dense matrix equations, in some cases calling SLICOT
12: */
14: #include <slepc/private/lmeimpl.h> /*I "slepclme.h" I*/
15: #include <slepcblaslapack.h>
17: /*
18: LMERankSVD - given a square matrix L, compute its SVD U*S*V', and determine the
19: numerical rank. On exit, U contains U*S and L is overwritten with V'
20: */
21: PetscErrorCode LMERankSVD(LME lme,PetscInt n,PetscScalar *L,PetscScalar *U,PetscInt *rank)
22: {
23: #if defined(PETSC_MISSING_LAPACK_GESVD)
25: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GESVD - Lapack routine is unavailable");
26: #else
28: PetscInt i,j,rk=0;
29: PetscScalar *work;
30: PetscReal tol,*sg,*rwork;
31: PetscBLASInt n_,info,lw_;
34: PetscCalloc3(n,&sg,10*n,&work,5*n,&rwork);
35: PetscBLASIntCast(n,&n_);
36: lw_ = 10*n_;
37: #if !defined (PETSC_USE_COMPLEX)
38: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,L,&n_,sg,U,&n_,NULL,&n_,work,&lw_,&info));
39: #else
40: PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("S","O",&n_,&n_,L,&n_,sg,U,&n_,NULL,&n_,work,&lw_,rwork,&info));
41: #endif
42: SlepcCheckLapackInfo("gesvd",info);
43: tol = 10*PETSC_MACHINE_EPSILON*n*sg[0];
44: for (j=0;j<n;j++) {
45: if (sg[j]>tol) {
46: for (i=0;i<n;i++) U[i+j*n] *= sg[j];
47: rk++;
48: } else break;
49: }
50: *rank = rk;
51: PetscFree3(sg,work,rwork);
52: return(0);
53: #endif
54: }
56: #if defined(PETSC_USE_INFO)
57: /*
58: LyapunovResidual - compute the residual norm ||H*L*L'+L*L'*H'+r*r'||
59: */
60: static PetscErrorCode LyapunovResidual(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
61: {
63: PetscBLASInt n,ld;
64: PetscInt i,j;
65: PetscScalar *M,*R,zero=0.0,done=1.0;
68: *res = 0;
69: PetscBLASIntCast(ldh,&ld);
70: PetscBLASIntCast(m,&n);
71: PetscMalloc2(m*m,&M,m*m,&R);
73: /* R = r*r' */
74: for (i=0;i<m;i++) {
75: for (j=0;j<m;j++) R[i+j*m] = r[i]*r[j];
76: }
77: /* M = H*L */
78: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,H,&ld,L,&n,&zero,M,&n));
79: /* R = R+M*L' */
80: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,M,&n,L,&n,&done,R,&n));
81: /* R = R+L*M' */
82: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,L,&n,M,&n,&done,R,&n));
83:
84: *res = LAPACKlange_("F",&n,&n,R,&n,NULL);
85: PetscFree2(M,R);
86: return(0);
87: }
88: #endif
90: #if defined(SLEPC_HAVE_SLICOT)
91: /*
92: LyapunovFact_SLICOT - implementation used when SLICOT is available
93: */
94: static PetscErrorCode LyapunovChol_SLICOT(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
95: {
96: #if defined(PETSC_MISSING_LAPACK_HSEQR)
98: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR - Lapack routine is unavailable");
99: #else
101: PetscBLASInt ilo=1,lwork,info,n,ld,ld1,ione=1;
102: PetscInt i,j;
103: PetscReal scal;
104: PetscScalar *Q,*W,*z,*wr,*work,zero=0.0,done=1.0,alpha,beta;
105: #if !defined(PETSC_USE_COMPLEX)
106: PetscScalar *wi;
107: #endif
110: PetscBLASIntCast(ldh,&ld);
111: PetscBLASIntCast(ldl,&ld1);
112: PetscBLASIntCast(m,&n);
113: PetscBLASIntCast(5*m,&lwork);
115: /* compute the (real) Schur form of H */
116: #if !defined(PETSC_USE_COMPLEX)
117: PetscCalloc6(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&wi,5*m,&work);
118: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,wi,Q,&n,work,&lwork,&info));
119: #else
120: PetscCalloc5(m*m,&Q,m*m,&W,m,&z,m,&wr,5*m,&work);
121: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,Q,&n,work,&lwork,&info));
122: #endif
123: SlepcCheckLapackInfo("hseqr",info);
124: #if defined(PETSC_USE_DEBUG)
125: for (i=0;i<m;i++) if (PetscRealPart(wr[i])>0.0) SETERRQ(PETSC_COMM_SELF,1,"Positive eigenvalue found, the coefficient matrix is not stable");
126: #endif
128: /* copy r into first column of W */
129: PetscMemcpy(W,r,m*sizeof(PetscScalar));
131: /* solve Lyapunov equation (Hammarling) */
132: PetscStackCallBLAS("SLICOTsb03od",SLICOTsb03od_("C","F","N",&n,&ione,H,&ld,Q,&n,W,&n,&scal,wr,wi,work,&lwork,&info));
133: if (info) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SLICOT subroutine SB03OD %d",(int)info);
134: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
136: /* Tranpose L = W' */
137: for (j=0;j<m;j++) {
138: for (i=j;i<m;i++) L[i+j*ldl] = W[j+i*m];
139: }
141: /* resnorm = norm(H(m+1,:)*L*L'), use z = L*L(m,:)' */
142: PetscStackCallBLAS("BLASgemv",BLASgemv_("N",&n,&n,&done,L,&ld1,W+(m-1)*m,&ione,&zero,z,&ione));
143: *res = 0.0;
144: beta = H[m+(m-1)*ldh];
145: for (j=0;j<m;j++) {
146: alpha = beta*z[j];
147: *res += alpha*alpha;
148: }
149: *res = PetscSqrtReal(*res);
151: #if !defined(PETSC_USE_COMPLEX)
152: PetscFree6(Q,W,z,wr,wi,work);
153: #else
154: PetscFree5(Q,W,z,wr,work);
155: #endif
156: return(0);
157: #endif
158: }
160: #else
162: #if 0
163: /*
164: AbsEig - given a matrix A that may be slightly indefinite (hence Cholesky fails)
165: modify it by taking the absolute value of the eigenvalues: [U,S] = eig(A); A = U*abs(S)*U';
166: */
167: static PetscErrorCode AbsEig(PetscScalar *A,PetscInt m)
168: {
169: #if defined(PETSC_MISSING_LAPACK_SYEV) || defined(SLEPC_MISSING_LAPACK_LACPY)
171: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEV/LACPY - Lapack routines are unavailable");
172: #else
174: PetscInt i,j;
175: PetscBLASInt n,ld,lwork,info;
176: PetscScalar *Q,*W,*work,a,one=1.0,zero=0.0;
177: PetscReal *eig,dummy;
178: #if defined(PETSC_USE_COMPLEX)
179: PetscReal *rwork,rdummy;
180: #endif
183: PetscBLASIntCast(m,&n);
184: ld = n;
186: /* workspace query and memory allocation */
187: lwork = -1;
188: #if defined(PETSC_USE_COMPLEX)
189: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,A,&ld,&dummy,&a,&lwork,&rdummy,&info));
190: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
191: PetscMalloc5(m,&eig,m*m,&Q,m*n,&W,lwork,&work,PetscMax(1,3*m-2),&rwork);
192: #else
193: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,A,&ld,&dummy,&a,&lwork,&info));
194: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
195: PetscMalloc4(m,&eig,m*m,&Q,m*n,&W,lwork,&work);
196: #endif
198: /* compute eigendecomposition */
199: PetscStackCallBLAS("LAPACKlacpy",LAPACKlacpy_("L",&n,&n,A,&ld,Q,&ld));
200: #if defined(PETSC_USE_COMPLEX)
201: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
202: #else
203: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
204: #endif
205: SlepcCheckLapackInfo("syev",info);
207: /* W = f(Lambda)*Q' */
208: for (i=0;i<n;i++) {
209: for (j=0;j<n;j++) W[i+j*ld] = Q[j+i*ld]*PetscAbsScalar(eig[i]);
210: }
211: /* A = Q*W */
212: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&one,Q,&ld,W,&ld,&zero,A,&ld));
213: #if defined(PETSC_USE_COMPLEX)
214: PetscFree5(eig,Q,W,work,rwork);
215: #else
216: PetscFree4(eig,Q,W,work);
217: #endif
218: return(0);
219: #endif
220: }
221: #endif
223: /*
224: Compute the lower Cholesky factor of A
225: */
226: static PetscErrorCode CholeskyFactor(PetscScalar *A,PetscInt m,PetscInt lda)
227: {
228: #if defined(PETSC_MISSING_LAPACK_POTRF)
230: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF - Lapack routine is unavailable");
231: #else
233: PetscInt i;
234: PetscScalar *S;
235: PetscBLASInt info,n,ld;
238: PetscBLASIntCast(m,&n);
239: PetscBLASIntCast(lda,&ld);
240: PetscMalloc1(m*m,&S);
242: /* save a copy of matrix in S */
243: for (i=0;i<m;i++) {
244: PetscMemcpy(S+i*m,A+i*lda,m*sizeof(PetscScalar));
245: }
247: /* compute upper Cholesky factor in R */
248: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,A,&ld,&info));
249: PetscLogFlops((1.0*n*n*n)/3.0);
251: if (info) { /* LAPACKpotrf failed, retry on diagonally perturbed matrix */
252: for (i=0;i<m;i++) {
253: PetscMemcpy(A+i*lda,S+i*m,m*sizeof(PetscScalar));
254: A[i+i*lda] += 50.0*PETSC_MACHINE_EPSILON;
255: }
256: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&n,A,&ld,&info));
257: SlepcCheckLapackInfo("potrf",info);
258: PetscLogFlops((1.0*n*n*n)/3.0);
259: }
261: /* Zero out entries above the diagonal */
262: for (i=1;i<m;i++) {
263: PetscMemzero(A+i*lda,i*sizeof(PetscScalar));
264: }
265: PetscFree(S);
266: return(0);
267: #endif
268: }
270: /*
271: LyapunovFact_LAPACK - alternative implementation when SLICOT is not available
272: */
273: static PetscErrorCode LyapunovChol_LAPACK(PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
274: {
275: #if defined(PETSC_MISSING_LAPACK_HSEQR) || defined(SLEPC_MISSING_LAPACK_TRSYL)
277: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"HSEQR/TRSYL - Lapack routines are unavailable");
278: #else
280: PetscBLASInt ilo=1,lwork,info,n,ld,ld1,ione=1;
281: PetscInt i,j;
282: PetscReal scal,beta;
283: PetscScalar *Q,*C,*W,*z,*wr,*work,zero=0.0,done=1.0;
284: #if !defined(PETSC_USE_COMPLEX)
285: PetscScalar *wi;
286: #endif
289: PetscBLASIntCast(ldh,&ld);
290: PetscBLASIntCast(ldl,&ld1);
291: PetscBLASIntCast(m,&n);
292: lwork = n;
293: C = L;
295: /* compute the (real) Schur form of H */
296: #if !defined(PETSC_USE_COMPLEX)
297: PetscMalloc6(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&wi,m,&work);
298: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,wi,Q,&n,work,&lwork,&info));
299: #else
300: PetscMalloc5(m*m,&Q,m*m,&W,m,&z,m,&wr,m,&work);
301: PetscStackCallBLAS("LAPACKhseqr",LAPACKhseqr_("S","I",&n,&ilo,&n,H,&ld,wr,Q,&n,work,&lwork,&info));
302: #endif
303: SlepcCheckLapackInfo("hseqr",info);
304: #if defined(PETSC_USE_DEBUG)
305: for (i=0;i<m;i++) if (PetscRealPart(wr[i])>0.0) SETERRQ(PETSC_COMM_SELF,1,"Positive eigenvalue found, the coefficient matrix is not stable");
306: #endif
308: /* C = z*z', z = Q'*r */
309: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&done,Q,&n,r,&ione,&zero,z,&ione));
310: for (i=0;i<m;i++) {
311: for (j=0;j<m;j++) C[i+j*ldl] = -z[i]*PetscConj(z[j]);
312: }
314: /* solve triangular Sylvester equation */
315: PetscStackCallBLAS("LAPACKtrsyl",LAPACKtrsyl_("N","C",&ione,&n,&n,H,&ld,H,&ld,C,&ld1,&scal,&info));
316: SlepcCheckLapackInfo("trsyl",info);
317: if (scal!=1.0) SETERRQ1(PETSC_COMM_SELF,1,"Current implementation cannot handle scale factor %g",scal);
318:
319: /* back-transform C = Q*C*Q' */
320: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&n,&n,&done,Q,&n,C,&n,&zero,W,&n));
321: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&n,&n,&done,W,&n,Q,&n,&zero,C,&ld1));
323: /* resnorm = norm(H(m+1,:)*Y) */
324: beta = PetscAbsScalar(H[m+(m-1)*ldh]);
325: *res = beta*BLASnrm2_(&n,C+m-1,&n);
327: #if 0
328: /* avoid problems due to (small) negative eigenvalues */
329: AbsEig(C,m);
330: #endif
332: /* L = chol(C,'lower') */
333: CholeskyFactor(C,m,ldl);
335: #if !defined(PETSC_USE_COMPLEX)
336: PetscFree6(Q,W,z,wr,wi,work);
337: #else
338: PetscFree5(Q,W,z,wr,work);
339: #endif
340: return(0);
341: #endif
342: }
344: #endif /* SLEPC_HAVE_SLICOT */
346: /*@C
347: LMEDenseLyapunovFact - Computes the Cholesky factor of the solution of a
348: dense Lyapunov equation with rank-1 right-hand side.
350: Logically Collective on LME
352: Input Parameters:
353: + lme - linear matrix equation solver context
354: . H - coefficient matrix
355: . m - problem size
356: . ldh - leading dimension of H
357: . r - right-hand side vector
358: - ldl - leading dimension of L
360: Output Parameter:
361: + L - Cholesky factor of the solution
362: - res - residual norm
364: Note:
365: The Lyapunov equation has the form H*X + X*H' = -r*r', where H represents
366: the leading mxm submatrix of argument H, and the solution X = L*L'.
368: H is assumed to be in upper Hessenberg form, with dimensions (m+1)xm.
369: The last row is used to compute the residual norm, assuming H and r come
370: from the projection onto an Arnoldi basis.
372: Level: developer
374: .seealso: LMESolve()
375: @*/
376: PetscErrorCode LMEDenseLyapunovChol(LME lme,PetscScalar *H,PetscInt m,PetscInt ldh,PetscScalar *r,PetscScalar *L,PetscInt ldl,PetscReal *res)
377: {
379: #if defined(PETSC_USE_INFO)
380: PetscInt i;
381: PetscScalar *Hcopy;
382: PetscReal error;
383: #endif
386: #if defined(PETSC_USE_INFO)
387: if (PetscLogPrintInfo) {
388: PetscMalloc1(m*m,&Hcopy);
389: for (i=0;i<m;i++) {
390: PetscMemcpy(Hcopy+i*m,H+i*ldh,m*sizeof(PetscScalar));
391: }
392: }
393: #endif
395: #if defined(SLEPC_HAVE_SLICOT)
396: LyapunovChol_SLICOT(H,m,ldh,r,L,ldl,res);
397: #else
398: LyapunovChol_LAPACK(H,m,ldh,r,L,ldl,res);
399: #endif
401: #if defined(PETSC_USE_INFO)
402: if (PetscLogPrintInfo) {
403: LyapunovResidual(Hcopy,m,m,r,L,ldl,&error);
404: PetscInfo1(lme,"Residual norm of dense Lyapunov equation = %g\n",error);
405: PetscFree(Hcopy);
406: }
407: #endif
408: return(0);
409: }