Actual source code: fnlog.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Logarithm function log(x)
12: */
14: #include <slepc/private/fnimpl.h> /*I "slepcfn.h" I*/
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
18: {
20: #if !defined(PETSC_USE_COMPLEX)
21: if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Function not defined in the requested value");
22: #endif
23: *y = PetscLogScalar(x);
24: return(0);
25: }
27: PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
28: {
30: if (x==0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
31: #if !defined(PETSC_USE_COMPLEX)
32: if (x<0.0) SETERRQ(PETSC_COMM_SELF,1,"Derivative not defined in the requested value");
33: #endif
34: *y = 1.0/x;
35: return(0);
36: }
38: /*
39: Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
40: structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
41: 0 - not the start of a block
42: 1 - start of a 2x2 triangular block
43: 2 - start of a 2x2 quasi-triangular (full) block
44: */
45: static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
46: {
47: PetscBLASInt j;
50: if (n==1) return(0);
51: else if (n==2) {
52: structure[0] = (T[1]==0.0)? 1: 2;
53: return(0);
54: }
55: j = 0;
56: while (j<n-2) {
57: if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
58: structure[j++] = 2;
59: structure[j++] = 0;
60: } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
61: structure[j++] = 1;
62: } else { /* Next block must start a 2x2 full block */
63: structure[j++] = 0;
64: }
65: }
66: if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
67: structure[n-2] = 2;
68: } else if (structure[n-3] == 0 || structure[n-3] == 1) {
69: structure[n-2] = 1;
70: }
71: return(0);
72: }
74: /*
75: Compute scaling parameter (s) and order of Pade approximant (m).
76: wr,wi is overwritten. Required workspace is 3*n*n.
77: On output, Troot contains the sth square root of T.
78: */
79: static PetscErrorCode logm_params(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
80: {
81: PetscErrorCode ierr;
82: PetscInt i,j,k,p,s0;
83: PetscReal inrm,eta,a2,a3,a4,d2,d3,d4,d5;
84: PetscScalar *TrootmI=work+2*n*n;
85: PetscBool foundm=PETSC_FALSE,more;
86: PetscRandom rand;
87: const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
88: 6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
89: const PetscInt mmax=sizeof(xvals)/sizeof(xvals[0]);
92: PetscRandomCreate(PETSC_COMM_SELF,&rand);
93: /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
94: *s = 0;
95: do {
96: inrm = SlepcAbsEigenvalue(wr[0]-1.0,wi[0]);
97: for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-1.0,wi[i]));
98: if (inrm < xvals[mmax-1]) break;
99: for (i=0;i<n;i++) {
100: #if defined(PETSC_USE_COMPLEX)
101: wr[i] = PetscSqrtScalar(wr[i]);
102: #else
103: #if defined(PETSC_HAVE_COMPLEX)
104: PetscComplex z = PetscSqrtComplex(wr[i]+wi[i]*PETSC_i);
105: wr[i] = PetscRealPartComplex(z);
106: wi[i] = PetscImaginaryPartComplex(z);
107: #else
108: SETERRQ(PETSC_COMM_SELF,1,"This operation requires a compiler with C99 or C++ complex support");
109: #endif
110: #endif
111: }
112: *s = *s + 1;
113: } while (*s<maxroots);
114: s0 = *s;
115: if (*s == maxroots) { PetscInfo(NULL,"Too many matrix square roots\n"); }
117: /* Troot = T */
118: for (j=0;j<n;j++) {
119: PetscMemcpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n)*sizeof(PetscScalar));
120: }
121: for (k=1;k<=PetscMin(*s,maxroots);k++) {
122: SlepcSqrtmSchur(n,Troot,ld,PETSC_FALSE);
123: }
124: /* Compute value of s and m needed */
125: /* TrootmI = Troot - I */
126: for (j=0;j<n;j++) {
127: PetscMemcpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)*sizeof(PetscScalar));
128: TrootmI[j+j*ld] -= 1.0;
129: }
130: SlepcNormAm(n,TrootmI,2,work,rand,&d2);
131: d2 = PetscPowReal(d2,1.0/2.0);
132: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
133: d3 = PetscPowReal(d3,1.0/3.0);
134: a2 = PetscMax(d2,d3);
135: if (a2 <= xvals[1]) {
136: *m = (a2 <= xvals[0])? 1: 2;
137: foundm = PETSC_TRUE;
138: }
139: p = 0;
140: while (!foundm) {
141: more = PETSC_FALSE; /* More norm checks needed */
142: if (*s > s0) {
143: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
144: d3 = PetscPowReal(d3,1.0/3.0);
145: }
146: SlepcNormAm(n,TrootmI,4,work,rand,&d4);
147: d4 = PetscPowReal(d4,1.0/4.0);
148: a3 = PetscMax(d3,d4);
149: if (a3 <= xvals[mmax-1]) {
150: for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
151: if (j <= 5) {
152: *m = j+1;
153: break;
154: } else if (a3/2.0 <= xvals[4] && p < 2) {
155: more = PETSC_TRUE;
156: p = p + 1;
157: }
158: }
159: if (!more) {
160: SlepcNormAm(n,TrootmI,5,work,rand,&d5);
161: d5 = PetscPowReal(d5,1.0/5.0);
162: a4 = PetscMax(d4,d5);
163: eta = PetscMin(a3,a4);
164: if (eta <= xvals[mmax-1]) {
165: for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
166: *m = j + 1;
167: break;
168: }
169: }
170: if (*s == maxroots) {
171: PetscInfo(NULL,"Too many matrix square roots\n");
172: *m = mmax; /* No good value found so take largest */
173: break;
174: }
175: SlepcSqrtmSchur(n,Troot,ld,PETSC_FALSE);
176: /* TrootmI = Troot - I */
177: for (j=0;j<n;j++) {
178: PetscMemcpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n)*sizeof(PetscScalar));
179: TrootmI[j+j*ld] -= 1.0;
180: }
181: *s = *s + 1;
182: }
183: PetscRandomDestroy(&rand);
184: return(0);
185: }
187: /*
188: Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
189: */
190: static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
191: {
192: PetscScalar val,z0,r;
193: PetscReal angle;
194: PetscInt i,n0;
197: if (s == 0) val = a-1.0;
198: else {
199: n0 = s;
200: angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
201: if (angle >= PETSC_PI/2.0) {
202: a = PetscSqrtScalar(a);
203: n0 = s - 1;
204: }
205: z0 = a - 1.0;
206: a = PetscSqrtScalar(a);
207: r = 1.0 + a;
208: for (i=0;i<n0-1;i++) {
209: a = PetscSqrtScalar(a);
210: r = r*(1.0+a);
211: }
212: val = z0/r;
213: }
214: PetscFunctionReturn(val);
215: }
217: /*
218: Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
219: */
220: static PetscErrorCode sqrtm_tbt(PetscScalar *T)
221: {
222: PetscScalar t11,t12,t21,t22,r11,r22;
223: #if !defined(PETSC_USE_COMPLEX)
224: PetscScalar mu;
225: #endif
228: t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
229: if (t21 != 0.0) {
230: /* Compute square root of 2x2 quasitriangular block */
231: /* The algorithm assumes the special structure of real Schur form */
232: #if defined(PETSC_USE_COMPLEX)
233: SETERRQ(PETSC_COMM_SELF,1,"Should not reach this line in complex scalars");
234: #else
235: mu = PetscSqrtReal(-t21*t12);
236: if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
237: else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
238: T[0] = r11;
239: T[1] = t21/(2.0*r11);
240: T[2] = t12/(2.0*r11);
241: T[3] = r11;
242: #endif
243: } else {
244: /* Compute square root of 2x2 upper triangular block */
245: r11 = PetscSqrtScalar(t11);
246: r22 = PetscSqrtScalar(t22);
247: T[0] = r11;
248: T[2] = t12/(r11+r22);
249: T[3] = r22;
250: }
251: return(0);
252: }
254: /*
255: Inverse hyperbolic tangent of x
256: */
257: PETSC_STATIC_INLINE PetscScalar myatanh(PetscScalar x)
258: {
259: PetscScalar t;
262: #if !defined(PETSC_USE_COMPLEX)
263: if (x<=-1.0 || x>=1.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"x should be in (-1,1)");
264: t = 0.5*PetscLogScalar((1.0+x)/(1.0-x));
265: #else
266: t = 0.5*PetscLogScalar(1.0+x)-0.5*PetscLogScalar(1.0-x);
267: #endif
268: PetscFunctionReturn(t);
269: }
271: #if defined(PETSC_USE_COMPLEX)
272: /*
273: Unwinding number of z
274: */
275: PETSC_STATIC_INLINE PetscReal unwinding(PetscScalar z)
276: {
277: PetscReal u;
280: u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
281: PetscFunctionReturn(u);
282: }
283: #endif
285: /*
286: Power of 2-by-2 upper triangular matrix A.
287: Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
288: */
289: static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
290: {
291: PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
294: a1 = A11;
295: a2 = A22;
296: if (a1 == a2) {
297: x12 = p*A12*PetscPowScalarReal(a1,p-1);
298: } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
299: a1p = PetscPowScalarReal(a1,p);
300: a2p = PetscPowScalarReal(a2,p);
301: x12 = A12*(a2p-a1p)/(a2-a1);
302: } else { /* Close eigenvalues */
303: loga1 = PetscLogScalar(a1);
304: loga2 = PetscLogScalar(a2);
305: w = myatanh((a2-a1)/(a2+a1));
306: #if defined(PETSC_USE_COMPLEX)
307: w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
308: #endif
309: dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
310: x12 = A12*dd;
311: }
312: PetscFunctionReturn(x12);
313: }
315: /*
316: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
317: */
318: static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
319: {
321: PetscScalar a,A[4],P[4],M[4],Z0[4],det;
322: PetscInt i,j,last_block=0;
325: for (j=0;j<n-1;j++) {
326: switch (blockStruct[j]) {
327: case 0: /* Not start of a block */
328: if (last_block != 0) {
329: last_block = 0;
330: } else { /* In a 1x1 block */
331: a = T[j+j*ld];
332: Troot[j+j*ld] = sqrt_obo(a,s);
333: }
334: break;
335: default: /* In a 2x2 block */
336: last_block = blockStruct[j];
337: if (s == 0) {
338: Troot[j+j*ld] = T[j+j*ld]-1.0;
339: Troot[j+1+j*ld] = T[j+1+j*ld];
340: Troot[j+(j+1)*ld] = T[j+(j+1)*ld];
341: Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
342: continue;
343: }
344: A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
345: sqrtm_tbt(A);
346: /* Z0 = A - I */
347: Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
348: if (s == 1) {
349: Troot[j+j*ld] = Z0[0];
350: Troot[j+1+j*ld] = Z0[1];
351: Troot[j+(j+1)*ld] = Z0[2];
352: Troot[j+1+(j+1)*ld] = Z0[3];
353: continue;
354: }
355: sqrtm_tbt(A);
356: /* P = A + I */
357: P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
358: for (i=0;i<s-2;i++) {
359: sqrtm_tbt(A);
360: /* P = P*(I + A) */
361: M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
362: M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
363: M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
364: M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
365: P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
366: }
367: /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
368: det = P[0]*P[3]-P[1]*P[2];
369: Troot[j+j*ld] = (Z0[0]*P[3]-P[1]*Z0[2])/det;
370: Troot[j+(j+1)*ld] = (P[0]*Z0[2]-Z0[0]*P[2])/det;
371: Troot[j+1+j*ld] = (Z0[1]*P[3]-P[1]*Z0[3])/det;
372: Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
373: /* If block is upper triangular recompute the (1,2) element.
374: Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
375: if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
376: Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
377: }
378: }
379: }
380: /* If last diagonal entry is not in a block it will have been missed */
381: if (blockStruct[n-2] == 0) {
382: a = T[n-1+(n-1)*ld];
383: Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
384: }
385: return(0);
386: }
388: /*
389: Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
391: G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
392: rules, Math. Comp., 23(106):221-230, 1969.
393: */
394: static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
395: {
396: #if defined(PETSC_MISSING_LAPACK_SYEV)
398: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEV - Lapack routine is unavailable");
399: #else
401: PetscScalar v,a,*work;
402: PetscReal *eig,dummy;
403: PetscBLASInt k,ld=n,lwork,info;
404: #if defined(PETSC_USE_COMPLEX)
405: PetscReal *rwork,rdummy;
406: #endif
409: PetscMemzero(Q,n*n*sizeof(PetscScalar));
410: for (k=1;k<n;k++) {
411: v = k/PetscSqrtReal(4.0*k*k-1.0);
412: Q[k+(k-1)*n] = v;
413: Q[(k-1)+k*n] = v;
414: }
416: /* workspace query and memory allocation */
417: lwork = -1;
418: #if defined(PETSC_USE_COMPLEX)
419: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
420: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
421: PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork);
422: #else
423: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
424: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
425: PetscMalloc2(n,&eig,lwork,&work);
426: #endif
428: /* compute eigendecomposition */
429: #if defined(PETSC_USE_COMPLEX)
430: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
431: #else
432: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
433: #endif
434: SlepcCheckLapackInfo("syev",info);
436: for (k=0;k<n;k++) {
437: x[k] = eig[k];
438: w[k] = 2.0*Q[k*n]*Q[k*n];
439: }
440: #if defined(PETSC_USE_COMPLEX)
441: PetscFree3(eig,work,rwork);
442: #else
443: PetscFree2(eig,work);
444: #endif
445: PetscLogFlops(9.0*n*n*n+2.0*n*n*n);
446: return(0);
447: #endif
448: }
450: /*
451: Pade approximation to log(1 + T) via partial fractions
452: */
453: static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
454: {
456: PetscScalar *K,*W,*nodes,*wts;
457: PetscBLASInt *ipiv,info;
458: PetscInt i,j,k;
461: K = work;
462: W = work+n*n;
463: nodes = work+2*n*n;
464: wts = work+2*n*n+m;
465: ipiv = (PetscBLASInt*)(work+2*n*n+2*m);
466: gauss_legendre(m,nodes,wts,L);
467: /* Convert from [-1,1] to [0,1] */
468: for (i=0;i<m;i++) {
469: nodes[i] = (nodes[i]+1.0)/2.0;
470: wts[i] = wts[i]/2.0;
471: }
472: PetscMemzero(L,n*n*sizeof(PetscScalar));
473: for (k=0;k<m;k++) {
474: for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
475: for (i=0;i<n;i++) K[i+i*ld] += 1.0;
476: for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
477: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
478: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
479: }
480: return(0);
481: }
483: /*
484: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
485: */
486: static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
487: {
488: PetscScalar a1,a2,a12,loga1,loga2,z,dd;
489: PetscInt j,last_block=0;
490: #if !defined(PETSC_USE_COMPLEX)
491: PetscScalar f,t;
492: #endif
495: for (j=0;j<n-1;j++) {
496: switch (blockStruct[j]) {
497: case 0: /* Not start of a block */
498: if (last_block != 0) {
499: last_block = 0;
500: } else { /* In a 1x1 block */
501: L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
502: }
503: break;
504: case 1: /* Start of upper-tri block */
505: last_block = 1;
506: a1 = T[j+j*ld];
507: a2 = T[j+1+(j+1)*ld];
508: loga1 = PetscLogScalar(a1);
509: loga2 = PetscLogScalar(a2);
510: L[j+j*ld] = loga1;
511: L[j+1+(j+1)*ld] = loga2;
512: if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
513: /* Problems with 2 x 2 formula for (1,2) block
514: since atanh is nonstandard, just redo diagonal part */
515: continue;
516: }
517: if (a1 == a2) {
518: a12 = T[j+(j+1)*ld]/a1;
519: } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
520: a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
521: } else { /* Close eigenvalues */
522: z = (a2-a1)/(a2+a1);
523: dd = 2.0*myatanh(z);
524: #if defined(PETSC_USE_COMPLEX)
525: dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
526: #endif
527: dd /= (a2-a1);
528: a12 = T[j+(j+1)*ld]*dd;
529: }
530: L[j+(j+1)*ld] = a12;
531: break;
532: case 2: /* Start of quasi-tri block */
533: last_block = 2;
534: #if defined(PETSC_USE_COMPLEX)
535: SETERRQ(PETSC_COMM_SELF,1,"Should not reach this line in complex scalars");
536: #else
537: f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
538: t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
539: L[j+j*ld] = f;
540: L[j+1+j*ld] = t*T[j+1+j*ld];
541: L[j+(j+1)*ld] = t*T[j+(j+1)*ld];
542: L[j+1+(j+1)*ld] = f;
543: #endif
544: }
545: }
546: return(0);
547: }
548: /*
549: * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
550: *
551: * H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
552: * algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
553: */
554: static PetscErrorCode SlepcLogmPade(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
555: {
556: #if !defined(PETSC_HAVE_COMPLEX)
558: SETERRQ(PETSC_COMM_SELF,1,"This function requires C99 or C++ complex support");
559: #elif defined(SLEPC_MISSING_LAPACK_GEES)
561: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEES - Lapack routine is unavailable");
562: #else
564: PetscBLASInt k,sdim,lwork,info;
565: PetscScalar *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
566: PetscInt i,j,s=0,m=0,*blockformat;
567: #if defined(PETSC_USE_COMPLEX)
568: PetscReal *rwork;
569: #endif
572: lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to logm_params */
573: k = firstonly? 1: n;
575: /* compute Schur decomposition A*Q = Q*T */
576: PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat);
577: #if !defined(PETSC_USE_COMPLEX)
578: PetscMalloc1(n,&wi);
579: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
580: #else
581: PetscMalloc1(n,&rwork);
582: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
583: #endif
584: SlepcCheckLapackInfo("gees",info);
586: #if !defined(PETSC_USE_COMPLEX)
587: /* check for negative real eigenvalues */
588: for (i=0;i<n;i++) {
589: if (wr[i]<0.0 && wi[i]==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
590: }
591: #endif
593: /* get block structure of Schur factor */
594: qtri_struct(n,T,ld,blockformat);
596: /* get parameters */
597: logm_params(n,T,ld,wr,wi,100,&s,&m,Troot,work);
599: /* compute Troot - I = T(1/2^s) - I more accurately */
600: recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s);
602: /* compute Pade approximant */
603: pade_approx(n,Troot,L,ld,m,work);
605: /* scale back up, L = 2^s * L */
606: alpha = PetscPowInt(2,s);
607: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
609: /* recompute diagonal blocks */
610: recompute_diag_blocks_log(n,L,T,ld,blockformat);
612: /* backtransform B = Q*L*Q' */
613: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
614: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
616: /* flop count: Schur decomposition, and backtransform */
617: PetscLogFlops(25.0*n*n*n+4.0*n*n*k);
619: PetscFree7(wr,W,Q,Troot,L,work,blockformat);
620: #if !defined(PETSC_USE_COMPLEX)
621: PetscFree(wi);
622: #else
623: PetscFree(rwork);
624: #endif
625: return(0);
626: #endif
627: }
629: PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
630: {
632: PetscBLASInt n;
633: PetscScalar *T;
634: PetscInt m;
637: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
638: MatDenseGetArray(B,&T);
639: MatGetSize(A,&m,NULL);
640: PetscBLASIntCast(m,&n);
641: SlepcLogmPade(n,T,n,PETSC_FALSE);
642: MatDenseRestoreArray(B,&T);
643: return(0);
644: }
646: PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
647: {
649: PetscBLASInt n;
650: PetscScalar *T;
651: PetscInt m;
652: Mat B;
655: FN_AllocateWorkMat(fn,A,&B);
656: MatDenseGetArray(B,&T);
657: MatGetSize(A,&m,NULL);
658: PetscBLASIntCast(m,&n);
659: SlepcLogmPade(n,T,n,PETSC_TRUE);
660: MatDenseRestoreArray(B,&T);
661: MatGetColumnVector(B,v,0);
662: FN_FreeWorkMat(fn,&B);
663: return(0);
664: }
666: PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
667: {
669: PetscBool isascii;
670: char str[50];
671: const char *methodname[] = {
672: "scaling & squaring, [m/m] Pade approximant (Higham)"
673: };
674: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
677: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
678: if (isascii) {
679: if (fn->beta==(PetscScalar)1.0) {
680: if (fn->alpha==(PetscScalar)1.0) {
681: PetscViewerASCIIPrintf(viewer," Logarithm: log(x)\n");
682: } else {
683: SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
684: PetscViewerASCIIPrintf(viewer," Logarithm: log(%s*x)\n",str);
685: }
686: } else {
687: SlepcSNPrintfScalar(str,50,fn->beta,PETSC_TRUE);
688: if (fn->alpha==(PetscScalar)1.0) {
689: PetscViewerASCIIPrintf(viewer," Logarithm: %s*log(x)\n",str);
690: } else {
691: PetscViewerASCIIPrintf(viewer," Logarithm: %s",str);
692: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
693: SlepcSNPrintfScalar(str,50,fn->alpha,PETSC_TRUE);
694: PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str);
695: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
696: }
697: }
698: if (fn->method<nmeth) {
699: PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
700: }
701: }
702: return(0);
703: }
705: SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
706: {
708: fn->ops->evaluatefunction = FNEvaluateFunction_Log;
709: fn->ops->evaluatederivative = FNEvaluateDerivative_Log;
710: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Log_Higham;
711: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
712: fn->ops->view = FNView_Log;
713: return(0);
714: }