Actual source code: fnlog.c
slepc-3.16.2 2022-02-01
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2021, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Logarithm function log(x)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
18: {
20: #if !defined(PETSC_USE_COMPLEX)
21: if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Function not defined in the requested value");
22: #endif
23: *y = PetscLogScalar(x);
24: return(0);
25: }
27: PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
28: {
30: if (x==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
31: #if !defined(PETSC_USE_COMPLEX)
32: if (x<0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Derivative not defined in the requested value");
33: #endif
34: *y = 1.0/x;
35: return(0);
36: }
38: /*
39: Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
40: structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
41: 0 - not the start of a block
42: 1 - start of a 2x2 triangular block
43: 2 - start of a 2x2 quasi-triangular (full) block
44: */
45: static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
46: {
47: PetscBLASInt j;
50: #if defined(PETSC_USE_COMPLEX)
51: for (j=0;j<n-1;j++) structure[j] = 1;
52: #else
53: if (n==1) return(0);
54: else if (n==2) {
55: structure[0] = (T[1]==0.0)? 1: 2;
56: return(0);
57: }
58: j = 0;
59: while (j<n-2) {
60: if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
61: structure[j++] = 2;
62: structure[j++] = 0;
63: } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
64: structure[j++] = 1;
65: } else { /* Next block must start a 2x2 full block */
66: structure[j++] = 0;
67: }
68: }
69: if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
70: structure[n-2] = 2;
71: } else if (structure[n-3] == 0 || structure[n-3] == 1) {
72: structure[n-2] = 1;
73: }
74: #endif
75: return(0);
76: }
78: /*
79: Compute scaling parameter (s) and order of Pade approximant (m).
80: wr,wi is overwritten. Required workspace is 3*n*n.
81: On output, Troot contains the sth square root of T.
82: */
83: static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
84: {
85: PetscErrorCode ierr;
86: PetscInt i,j,k,p,s0;
87: PetscReal inrm,eta,a2,a3,a4,d2,d3,d4,d5;
88: PetscScalar *TrootmI=work+2*n*n;
89: PetscBool foundm=PETSC_FALSE,more;
90: PetscRandom rand;
91: const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
92: 6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
93: const PetscInt mmax=sizeof(xvals)/sizeof(xvals[0]);
96: PetscRandomCreate(PETSC_COMM_SELF,&rand);
97: /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
98: *s = 0;
99: do {
100: inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
101: for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
102: if (inrm < xvals[mmax-1]) break;
103: for (i=0;i<n;i++) {
104: #if defined(PETSC_USE_COMPLEX)
105: wr[i] = PetscSqrtScalar(wr[i]);
106: #else
107: #if defined(PETSC_HAVE_COMPLEX)
108: PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
109: wr[i] = PetscRealPartComplex(z);
110: wi[i] = PetscImaginaryPartComplex(z);
111: #else
112: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
113: #endif
114: #endif
115: }
116: *s = *s + 1;
117: } while (*s<maxroots);
118: s0 = *s;
119: if (*s == maxroots) { PetscInfo(fn,"Too many matrix square roots\n"); }
121: /* Troot = T */
122: for (j=0;j<n;j++) {
123: PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n));
124: }
125: for (k=1;k<=PetscMin(*s,maxroots);k++) {
126: FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE);
127: }
128: /* Compute value of s and m needed */
129: /* TrootmI = Troot - I */
130: for (j=0;j<n;j++) {
131: PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));
132: TrootmI[j+j*ld] -= 1.0;
133: }
134: SlepcNormAm(n,TrootmI,2,work,rand,&d2);
135: d2 = PetscPowReal(d2,1.0/2.0);
136: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
137: d3 = PetscPowReal(d3,1.0/3.0);
138: a2 = PetscMax(d2,d3);
139: if (a2 <= xvals[1]) {
140: *m = (a2 <= xvals[0])? 1: 2;
141: foundm = PETSC_TRUE;
142: }
143: p = 0;
144: while (!foundm) {
145: more = PETSC_FALSE; /* More norm checks needed */
146: if (*s > s0) {
147: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
148: d3 = PetscPowReal(d3,1.0/3.0);
149: }
150: SlepcNormAm(n,TrootmI,4,work,rand,&d4);
151: d4 = PetscPowReal(d4,1.0/4.0);
152: a3 = PetscMax(d3,d4);
153: if (a3 <= xvals[mmax-1]) {
154: for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
155: if (j <= 5) {
156: *m = j+1;
157: break;
158: } else if (a3/2.0 <= xvals[4] && p < 2) {
159: more = PETSC_TRUE;
160: p = p + 1;
161: }
162: }
163: if (!more) {
164: SlepcNormAm(n,TrootmI,5,work,rand,&d5);
165: d5 = PetscPowReal(d5,1.0/5.0);
166: a4 = PetscMax(d4,d5);
167: eta = PetscMin(a3,a4);
168: if (eta <= xvals[mmax-1]) {
169: for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
170: *m = j + 1;
171: break;
172: }
173: }
174: if (*s == maxroots) {
175: PetscInfo(fn,"Too many matrix square roots\n");
176: *m = mmax; /* No good value found so take largest */
177: break;
178: }
179: FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE);
180: /* TrootmI = Troot - I */
181: for (j=0;j<n;j++) {
182: PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));
183: TrootmI[j+j*ld] -= 1.0;
184: }
185: *s = *s + 1;
186: }
187: PetscRandomDestroy(&rand);
188: return(0);
189: }
191: #if !defined(PETSC_USE_COMPLEX)
192: /*
193: Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
194: */
195: static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
196: {
197: PetscScalar val,z0,r;
198: PetscReal angle;
199: PetscInt i,n0;
202: if (s == 0) val = a-1.0;
203: else {
204: n0 = s;
205: angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
206: if (angle >= PETSC_PI/2.0) {
207: a = PetscSqrtScalar(a);
208: n0 = s - 1;
209: }
210: z0 = a - 1.0;
211: a = PetscSqrtScalar(a);
212: r = 1.0 + a;
213: for (i=0;i<n0-1;i++) {
214: a = PetscSqrtScalar(a);
215: r = r*(1.0+a);
216: }
217: val = z0/r;
218: }
219: PetscFunctionReturn(val);
220: }
221: #endif
223: /*
224: Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
225: */
226: static PetscErrorCode sqrtm_tbt(PetscScalar *T)
227: {
228: PetscScalar t11,t12,t21,t22,r11,r22;
229: #if !defined(PETSC_USE_COMPLEX)
230: PetscScalar mu;
231: #endif
234: t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
235: if (t21 != 0.0) {
236: /* Compute square root of 2x2 quasitriangular block */
237: /* The algorithm assumes the special structure of real Schur form */
238: #if defined(PETSC_USE_COMPLEX)
239: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
240: #else
241: mu = PetscSqrtReal(-t21*t12);
242: if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
243: else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
244: T[0] = r11;
245: T[1] = t21/(2.0*r11);
246: T[2] = t12/(2.0*r11);
247: T[3] = r11;
248: #endif
249: } else {
250: /* Compute square root of 2x2 upper triangular block */
251: r11 = PetscSqrtScalar(t11);
252: r22 = PetscSqrtScalar(t22);
253: T[0] = r11;
254: T[2] = t12/(r11+r22);
255: T[3] = r22;
256: }
257: return(0);
258: }
260: #if defined(PETSC_USE_COMPLEX)
261: /*
262: Unwinding number of z
263: */
264: PETSC_STATIC_INLINE PetscReal unwinding(PetscScalar z)
265: {
266: PetscReal u;
269: u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
270: PetscFunctionReturn(u);
271: }
272: #endif
274: /*
275: Power of 2-by-2 upper triangular matrix A.
276: Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
277: */
278: static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
279: {
280: PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
283: a1 = A11;
284: a2 = A22;
285: if (a1 == a2) {
286: x12 = p*A12*PetscPowScalarReal(a1,p-1);
287: } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
288: a1p = PetscPowScalarReal(a1,p);
289: a2p = PetscPowScalarReal(a2,p);
290: x12 = A12*(a2p-a1p)/(a2-a1);
291: } else { /* Close eigenvalues */
292: loga1 = PetscLogScalar(a1);
293: loga2 = PetscLogScalar(a2);
294: w = PetscAtanhScalar((a2-a1)/(a2+a1));
295: #if defined(PETSC_USE_COMPLEX)
296: w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
297: #endif
298: dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
299: x12 = A12*dd;
300: }
301: PetscFunctionReturn(x12);
302: }
304: /*
305: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
306: */
307: static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
308: {
310: PetscScalar A[4],P[4],M[4],Z0[4],det;
311: PetscInt i,j;
312: #if !defined(PETSC_USE_COMPLEX)
313: PetscInt last_block=0;
314: PetscScalar a;
315: #endif
318: for (j=0;j<n-1;j++) {
319: #if !defined(PETSC_USE_COMPLEX)
320: switch (blockStruct[j]) {
321: case 0: /* Not start of a block */
322: if (last_block != 0) {
323: last_block = 0;
324: } else { /* In a 1x1 block */
325: a = T[j+j*ld];
326: Troot[j+j*ld] = sqrt_obo(a,s);
327: }
328: break;
329: default: /* In a 2x2 block */
330: last_block = blockStruct[j];
331: #endif
332: if (s == 0) {
333: Troot[j+j*ld] = T[j+j*ld]-1.0;
334: Troot[j+1+j*ld] = T[j+1+j*ld];
335: Troot[j+(j+1)*ld] = T[j+(j+1)*ld];
336: Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
337: continue;
338: }
339: A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
340: sqrtm_tbt(A);
341: /* Z0 = A - I */
342: Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
343: if (s == 1) {
344: Troot[j+j*ld] = Z0[0];
345: Troot[j+1+j*ld] = Z0[1];
346: Troot[j+(j+1)*ld] = Z0[2];
347: Troot[j+1+(j+1)*ld] = Z0[3];
348: continue;
349: }
350: sqrtm_tbt(A);
351: /* P = A + I */
352: P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
353: for (i=0;i<s-2;i++) {
354: sqrtm_tbt(A);
355: /* P = P*(I + A) */
356: M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
357: M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
358: M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
359: M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
360: P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
361: }
362: /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
363: det = P[0]*P[3]-P[1]*P[2];
364: Troot[j+j*ld] = (Z0[0]*P[3]-P[1]*Z0[2])/det;
365: Troot[j+(j+1)*ld] = (P[0]*Z0[2]-Z0[0]*P[2])/det;
366: Troot[j+1+j*ld] = (Z0[1]*P[3]-P[1]*Z0[3])/det;
367: Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
368: /* If block is upper triangular recompute the (1,2) element.
369: Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
370: if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
371: Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
372: }
373: #if !defined(PETSC_USE_COMPLEX)
374: }
375: #endif
376: }
377: #if !defined(PETSC_USE_COMPLEX)
378: /* If last diagonal entry is not in a block it will have been missed */
379: if (blockStruct[n-2] == 0) {
380: a = T[n-1+(n-1)*ld];
381: Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
382: }
383: #endif
384: return(0);
385: }
387: /*
388: Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
390: G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
391: rules, Math. Comp., 23(106):221-230, 1969.
392: */
393: static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
394: {
396: PetscScalar v,a,*work;
397: PetscReal *eig,dummy;
398: PetscBLASInt k,ld=n,lwork,info;
399: #if defined(PETSC_USE_COMPLEX)
400: PetscReal *rwork,rdummy;
401: #endif
404: PetscArrayzero(Q,n*n);
405: for (k=1;k<n;k++) {
406: v = k/PetscSqrtReal(4.0*k*k-1.0);
407: Q[k+(k-1)*n] = v;
408: Q[(k-1)+k*n] = v;
409: }
411: /* workspace query and memory allocation */
412: lwork = -1;
413: #if defined(PETSC_USE_COMPLEX)
414: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
415: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
416: PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork);
417: #else
418: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
419: PetscBLASIntCast((PetscInt)a,&lwork);
420: PetscMalloc2(n,&eig,lwork,&work);
421: #endif
423: /* compute eigendecomposition */
424: #if defined(PETSC_USE_COMPLEX)
425: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
426: #else
427: PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
428: #endif
429: SlepcCheckLapackInfo("syev",info);
431: for (k=0;k<n;k++) {
432: x[k] = eig[k];
433: w[k] = 2.0*Q[k*n]*Q[k*n];
434: }
435: #if defined(PETSC_USE_COMPLEX)
436: PetscFree3(eig,work,rwork);
437: #else
438: PetscFree2(eig,work);
439: #endif
440: PetscLogFlops(9.0*n*n*n+2.0*n*n*n);
441: return(0);
442: }
444: /*
445: Pade approximation to log(1 + T) via partial fractions
446: */
447: static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
448: {
450: PetscScalar *K,*W,*nodes,*wts;
451: PetscBLASInt *ipiv,info;
452: PetscInt i,j,k;
455: K = work;
456: W = work+n*n;
457: nodes = work+2*n*n;
458: wts = work+2*n*n+m;
459: ipiv = (PetscBLASInt*)(work+2*n*n+2*m);
460: gauss_legendre(m,nodes,wts,L);
461: /* Convert from [-1,1] to [0,1] */
462: for (i=0;i<m;i++) {
463: nodes[i] = (nodes[i]+1.0)/2.0;
464: wts[i] = wts[i]/2.0;
465: }
466: PetscArrayzero(L,n*n);
467: for (k=0;k<m;k++) {
468: for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
469: for (i=0;i<n;i++) K[i+i*ld] += 1.0;
470: for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
471: PetscStackCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
472: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
473: }
474: return(0);
475: }
477: /*
478: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
479: */
480: static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
481: {
482: PetscScalar a1,a2,a12,loga1,loga2,z,dd;
483: PetscInt j;
484: #if !defined(PETSC_USE_COMPLEX)
485: PetscInt last_block=0;
486: PetscScalar f,t;
487: #endif
490: for (j=0;j<n-1;j++) {
491: #if !defined(PETSC_USE_COMPLEX)
492: switch (blockStruct[j]) {
493: case 0: /* Not start of a block */
494: if (last_block != 0) {
495: last_block = 0;
496: } else { /* In a 1x1 block */
497: L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
498: }
499: break;
500: case 1: /* Start of upper-tri block */
501: last_block = 1;
502: #endif
503: a1 = T[j+j*ld];
504: a2 = T[j+1+(j+1)*ld];
505: loga1 = PetscLogScalar(a1);
506: loga2 = PetscLogScalar(a2);
507: L[j+j*ld] = loga1;
508: L[j+1+(j+1)*ld] = loga2;
509: if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
510: /* Problems with 2 x 2 formula for (1,2) block
511: since atanh is nonstandard, just redo diagonal part */
512: continue;
513: }
514: if (a1 == a2) {
515: a12 = T[j+(j+1)*ld]/a1;
516: } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
517: a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
518: } else { /* Close eigenvalues */
519: z = (a2-a1)/(a2+a1);
520: dd = 2.0*PetscAtanhScalar(z);
521: #if defined(PETSC_USE_COMPLEX)
522: dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
523: #endif
524: dd /= (a2-a1);
525: a12 = T[j+(j+1)*ld]*dd;
526: }
527: L[j+(j+1)*ld] = a12;
528: #if !defined(PETSC_USE_COMPLEX)
529: break;
530: case 2: /* Start of quasi-tri block */
531: last_block = 2;
532: f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
533: t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
534: L[j+j*ld] = f;
535: L[j+1+j*ld] = t*T[j+1+j*ld];
536: L[j+(j+1)*ld] = t*T[j+(j+1)*ld];
537: L[j+1+(j+1)*ld] = f;
538: }
539: #endif
540: }
541: return(0);
542: }
543: /*
544: * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
545: *
546: * H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
547: * algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
548: */
549: static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
550: {
551: #if !defined(PETSC_HAVE_COMPLEX)
553: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This function requires C99 or C++ complex support");
554: #else
556: PetscBLASInt k,sdim,lwork,info;
557: PetscScalar *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
558: PetscInt i,j,s=0,m=0,*blockformat;
559: #if defined(PETSC_USE_COMPLEX)
560: PetscReal *rwork;
561: #endif
564: lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
565: k = firstonly? 1: n;
567: /* compute Schur decomposition A*Q = Q*T */
568: PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat);
569: #if !defined(PETSC_USE_COMPLEX)
570: PetscMalloc1(n,&wi);
571: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
572: #else
573: PetscMalloc1(n,&rwork);
574: PetscStackCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
575: #endif
576: SlepcCheckLapackInfo("gees",info);
578: #if !defined(PETSC_USE_COMPLEX)
579: /* check for negative real eigenvalues */
580: for (i=0;i<n;i++) {
581: if (wr[i]<0.0 && wi[i]==0.0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix has negative real eigenvalue; rerun with complex scalars");
582: }
583: #endif
585: /* get block structure of Schur factor */
586: qtri_struct(n,T,ld,blockformat);
588: /* get parameters */
589: FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work);
591: /* compute Troot - I = T(1/2^s) - I more accurately */
592: recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s);
594: /* compute Pade approximant */
595: pade_approx(n,Troot,L,ld,m,work);
597: /* scale back up, L = 2^s * L */
598: alpha = PetscPowInt(2,s);
599: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
601: /* recompute diagonal blocks */
602: recompute_diag_blocks_log(n,L,T,ld,blockformat);
604: /* backtransform B = Q*L*Q' */
605: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
606: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
608: /* flop count: Schur decomposition, and backtransform */
609: PetscLogFlops(25.0*n*n*n+4.0*n*n*k);
611: PetscFree7(wr,W,Q,Troot,L,work,blockformat);
612: #if !defined(PETSC_USE_COMPLEX)
613: PetscFree(wi);
614: #else
615: PetscFree(rwork);
616: #endif
617: return(0);
618: #endif
619: }
621: PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
622: {
624: PetscBLASInt n = 0;
625: PetscScalar *T;
626: PetscInt m;
629: if (A!=B) { MatCopy(A,B,SAME_NONZERO_PATTERN); }
630: MatDenseGetArray(B,&T);
631: MatGetSize(A,&m,NULL);
632: PetscBLASIntCast(m,&n);
633: FNLogmPade(fn,n,T,n,PETSC_FALSE);
634: MatDenseRestoreArray(B,&T);
635: return(0);
636: }
638: PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
639: {
641: PetscBLASInt n = 0;
642: PetscScalar *T;
643: PetscInt m;
644: Mat B;
647: FN_AllocateWorkMat(fn,A,&B);
648: MatDenseGetArray(B,&T);
649: MatGetSize(A,&m,NULL);
650: PetscBLASIntCast(m,&n);
651: FNLogmPade(fn,n,T,n,PETSC_TRUE);
652: MatDenseRestoreArray(B,&T);
653: MatGetColumnVector(B,v,0);
654: FN_FreeWorkMat(fn,&B);
655: return(0);
656: }
658: PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
659: {
661: PetscBool isascii;
662: char str[50];
663: const char *methodname[] = {
664: "scaling & squaring, [m/m] Pade approximant (Higham)"
665: };
666: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
669: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
670: if (isascii) {
671: if (fn->beta==(PetscScalar)1.0) {
672: if (fn->alpha==(PetscScalar)1.0) {
673: PetscViewerASCIIPrintf(viewer," Logarithm: log(x)\n");
674: } else {
675: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
676: PetscViewerASCIIPrintf(viewer," Logarithm: log(%s*x)\n",str);
677: }
678: } else {
679: SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
680: if (fn->alpha==(PetscScalar)1.0) {
681: PetscViewerASCIIPrintf(viewer," Logarithm: %s*log(x)\n",str);
682: } else {
683: PetscViewerASCIIPrintf(viewer," Logarithm: %s",str);
684: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
685: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
686: PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str);
687: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
688: }
689: }
690: if (fn->method<nmeth) {
691: PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
692: }
693: }
694: return(0);
695: }
697: SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
698: {
700: fn->ops->evaluatefunction = FNEvaluateFunction_Log;
701: fn->ops->evaluatederivative = FNEvaluateDerivative_Log;
702: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Log_Higham;
703: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
704: fn->ops->view = FNView_Log;
705: return(0);
706: }