Actual source code: fnlog.c
slepc-3.18.2 2023-01-26
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: Logarithm function log(x)
12: */
14: #include <slepc/private/fnimpl.h>
15: #include <slepcblaslapack.h>
17: PetscErrorCode FNEvaluateFunction_Log(FN fn,PetscScalar x,PetscScalar *y)
18: {
19: #if !defined(PETSC_USE_COMPLEX)
21: #endif
22: *y = PetscLogScalar(x);
23: return 0;
24: }
26: PetscErrorCode FNEvaluateDerivative_Log(FN fn,PetscScalar x,PetscScalar *y)
27: {
29: #if !defined(PETSC_USE_COMPLEX)
31: #endif
32: *y = 1.0/x;
33: return 0;
34: }
36: /*
37: Block structure of a quasitriangular matrix T. Returns a list of n-1 numbers, where
38: structure(j) encodes the block type of the j:j+1,j:j+1 diagonal block as one of:
39: 0 - not the start of a block
40: 1 - start of a 2x2 triangular block
41: 2 - start of a 2x2 quasi-triangular (full) block
42: */
43: static PetscErrorCode qtri_struct(PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscInt *structure)
44: {
45: PetscBLASInt j;
47: #if defined(PETSC_USE_COMPLEX)
48: for (j=0;j<n-1;j++) structure[j] = 1;
49: #else
50: if (n==1) return 0;
51: else if (n==2) {
52: structure[0] = (T[1]==0.0)? 1: 2;
53: return 0;
54: }
55: j = 0;
56: while (j<n-2) {
57: if (T[j+1+j*ld] != 0.0) { /* Start of a 2x2 full block */
58: structure[j++] = 2;
59: structure[j++] = 0;
60: } else if (T[j+1+j*ld] == 0.0 && T[j+2+(j+1)*ld] == 0.0) { /* Start of a 2x2 triangular block */
61: structure[j++] = 1;
62: } else { /* Next block must start a 2x2 full block */
63: structure[j++] = 0;
64: }
65: }
66: if (T[n-1+(n-2)*ld] != 0.0) { /* 2x2 full block at the end */
67: structure[n-2] = 2;
68: } else if (structure[n-3] == 0 || structure[n-3] == 1) {
69: structure[n-2] = 1;
70: }
71: #endif
72: return 0;
73: }
75: /*
76: Compute scaling parameter (s) and order of Pade approximant (m).
77: wr,wi is overwritten. Required workspace is 3*n*n.
78: On output, Troot contains the sth square root of T.
79: */
80: static PetscErrorCode FNlogm_params(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscScalar *wr,PetscScalar *wi,PetscInt maxroots,PetscInt *s,PetscInt *m,PetscScalar *Troot,PetscScalar *work)
81: {
82: PetscInt i,j,k,p,s0;
83: PetscReal inrm,eta,a2,a3,a4,d2,d3,d4,d5;
84: PetscScalar *TrootmI=work+2*n*n;
85: PetscBool foundm=PETSC_FALSE,more;
86: PetscRandom rand;
87: const PetscReal xvals[] = { 1.586970738772063e-005, 2.313807884242979e-003, 1.938179313533253e-002,
88: 6.209171588994762e-002, 1.276404810806775e-001, 2.060962623452836e-001, 2.879093714241194e-001 };
89: const PetscInt mmax=PETSC_STATIC_ARRAY_LENGTH(xvals);
91: PetscRandomCreate(PETSC_COMM_SELF,&rand);
92: /* get initial s0 so that T^(1/2^s0) < xvals(mmax) */
93: *s = 0;
94: do {
95: inrm = SlepcAbsEigenvalue(wr[0]-PetscRealConstant(1.0),wi[0]);
96: for (i=1;i<n;i++) inrm = PetscMax(inrm,SlepcAbsEigenvalue(wr[i]-PetscRealConstant(1.0),wi[i]));
97: if (inrm < xvals[mmax-1]) break;
98: for (i=0;i<n;i++) {
99: #if defined(PETSC_USE_COMPLEX)
100: wr[i] = PetscSqrtScalar(wr[i]);
101: #else
102: #if defined(PETSC_HAVE_COMPLEX)
103: PetscComplex z = PetscSqrtComplex(PetscCMPLX(wr[i],wi[i]));
104: wr[i] = PetscRealPartComplex(z);
105: wi[i] = PetscImaginaryPartComplex(z);
106: #else
107: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This operation requires a compiler with C99 or C++ complex support");
108: #endif
109: #endif
110: }
111: *s = *s + 1;
112: } while (*s<maxroots);
113: s0 = *s;
114: if (*s == maxroots) PetscInfo(fn,"Too many matrix square roots\n");
116: /* Troot = T */
117: for (j=0;j<n;j++) PetscArraycpy(Troot+j*ld,T+j*ld,PetscMin(j+2,n));
118: for (k=1;k<=PetscMin(*s,maxroots);k++) FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE);
119: /* Compute value of s and m needed */
120: /* TrootmI = Troot - I */
121: for (j=0;j<n;j++) {
122: PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));
123: TrootmI[j+j*ld] -= 1.0;
124: }
125: SlepcNormAm(n,TrootmI,2,work,rand,&d2);
126: d2 = PetscPowReal(d2,1.0/2.0);
127: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
128: d3 = PetscPowReal(d3,1.0/3.0);
129: a2 = PetscMax(d2,d3);
130: if (a2 <= xvals[1]) {
131: *m = (a2 <= xvals[0])? 1: 2;
132: foundm = PETSC_TRUE;
133: }
134: p = 0;
135: while (!foundm) {
136: more = PETSC_FALSE; /* More norm checks needed */
137: if (*s > s0) {
138: SlepcNormAm(n,TrootmI,3,work,rand,&d3);
139: d3 = PetscPowReal(d3,1.0/3.0);
140: }
141: SlepcNormAm(n,TrootmI,4,work,rand,&d4);
142: d4 = PetscPowReal(d4,1.0/4.0);
143: a3 = PetscMax(d3,d4);
144: if (a3 <= xvals[mmax-1]) {
145: for (j=2;j<mmax;j++) if (a3 <= xvals[j]) break;
146: if (j <= 5) {
147: *m = j+1;
148: break;
149: } else if (a3/2.0 <= xvals[4] && p < 2) {
150: more = PETSC_TRUE;
151: p = p + 1;
152: }
153: }
154: if (!more) {
155: SlepcNormAm(n,TrootmI,5,work,rand,&d5);
156: d5 = PetscPowReal(d5,1.0/5.0);
157: a4 = PetscMax(d4,d5);
158: eta = PetscMin(a3,a4);
159: if (eta <= xvals[mmax-1]) {
160: for (j=5;j<mmax;j++) if (eta <= xvals[j]) break;
161: *m = j + 1;
162: break;
163: }
164: }
165: if (*s == maxroots) {
166: PetscInfo(fn,"Too many matrix square roots\n");
167: *m = mmax; /* No good value found so take largest */
168: break;
169: }
170: FNSqrtmSchur(fn,n,Troot,ld,PETSC_FALSE);
171: /* TrootmI = Troot - I */
172: for (j=0;j<n;j++) {
173: PetscArraycpy(TrootmI+j*ld,Troot+j*ld,PetscMin(j+2,n));
174: TrootmI[j+j*ld] -= 1.0;
175: }
176: *s = *s + 1;
177: }
178: PetscRandomDestroy(&rand);
179: return 0;
180: }
182: #if !defined(PETSC_USE_COMPLEX)
183: /*
184: Computes a^(1/2^s) - 1 accurately, avoiding subtractive cancellation
185: */
186: static PetscScalar sqrt_obo(PetscScalar a,PetscInt s)
187: {
188: PetscScalar val,z0,r;
189: PetscReal angle;
190: PetscInt i,n0;
192: if (s == 0) val = a-1.0;
193: else {
194: n0 = s;
195: angle = PetscAtan2Real(PetscImaginaryPart(a),PetscRealPart(a));
196: if (angle >= PETSC_PI/2.0) {
197: a = PetscSqrtScalar(a);
198: n0 = s - 1;
199: }
200: z0 = a - 1.0;
201: a = PetscSqrtScalar(a);
202: r = 1.0 + a;
203: for (i=0;i<n0-1;i++) {
204: a = PetscSqrtScalar(a);
205: r = r*(1.0+a);
206: }
207: val = z0/r;
208: }
209: return val;
210: }
211: #endif
213: /*
214: Square root of 2x2 matrix T from block diagonal of Schur form. Overwrites T.
215: */
216: static PetscErrorCode sqrtm_tbt(PetscScalar *T)
217: {
218: PetscScalar t11,t12,t21,t22,r11,r22;
219: #if !defined(PETSC_USE_COMPLEX)
220: PetscScalar mu;
221: #endif
223: t11 = T[0]; t21 = T[1]; t12 = T[2]; t22 = T[3];
224: if (t21 != 0.0) {
225: /* Compute square root of 2x2 quasitriangular block */
226: /* The algorithm assumes the special structure of real Schur form */
227: #if defined(PETSC_USE_COMPLEX)
228: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Should not reach this line in complex scalars");
229: #else
230: mu = PetscSqrtReal(-t21*t12);
231: if (t11 > 0.0) r11 = PetscSqrtReal((t11+SlepcAbsEigenvalue(t11,mu))/2.0);
232: else r11 = mu / PetscSqrtReal(2.0*(-t11+SlepcAbsEigenvalue(t11,mu)));
233: T[0] = r11;
234: T[1] = t21/(2.0*r11);
235: T[2] = t12/(2.0*r11);
236: T[3] = r11;
237: #endif
238: } else {
239: /* Compute square root of 2x2 upper triangular block */
240: r11 = PetscSqrtScalar(t11);
241: r22 = PetscSqrtScalar(t22);
242: T[0] = r11;
243: T[2] = t12/(r11+r22);
244: T[3] = r22;
245: }
246: return 0;
247: }
249: #if defined(PETSC_USE_COMPLEX)
250: /*
251: Unwinding number of z
252: */
253: static inline PetscReal unwinding(PetscScalar z)
254: {
255: PetscReal u;
257: u = PetscCeilReal((PetscImaginaryPart(z)-PETSC_PI)/(2.0*PETSC_PI));
258: return u;
259: }
260: #endif
262: /*
263: Power of 2-by-2 upper triangular matrix A.
264: Returns the (1,2) element of the pth power of A, where p is an arbitrary real number
265: */
266: static PetscScalar powerm2by2(PetscScalar A11,PetscScalar A22,PetscScalar A12,PetscReal p)
267: {
268: PetscScalar a1,a2,a1p,a2p,loga1,loga2,w,dd,x12;
270: a1 = A11;
271: a2 = A22;
272: if (a1 == a2) {
273: x12 = p*A12*PetscPowScalarReal(a1,p-1);
274: } else if (PetscAbsScalar(a1) < 0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2) < 0.5*PetscAbsScalar(a1)) {
275: a1p = PetscPowScalarReal(a1,p);
276: a2p = PetscPowScalarReal(a2,p);
277: x12 = A12*(a2p-a1p)/(a2-a1);
278: } else { /* Close eigenvalues */
279: loga1 = PetscLogScalar(a1);
280: loga2 = PetscLogScalar(a2);
281: w = PetscAtanhScalar((a2-a1)/(a2+a1));
282: #if defined(PETSC_USE_COMPLEX)
283: w += PETSC_i*PETSC_PI*unwinding(loga2-loga1);
284: #endif
285: dd = 2.0*PetscExpScalar(p*(loga1+loga2)/2.0)*PetscSinhScalar(p*w)/(a2-a1);
286: x12 = A12*dd;
287: }
288: return x12;
289: }
291: /*
292: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
293: */
294: static PetscErrorCode recompute_diag_blocks_sqrt(PetscBLASInt n,PetscScalar *Troot,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct,PetscInt s)
295: {
296: PetscScalar A[4],P[4],M[4],Z0[4],det;
297: PetscInt i,j;
298: #if !defined(PETSC_USE_COMPLEX)
299: PetscInt last_block=0;
300: PetscScalar a;
301: #endif
303: for (j=0;j<n-1;j++) {
304: #if !defined(PETSC_USE_COMPLEX)
305: switch (blockStruct[j]) {
306: case 0: /* Not start of a block */
307: if (last_block != 0) {
308: last_block = 0;
309: } else { /* In a 1x1 block */
310: a = T[j+j*ld];
311: Troot[j+j*ld] = sqrt_obo(a,s);
312: }
313: break;
314: default: /* In a 2x2 block */
315: last_block = blockStruct[j];
316: #endif
317: if (s == 0) {
318: Troot[j+j*ld] = T[j+j*ld]-1.0;
319: Troot[j+1+j*ld] = T[j+1+j*ld];
320: Troot[j+(j+1)*ld] = T[j+(j+1)*ld];
321: Troot[j+1+(j+1)*ld] = T[j+1+(j+1)*ld]-1.0;
322: continue;
323: }
324: A[0] = T[j+j*ld]; A[1] = T[j+1+j*ld]; A[2] = T[j+(j+1)*ld]; A[3] = T[j+1+(j+1)*ld];
325: sqrtm_tbt(A);
326: /* Z0 = A - I */
327: Z0[0] = A[0]-1.0; Z0[1] = A[1]; Z0[2] = A[2]; Z0[3] = A[3]-1.0;
328: if (s == 1) {
329: Troot[j+j*ld] = Z0[0];
330: Troot[j+1+j*ld] = Z0[1];
331: Troot[j+(j+1)*ld] = Z0[2];
332: Troot[j+1+(j+1)*ld] = Z0[3];
333: continue;
334: }
335: sqrtm_tbt(A);
336: /* P = A + I */
337: P[0] = A[0]+1.0; P[1] = A[1]; P[2] = A[2]; P[3] = A[3]+1.0;
338: for (i=0;i<s-2;i++) {
339: sqrtm_tbt(A);
340: /* P = P*(I + A) */
341: M[0] = P[0]*(A[0]+1.0)+P[2]*A[1];
342: M[1] = P[1]*(A[0]+1.0)+P[3]*A[1];
343: M[2] = P[0]*A[2]+P[2]*(A[3]+1.0);
344: M[3] = P[1]*A[2]+P[3]*(A[3]+1.0);
345: P[0] = M[0]; P[1] = M[1]; P[2] = M[2]; P[3] = M[3];
346: }
347: /* Troot(j:j+1,j:j+1) = Z0 / P (via Cramer) */
348: det = P[0]*P[3]-P[1]*P[2];
349: Troot[j+j*ld] = (Z0[0]*P[3]-P[1]*Z0[2])/det;
350: Troot[j+(j+1)*ld] = (P[0]*Z0[2]-Z0[0]*P[2])/det;
351: Troot[j+1+j*ld] = (Z0[1]*P[3]-P[1]*Z0[3])/det;
352: Troot[j+1+(j+1)*ld] = (P[0]*Z0[3]-Z0[1]*P[2])/det;
353: /* If block is upper triangular recompute the (1,2) element.
354: Skip when T(j,j) or T(j+1,j+1) < 0 since the implementation of atanh is nonstandard */
355: if (T[j+1+j*ld]==0.0 && PetscRealPart(T[j+j*ld])>=0.0 && PetscRealPart(T[j+1+(j+1)*ld])>=0.0) {
356: Troot[j+(j+1)*ld] = powerm2by2(T[j+j*ld],T[j+1+(j+1)*ld],T[j+(j+1)*ld],1.0/PetscPowInt(2,s));
357: }
358: #if !defined(PETSC_USE_COMPLEX)
359: }
360: #endif
361: }
362: #if !defined(PETSC_USE_COMPLEX)
363: /* If last diagonal entry is not in a block it will have been missed */
364: if (blockStruct[n-2] == 0) {
365: a = T[n-1+(n-1)*ld];
366: Troot[n-1+(n-1)*ld] = sqrt_obo(a,s);
367: }
368: #endif
369: return 0;
370: }
372: /*
373: Nodes x and weights w for n-point Gauss-Legendre quadrature (Q is n*n workspace)
375: G. H. Golub and J. H. Welsch, Calculation of Gauss quadrature
376: rules, Math. Comp., 23(106):221-230, 1969.
377: */
378: static PetscErrorCode gauss_legendre(PetscBLASInt n,PetscScalar *x,PetscScalar *w,PetscScalar *Q)
379: {
380: PetscScalar v,a,*work;
381: PetscReal *eig,dummy;
382: PetscBLASInt k,ld=n,lwork,info;
383: #if defined(PETSC_USE_COMPLEX)
384: PetscReal *rwork,rdummy;
385: #endif
387: PetscArrayzero(Q,n*n);
388: for (k=1;k<n;k++) {
389: v = k/PetscSqrtReal(4.0*k*k-1.0);
390: Q[k+(k-1)*n] = v;
391: Q[(k-1)+k*n] = v;
392: }
394: /* workspace query and memory allocation */
395: lwork = -1;
396: #if defined(PETSC_USE_COMPLEX)
397: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&rdummy,&info));
398: PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
399: PetscMalloc3(n,&eig,lwork,&work,PetscMax(1,3*n-2),&rwork);
400: #else
401: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,&dummy,&a,&lwork,&info));
402: PetscBLASIntCast((PetscInt)a,&lwork);
403: PetscMalloc2(n,&eig,lwork,&work);
404: #endif
406: /* compute eigendecomposition */
407: #if defined(PETSC_USE_COMPLEX)
408: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,rwork,&info));
409: #else
410: PetscCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n,Q,&ld,eig,work,&lwork,&info));
411: #endif
412: SlepcCheckLapackInfo("syev",info);
414: for (k=0;k<n;k++) {
415: x[k] = eig[k];
416: w[k] = 2.0*Q[k*n]*Q[k*n];
417: }
418: #if defined(PETSC_USE_COMPLEX)
419: PetscFree3(eig,work,rwork);
420: #else
421: PetscFree2(eig,work);
422: #endif
423: PetscLogFlops(9.0*n*n*n+2.0*n*n*n);
424: return 0;
425: }
427: /*
428: Pade approximation to log(1 + T) via partial fractions
429: */
430: static PetscErrorCode pade_approx(PetscBLASInt n,PetscScalar *T,PetscScalar *L,PetscBLASInt ld,PetscInt m,PetscScalar *work)
431: {
432: PetscScalar *K,*W,*nodes,*wts;
433: PetscBLASInt *ipiv,info;
434: PetscInt i,j,k;
436: K = work;
437: W = work+n*n;
438: nodes = work+2*n*n;
439: wts = work+2*n*n+m;
440: ipiv = (PetscBLASInt*)(work+2*n*n+2*m);
441: gauss_legendre(m,nodes,wts,L);
442: /* Convert from [-1,1] to [0,1] */
443: for (i=0;i<m;i++) {
444: nodes[i] = (nodes[i]+1.0)/2.0;
445: wts[i] = wts[i]/2.0;
446: }
447: PetscArrayzero(L,n*n);
448: for (k=0;k<m;k++) {
449: for (i=0;i<n;i++) for (j=0;j<n;j++) K[i+j*ld] = nodes[k]*T[i+j*ld];
450: for (i=0;i<n;i++) K[i+i*ld] += 1.0;
451: for (i=0;i<n;i++) for (j=0;j<n;j++) W[i+j*ld] = T[i+j*ld];
452: PetscCallBLAS("LAPACKgesv",LAPACKgesv_(&n,&n,K,&n,ipiv,W,&n,&info));
453: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] += wts[k]*W[i+j*ld];
454: }
455: return 0;
456: }
458: /*
459: Recomputes diagonal blocks of T = X^(1/2^s) - 1 more accurately
460: */
461: static PetscErrorCode recompute_diag_blocks_log(PetscBLASInt n,PetscScalar *L,PetscScalar *T,PetscBLASInt ld,PetscInt *blockStruct)
462: {
463: PetscScalar a1,a2,a12,loga1,loga2,z,dd;
464: PetscInt j;
465: #if !defined(PETSC_USE_COMPLEX)
466: PetscInt last_block=0;
467: PetscScalar f,t;
468: #endif
470: for (j=0;j<n-1;j++) {
471: #if !defined(PETSC_USE_COMPLEX)
472: switch (blockStruct[j]) {
473: case 0: /* Not start of a block */
474: if (last_block != 0) {
475: last_block = 0;
476: } else { /* In a 1x1 block */
477: L[j+j*ld] = PetscLogScalar(T[j+j*ld]);
478: }
479: break;
480: case 1: /* Start of upper-tri block */
481: last_block = 1;
482: #endif
483: a1 = T[j+j*ld];
484: a2 = T[j+1+(j+1)*ld];
485: loga1 = PetscLogScalar(a1);
486: loga2 = PetscLogScalar(a2);
487: L[j+j*ld] = loga1;
488: L[j+1+(j+1)*ld] = loga2;
489: if ((PetscRealPart(a1)<0.0 && PetscImaginaryPart(a1)==0.0) || (PetscRealPart(a2)<0.0 && PetscImaginaryPart(a1)==0.0)) {
490: /* Problems with 2 x 2 formula for (1,2) block
491: since atanh is nonstandard, just redo diagonal part */
492: continue;
493: }
494: if (a1 == a2) {
495: a12 = T[j+(j+1)*ld]/a1;
496: } else if (PetscAbsScalar(a1)<0.5*PetscAbsScalar(a2) || PetscAbsScalar(a2)<0.5*PetscAbsScalar(a1)) {
497: a12 = T[j+(j+1)*ld]*(loga2-loga1)/(a2-a1);
498: } else { /* Close eigenvalues */
499: z = (a2-a1)/(a2+a1);
500: dd = 2.0*PetscAtanhScalar(z);
501: #if defined(PETSC_USE_COMPLEX)
502: dd += 2.0*PETSC_i*PETSC_PI*unwinding(loga2-loga1);
503: #endif
504: dd /= (a2-a1);
505: a12 = T[j+(j+1)*ld]*dd;
506: }
507: L[j+(j+1)*ld] = a12;
508: #if !defined(PETSC_USE_COMPLEX)
509: break;
510: case 2: /* Start of quasi-tri block */
511: last_block = 2;
512: f = 0.5*PetscLogScalar(T[j+j*ld]*T[j+j*ld]-T[j+(j+1)*ld]*T[j+1+j*ld]);
513: t = PetscAtan2Real(PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]),T[j+j*ld])/PetscSqrtScalar(-T[j+(j+1)*ld]*T[j+1+j*ld]);
514: L[j+j*ld] = f;
515: L[j+1+j*ld] = t*T[j+1+j*ld];
516: L[j+(j+1)*ld] = t*T[j+(j+1)*ld];
517: L[j+1+(j+1)*ld] = f;
518: }
519: #endif
520: }
521: return 0;
522: }
523: /*
524: * Matrix logarithm implementation based on algorithm and matlab code by N. Higham and co-authors
525: *
526: * H. Al-Mohy and N. J. Higham, "Improved inverse scaling and squaring
527: * algorithms for the matrix logarithm", SIAM J. Sci. Comput. 34(4):C153-C169, 2012.
528: */
529: static PetscErrorCode FNLogmPade(FN fn,PetscBLASInt n,PetscScalar *T,PetscBLASInt ld,PetscBool firstonly)
530: {
531: #if !defined(PETSC_HAVE_COMPLEX)
532: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"This function requires C99 or C++ complex support");
533: #else
534: PetscBLASInt k,sdim,lwork,info;
535: PetscScalar *wr,*wi=NULL,*W,*Q,*Troot,*L,*work,one=1.0,zero=0.0,alpha;
536: PetscInt i,j,s=0,m=0,*blockformat;
537: #if defined(PETSC_USE_COMPLEX)
538: PetscReal *rwork;
539: #endif
541: lwork = 3*n*n; /* gees needs only 5*n, but work is also passed to FNlogm_params */
542: k = firstonly? 1: n;
544: /* compute Schur decomposition A*Q = Q*T */
545: PetscCalloc7(n,&wr,n*k,&W,n*n,&Q,n*n,&Troot,n*n,&L,lwork,&work,n-1,&blockformat);
546: #if !defined(PETSC_USE_COMPLEX)
547: PetscMalloc1(n,&wi);
548: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,wi,Q,&ld,work,&lwork,NULL,&info));
549: #else
550: PetscMalloc1(n,&rwork);
551: PetscCallBLAS("LAPACKgees",LAPACKgees_("V","N",NULL,&n,T,&ld,&sdim,wr,Q,&ld,work,&lwork,rwork,NULL,&info));
552: #endif
553: SlepcCheckLapackInfo("gees",info);
555: #if !defined(PETSC_USE_COMPLEX)
556: /* check for negative real eigenvalues */
557: for (i=0;i<n;i++) {
559: }
560: #endif
562: /* get block structure of Schur factor */
563: qtri_struct(n,T,ld,blockformat);
565: /* get parameters */
566: FNlogm_params(fn,n,T,ld,wr,wi,100,&s,&m,Troot,work);
568: /* compute Troot - I = T(1/2^s) - I more accurately */
569: recompute_diag_blocks_sqrt(n,Troot,T,ld,blockformat,s);
571: /* compute Pade approximant */
572: pade_approx(n,Troot,L,ld,m,work);
574: /* scale back up, L = 2^s * L */
575: alpha = PetscPowInt(2,s);
576: for (i=0;i<n;i++) for (j=0;j<n;j++) L[i+j*ld] *= alpha;
578: /* recompute diagonal blocks */
579: recompute_diag_blocks_log(n,L,T,ld,blockformat);
581: /* backtransform B = Q*L*Q' */
582: PetscCallBLAS("BLASgemm",BLASgemm_("N","C",&n,&k,&n,&one,L,&ld,Q,&ld,&zero,W,&ld));
583: PetscCallBLAS("BLASgemm",BLASgemm_("N","N",&n,&k,&n,&one,Q,&ld,W,&ld,&zero,T,&ld));
585: /* flop count: Schur decomposition, and backtransform */
586: PetscLogFlops(25.0*n*n*n+4.0*n*n*k);
588: PetscFree7(wr,W,Q,Troot,L,work,blockformat);
589: #if !defined(PETSC_USE_COMPLEX)
590: PetscFree(wi);
591: #else
592: PetscFree(rwork);
593: #endif
594: return 0;
595: #endif
596: }
598: PetscErrorCode FNEvaluateFunctionMat_Log_Higham(FN fn,Mat A,Mat B)
599: {
600: PetscBLASInt n = 0;
601: PetscScalar *T;
602: PetscInt m;
604: if (A!=B) MatCopy(A,B,SAME_NONZERO_PATTERN);
605: MatDenseGetArray(B,&T);
606: MatGetSize(A,&m,NULL);
607: PetscBLASIntCast(m,&n);
608: FNLogmPade(fn,n,T,n,PETSC_FALSE);
609: MatDenseRestoreArray(B,&T);
610: return 0;
611: }
613: PetscErrorCode FNEvaluateFunctionMatVec_Log_Higham(FN fn,Mat A,Vec v)
614: {
615: PetscBLASInt n = 0;
616: PetscScalar *T;
617: PetscInt m;
618: Mat B;
620: FN_AllocateWorkMat(fn,A,&B);
621: MatDenseGetArray(B,&T);
622: MatGetSize(A,&m,NULL);
623: PetscBLASIntCast(m,&n);
624: FNLogmPade(fn,n,T,n,PETSC_TRUE);
625: MatDenseRestoreArray(B,&T);
626: MatGetColumnVector(B,v,0);
627: FN_FreeWorkMat(fn,&B);
628: return 0;
629: }
631: PetscErrorCode FNView_Log(FN fn,PetscViewer viewer)
632: {
633: PetscBool isascii;
634: char str[50];
635: const char *methodname[] = {
636: "scaling & squaring, [m/m] Pade approximant (Higham)"
637: };
638: const int nmeth=PETSC_STATIC_ARRAY_LENGTH(methodname);
640: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&isascii);
641: if (isascii) {
642: if (fn->beta==(PetscScalar)1.0) {
643: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," logarithm: log(x)\n");
644: else {
645: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
646: PetscViewerASCIIPrintf(viewer," logarithm: log(%s*x)\n",str);
647: }
648: } else {
649: SlepcSNPrintfScalar(str,sizeof(str),fn->beta,PETSC_TRUE);
650: if (fn->alpha==(PetscScalar)1.0) PetscViewerASCIIPrintf(viewer," logarithm: %s*log(x)\n",str);
651: else {
652: PetscViewerASCIIPrintf(viewer," logarithm: %s",str);
653: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
654: SlepcSNPrintfScalar(str,sizeof(str),fn->alpha,PETSC_TRUE);
655: PetscViewerASCIIPrintf(viewer,"*log(%s*x)\n",str);
656: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
657: }
658: }
659: if (fn->method<nmeth) PetscViewerASCIIPrintf(viewer," computing matrix functions with: %s\n",methodname[fn->method]);
660: }
661: return 0;
662: }
664: SLEPC_EXTERN PetscErrorCode FNCreate_Log(FN fn)
665: {
666: fn->ops->evaluatefunction = FNEvaluateFunction_Log;
667: fn->ops->evaluatederivative = FNEvaluateDerivative_Log;
668: fn->ops->evaluatefunctionmat[0] = FNEvaluateFunctionMat_Log_Higham;
669: fn->ops->evaluatefunctionmatvec[0] = FNEvaluateFunctionMatVec_Log_Higham;
670: fn->ops->view = FNView_Log;
671: return 0;
672: }