Actual source code: agmresdeflation.c
petsc-3.9.3 2018-07-02
1: /*
2: * This file computes data for the deflated restarting in the Newton-basis GMRES. At each restart (or at each detected stagnation in the adaptive strategy), a basis of an (approximated)invariant subspace corresponding to the smallest eigenvalues is extracted from the Krylov subspace. It is then used to augment the Newton basis.
3: *
4: * References : D. Nuentsa Wakam and J. Erhel, Parallelism and robustness in GMRES with the Newton basis and the deflation of eigenvalues. Research report INRIA RR-7787.
5: * Author: Desire NUENTSA WAKAM <desire.nuentsa_wakam@inria.fr>, 2011
6: */
8: #include <../src/ksp/ksp/impls/gmres/agmres/agmresimpl.h>
10: /* Quicksort algorithm to sort the eigenvalues in increasing orders
11: * val_r - real part of eigenvalues, unchanged on exit.
12: * val_i - Imaginary part of eigenvalues unchanged on exit.
13: * size - Number of eigenvalues (with complex conjugates)
14: * perm - contains on exit the permutation vector to reorder the vectors val_r and val_i.
15: */
16: #define DEPTH 500
17: static PetscErrorCode KSPAGMRESQuickSort(PetscScalar *val_r, PetscScalar *val_i, PetscInt size, PetscInt *perm)
18: {
20: PetscInt deb[DEPTH], fin[DEPTH];
21: PetscInt ipivot;
22: PetscScalar pivot_r, pivot_i;
23: PetscInt i, L, R, j;
24: PetscScalar abs_pivot;
25: PetscScalar abs_val;
28: /* initialize perm vector */
29: for (j = 0; j < size; j++) perm[j] = j;
31: deb[0] = 0;
32: fin[0] = size;
33: i = 0;
34: while (i >= 0) {
35: L = deb[i];
36: R = fin[i] - 1;
37: if (L < R) {
38: pivot_r = val_r[L];
39: pivot_i = val_i[L];
40: abs_pivot = PetscSqrtReal(pivot_r * pivot_r + pivot_i * pivot_i);
41: ipivot = perm[L];
42: if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
43: while (L < R) {
44: abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
45: while (abs_val >= abs_pivot && L < R) {
46: R--;
47: abs_val = PetscSqrtReal(val_r[R] * val_r[R] + val_i[R] * val_i[R]);
48: }
49: if (L < R) {
50: val_r[L] = val_r[R];
51: val_i[L] = val_i[R];
52: perm[L] = perm[R];
53: L += 1;
54: }
55: abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
56: while (abs_val <= abs_pivot && L < R) {
57: L++;
58: abs_val = PetscSqrtReal(val_r[L] * val_r[L] + val_i[L] * val_i[L]);
59: }
60: if (L < R) {
61: val_r[R] = val_r[L];
62: val_i[R] = val_i[L];
63: perm[R] = perm[L];
64: R -= 1;
65: }
66: }
67: val_r[L] = pivot_r;
68: val_i[L] = pivot_i;
69: perm[L] = ipivot;
70: deb[i+1] = L + 1;
71: fin[i+1] = fin[i];
72: fin[i] = L;
73: i += 1;
74: if (i == DEPTH - 1) SETERRQ(PETSC_COMM_SELF, PETSC_ERR_MEM, "Could cause stack overflow: Try to increase the value of DEPTH ");
75: } else i--;
76: }
77: return(0);
78: }
80: /*
81: * Compute the Schur vectors from the generalized eigenvalue problem A.u =\lamba.B.u
82: * KspSize - rank of the matrices A and B, size of the current Krylov basis
83: * A - Left matrix
84: * B - Right matrix
85: * ldA - first dimension of A as declared in the calling program
86: * ldB - first dimension of B as declared in the calling program
87: * IsReduced - specifies if the matrices are already in the reduced form,
88: * i.e A is a Hessenberg matrix and B is upper triangular.
89: * Sr - on exit, the extracted Schur vectors corresponding
90: * the smallest eigenvalues (with complex conjugates)
91: * CurNeig - Number of extracted eigenvalues
92: */
93: static PetscErrorCode KSPAGMRESSchurForm(KSP ksp, PetscBLASInt KspSize, PetscScalar *A, PetscBLASInt ldA, PetscScalar *B, PetscBLASInt ldB, PetscBool IsReduced, PetscScalar *Sr, PetscInt *CurNeig)
94: {
95: KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data;
96: PetscInt max_k = agmres->max_k;
97: PetscBLASInt r;
98: PetscInt neig = agmres->neig;
99: PetscScalar *wr = agmres->wr;
100: PetscScalar *wi = agmres->wi;
101: PetscScalar *beta = agmres->beta;
102: PetscScalar *Q = agmres->Q;
103: PetscScalar *Z = agmres->Z;
104: PetscScalar *work = agmres->work;
105: PetscBLASInt *select = agmres->select;
106: PetscInt *perm = agmres->perm;
107: PetscBLASInt sdim = 0;
108: PetscInt i,j;
109: PetscBLASInt info;
111: PetscBLASInt *iwork = agmres->iwork;
112: PetscBLASInt N = MAXKSPSIZE;
113: PetscBLASInt lwork,liwork;
114: PetscBLASInt ilo,ihi;
115: PetscBLASInt ijob,wantQ,wantZ;
116: PetscScalar Dif[2];
119: ijob = 2;
120: wantQ = 1;
121: wantZ = 1;
122: PetscBLASIntCast(PetscMax(8*N+16,4*neig*(N-neig)),&lwork);
123: PetscBLASIntCast(2*N*neig,&liwork);
124: ilo = 1;
125: PetscBLASIntCast(KspSize,&ihi);
127: /* Compute the Schur form */
128: if (IsReduced) { /* The eigenvalue problem is already in reduced form, meaning that A is upper Hessenberg and B is triangular */
129: #if defined(PETSC_MISSING_LAPACK_HGEQZ)
130: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"HGEQZ - Lapack routine is unavailable.");
131: #else
132: PetscStackCallBLAS("LAPACKhgeqz",LAPACKhgeqz_("S", "I", "I", &KspSize, &ilo, &ihi, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, work, &lwork, &info));
133: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xhgeqz_");
134: #endif
135: } else {
136: #if defined(PETSC_MISSING_LAPACK_GGES)
137: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
138: #else
139: PetscStackCallBLAS("LAPACKgges",LAPACKgges_("V", "V", "N", NULL, &KspSize, A, &ldA, B, &ldB, &sdim, wr, wi, beta, Q, &N, Z, &N, work, &lwork, NULL, &info));
140: if (info) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "Error while calling LAPACK routine xgges_");
141: #endif
142: }
144: /* We should avoid computing these ratio... */
145: for (i = 0; i < KspSize; i++) {
146: if (beta[i] != 0.0) {
147: wr[i] /= beta[i];
148: wi[i] /= beta[i];
149: }
150: }
152: /* Sort the eigenvalues to extract the smallest ones */
153: KSPAGMRESQuickSort(wr, wi, KspSize, perm);
155: /* Count the number of extracted eigenvalues (with complex conjugates) */
156: r = 0;
157: while (r < neig) {
158: if (wi[r] != 0) r += 2;
159: else r += 1;
160: }
161: /* Reorder the Schur decomposition so that the cluster of smallest/largest eigenvalues appears in the leading diagonal blocks of A (and B)*/
162: PetscMemzero(select, N*sizeof(PetscBLASInt));
163: if (!agmres->GreatestEig) {
164: for (j = 0; j < r; j++) select[perm[j]] = 1;
165: } else {
166: for (j = 0; j < r; j++) select[perm[KspSize-j-1]] = 1;
167: }
168: #if defined(PETSC_MISSING_LAPACK_TGSEN)
169: SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_SUP,"GGES - Lapack routine is unavailable.");
170: #else
171: PetscStackCallBLAS("LAPACKtgsen",LAPACKtgsen_(&ijob, &wantQ, &wantZ, select, &KspSize, A, &ldA, B, &ldB, wr, wi, beta, Q, &N, Z, &N, &r, NULL, NULL, &(Dif[0]), work, &lwork, iwork, &liwork, &info));
172: if (info == 1) SETERRQ(PetscObjectComm((PetscObject)ksp),PETSC_ERR_PLIB, "UNABLE TO REORDER THE EIGENVALUES WITH THE LAPACK ROUTINE : ILL-CONDITIONED PROBLEM");
173: #endif
174: /* Extract the Schur vectors associated to the r smallest eigenvalues */
175: PetscMemzero(Sr,(N+1)*r*sizeof(PetscScalar));
176: for (j = 0; j < r; j++) {
177: for (i = 0; i < KspSize; i++) {
178: Sr[j*(N+1)+i] = Z[j*N+i];
179: }
180: }
182: /* Broadcast Sr to all other processes to have consistent data;
183: * FIXME should investigate how to get unique Schur vectors (unique QR factorization, probably the sign of rotations) */
184: MPI_Bcast(Sr, (N+1)*r, MPIU_SCALAR, agmres->First, PetscObjectComm((PetscObject)ksp));
185: /* Update the Shift values for the Newton basis. This is surely necessary when applying the DeflationPrecond */
186: if (agmres->DeflPrecond) {
187: KSPAGMRESLejaOrdering(wr, wi, agmres->Rshift, agmres->Ishift, max_k);
188: }
189: *CurNeig = r; /* Number of extracted eigenvalues */
190: return(0);
192: }
194: /*
195: * This function form the matrices for the generalized eigenvalue problem,
196: * it then compute the Schur vectors needed to augment the Newton basis.
197: */
198: PetscErrorCode KSPAGMRESComputeDeflationData(KSP ksp)
199: {
200: KSP_AGMRES *agmres = (KSP_AGMRES*)ksp->data;
201: Vec *U = agmres->U;
202: Vec *TmpU = agmres->TmpU;
203: PetscScalar *MatEigL = agmres->MatEigL;
204: PetscScalar *MatEigR = agmres->MatEigR;
205: PetscScalar *Sr = agmres->Sr;
206: PetscScalar alpha, beta;
207: PetscInt i,j;
209: PetscInt max_k = agmres->max_k; /* size of the non - augmented subspace */
210: PetscInt CurNeig; /* Current number of extracted eigenvalues */
211: PetscInt N = MAXKSPSIZE;
212: PetscBLASInt bN;
213: PetscInt lC = N + 1;
214: PetscInt KspSize = KSPSIZE;
215: PetscBLASInt blC,bKspSize;
216: PetscInt PrevNeig = agmres->r;
219: PetscLogEventBegin(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);
220: if (agmres->neig <= 1) return(0);
221: /* Explicitly form MatEigL = H^T*H, It can also be formed as H^T+h_{N+1,N}H^-1e^T */
222: alpha = 1.0;
223: beta = 0.0;
224: PetscBLASIntCast(KspSize,&bKspSize);
225: PetscBLASIntCast(lC,&blC);
226: PetscBLASIntCast(N,&bN);
227: PetscStackCallBLAS("BLASgemm",BLASgemm_("T", "N", &bKspSize, &bKspSize, &blC, &alpha, agmres->hes_origin, &blC, agmres->hes_origin, &blC, &beta, MatEigL, &bN));
228: if (!agmres->ritz) {
229: /* Form TmpU = V*H where V is the Newton basis orthogonalized with roddec*/
230: for (j = 0; j < KspSize; j++) {
231: /* Apply the elementary reflectors (stored in Qloc) on H */
232: KSPAGMRESRodvec(ksp, KspSize+1, &agmres->hes_origin[j*lC], TmpU[j]);
233: }
234: /* Now form MatEigR = TmpU^T*W where W is [VEC_V(1:max_k); U] */
235: for (j = 0; j < max_k; j++) {
236: VecMDot(VEC_V(j), KspSize, TmpU, &MatEigR[j*N]);
237: }
238: for (j = max_k; j < KspSize; j++) {
239: VecMDot(U[j-max_k], KspSize, TmpU, &MatEigR[j*N]);
240: }
241: } else { /* Form H^T */
242: for (j = 0; j < N; j++) {
243: for (i = 0; i < N; i++) {
244: MatEigR[j*N+i] = agmres->hes_origin[i*lC+j];
245: }
246: }
247: }
248: /* Obtain the Schur form of the generalized eigenvalue problem MatEigL*y = \lambda*MatEigR*y */
249: if (agmres->DeflPrecond) {
250: KSPAGMRESSchurForm(ksp, KspSize, agmres->hes_origin, lC, agmres->Rloc, lC, PETSC_TRUE, Sr, &CurNeig);
251: } else {
252: KSPAGMRESSchurForm(ksp, KspSize, MatEigL, N, MatEigR, N, PETSC_FALSE, Sr, &CurNeig);
253: }
255: if (agmres->DeflPrecond) { /* Switch to DGMRES to improve the basis of the invariant subspace associated to the deflation */
256: agmres->HasSchur = PETSC_TRUE;
257: KSPDGMRESComputeDeflationData(ksp, &CurNeig);
258: return(0);
259: }
260: /* Form the Schur vectors in the entire subspace: U = W * Sr where W = [VEC_V(1:max_k); U]*/
261: for (j = 0; j < PrevNeig; j++) { /* First, copy U to a temporary place */
262: VecCopy(U[j], TmpU[j]);
263: }
265: for (j = 0; j < CurNeig; j++) {
266: VecZeroEntries(U[j]);
267: VecMAXPY(U[j], max_k, &Sr[j*(N+1)], &VEC_V(0));
268: VecMAXPY(U[j], PrevNeig, &Sr[j*(N+1)+max_k], TmpU);
269: }
270: agmres->r = CurNeig;
271: PetscLogEventEnd(KSP_AGMRESComputeDeflationData, ksp, 0,0,0);
272: return(0);
273: }