Actual source code: pipelcg.c
petsc-3.9.3 2018-07-02
1: #include <petsc/private/kspimpl.h>
2: #include <petsc/private/vecimpl.h>
4: #define offset(j) PetscMax(((j) - (2*l)), 0)
5: #define shift(i,j) ((i) - offset((j)))
6: #define G(i,j) (plcg->G[((j)*(2*l+1))+(shift((i),(j))) ])
7: #define G_noshift(i,j) (plcg->G[((j)*(2*l+1))+(i)])
8: #define alpha(i) (plcg->alpha[(i)])
9: #define gamma(i) (plcg->gamma[(i)])
10: #define delta(i) (plcg->delta[(i)])
11: #define sigma(i) (plcg->sigma[(i)])
12: #define req(i) (plcg->req[(i)])
14: typedef struct KSP_CG_PIPE_L_s KSP_CG_PIPE_L;
15: struct KSP_CG_PIPE_L_s {
16: PetscInt l; /* pipeline depth */
17: Vec *Z; /* Z vector (shifted base) */
18: Vec *V; /* V vector (original base) */
19: Vec z_2; /* additional vector needed when l == 1 */
20: Vec p,u,up,upp; /* some work vectors */
21: PetscScalar *G; /* such that Z = VG (band matrix)*/
22: PetscScalar *gamma,*delta,*alpha;
23: PetscReal lmin,lmax; /* min and max eigen values estimates to compute base shifts */
24: PetscReal *sigma; /* base shifts */
25: MPI_Request *req; /* request array for asynchronous global collective */
26: };
28: /**
29: * KSPSetUp_PIPELCG - Sets up the workspace needed by the PIPELCG method.
30: *
31: * This is called once, usually automatically by KSPSolve() or KSPSetUp()
32: * but can be called directly by KSPSetUp()
33: */
34: static PetscErrorCode KSPSetUp_PIPELCG(KSP ksp)
35: {
37: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
38: PetscInt l=plcg->l,max_it=ksp->max_it;
39: MPI_Comm comm;
42: comm = PetscObjectComm((PetscObject)ksp);
43: if (max_it < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: max_it argument must be positive.",((PetscObject)ksp)->type_name);
44: if (l < 1) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be positive.",((PetscObject)ksp)->type_name);
45: if (l > max_it) SETERRQ1(comm,PETSC_ERR_ARG_OUTOFRANGE,"%s: pipel argument must be less than max_it.",((PetscObject)ksp)->type_name);
47: KSPSetWorkVecs(ksp,4); /* get work vectors needed by PIPELCG */
48: plcg->p = ksp->work[0];
49: plcg->u = ksp->work[1];
50: plcg->up = ksp->work[2];
51: plcg->upp = ksp->work[3];
53: VecDuplicateVecs(plcg->p,l+1,&plcg->Z);
54: VecDuplicateVecs(plcg->p,2*l+1,&plcg->V);
55: PetscCalloc1(2*l,&plcg->alpha);
56: PetscCalloc1(l,&plcg->sigma);
57: if (l == 1) {
58: VecDuplicate(plcg->p,&plcg->z_2);
59: }
61: return(0);
62: }
64: static PetscErrorCode KSPReset_PIPELCG(KSP ksp)
65: {
66: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
67: PetscInt ierr=0,l=plcg->l;
70: PetscFree(plcg->sigma);
71: PetscFree(plcg->alpha);
72: VecDestroyVecs(l+1,&plcg->Z);
73: VecDestroyVecs(2*l+1,&plcg->V);
74: VecDestroy(&plcg->z_2);
75: return(0);
76: }
78: static PetscErrorCode KSPDestroy_PIPELCG(KSP ksp)
79: {
80: PetscInt ierr=0;
83: KSPReset_PIPELCG(ksp);
84: KSPDestroyDefault(ksp);
85: return(0);
86: }
88: static PetscErrorCode KSPSetFromOptions_PIPELCG(PetscOptionItems *PetscOptionsObject,KSP ksp)
89: {
90: PetscInt ierr=0;
91: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
92: PetscBool flag=PETSC_FALSE;
95: PetscOptionsHead(PetscOptionsObject,"KSP PIPELCG options");
96: PetscOptionsInt("-ksp_pipelcg_pipel","Pipeline length","",plcg->l,&plcg->l,&flag);
97: if (!flag) plcg->l = 1;
98: PetscOptionsReal("-ksp_pipelcg_lmin","Estimate for smallest eigenvalue","",plcg->lmin,&plcg->lmin,&flag);
99: if (!flag) plcg->lmin = 0.0;
100: PetscOptionsReal("-ksp_pipelcg_lmax","Estimate for largest eigenvalue","",plcg->lmax,&plcg->lmax,&flag);
101: if (!flag) plcg->lmax = 0.0;
102: PetscOptionsTail();
103: return(0);
104: }
106: static PetscErrorCode MPIPetsc_Iallreduce(void *sendbuf,void *recvbuf,PetscMPIInt count,MPI_Datatype datatype,MPI_Op op,MPI_Comm comm,MPI_Request *request)
107: {
108: PetscErrorCode ierr=0;
111: #if defined(PETSC_HAVE_MPI_IALLREDUCE)
112: MPI_Iallreduce(sendbuf,recvbuf,count,datatype,op,comm,request);
113: #else
114: MPIU_Allreduce(sendbuf,recvbuf,count,datatype,op,comm);
115: *request = MPI_REQUEST_NULL;
116: #endif
117: return(0);
118: }
120: static PetscErrorCode KSPView_PIPELCG(KSP ksp,PetscViewer viewer)
121: {
122: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
123: PetscErrorCode ierr=0;
124: PetscBool iascii=PETSC_FALSE,isstring=PETSC_FALSE;
127: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
128: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSTRING,&isstring);
129: if (iascii) {
130: PetscViewerASCIIPrintf(viewer," Pipeline depth: %D\n", plcg->l);
131: PetscViewerASCIIPrintf(viewer," Minimal eigen value estimate %g\n",plcg->lmin);
132: PetscViewerASCIIPrintf(viewer," Maximal eigen value estimate %g\n",plcg->lmax);
133: } else if (isstring) {
134: PetscViewerStringSPrintf(viewer," Pipeline depth: %D\n", plcg->l);
135: PetscViewerStringSPrintf(viewer," Minimal eigen value estimate %g\n",plcg->lmin);
136: PetscViewerStringSPrintf(viewer," Maximal eigen value estimate %g\n",plcg->lmax);
137: }
138: return(0);
139: }
141: static PetscErrorCode KSPSolve_InnerLoop_PIPELCG(KSP ksp)
142: {
143: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
144: Mat A=NULL,Pmat=NULL;
145: PetscInt it=0,max_it=ksp->max_it,ierr=0,l=plcg->l,i=0,j=0,k=0;
146: PetscInt start=0,middle=0,end=0;
147: Vec *Z = plcg->Z,*V=plcg->V;
148: Vec x=NULL,p=NULL,u=NULL,up=NULL,upp=NULL,temp=NULL,temp2[2],z_2=NULL;
149: PetscScalar sum_dummy=0.0,eta=0.0,zeta=0.0,lambda=0.0;
150: PetscReal dp=0.0,tmp=0.0,beta=0.0,invbeta2=0.0;
151: MPI_Comm comm;
154: x = ksp->vec_sol;
155: p = plcg->p;
156: u = plcg->u;
157: up = plcg->up;
158: upp = plcg->upp;
159: z_2 = plcg->z_2;
161: comm = PetscObjectComm((PetscObject)ksp);
162: PCGetOperators(ksp->pc,&A,&Pmat);
164: for (it = 0; it < max_it+l; ++it) {
165: /* ----------------------------------- */
166: /* Multiplication z_{it+1} = Az_{it} */
167: /* ----------------------------------- */
168: VecCopy(up,upp);
169: VecCopy(u,up);
170: if (it < l) {
171: /* SpMV and Prec */
172: MatMult(A,Z[l-it],u);
173: KSP_PCApply(ksp,u,Z[l-it-1]);
174: /* Apply shift */
175: VecAXPY(Z[l-it-1],-sigma(it),Z[l-it]);
176: VecAXPY(u,-sigma(it),Z[l-it]);
177: } else {
178: /* Shift the Z vector pointers */
179: if (l == 1) {
180: VecCopy(Z[l],z_2);
181: }
182: temp = Z[l];
183: for (i = l; i > 0; --i) {
184: Z[i] = Z[i-1];
185: }
186: Z[0] = temp;
187: MatMult(A,Z[1],u);
188: KSP_PCApply(ksp,u,Z[0]);
189: }
191: /* ----------------------------------- */
192: /* Adjust the G matrix */
193: /* ----------------------------------- */
194: if (it >= l) {
195: if (it == l) {
196: /* MPI_Wait for G(0,0),scale V0 and Z and u vectors with 1/beta */
197: MPI_Wait(&req(0),MPI_STATUS_IGNORE);
198: beta = PetscSqrtReal(PetscRealPart(G(0,0)));
199: G(0,0) = 1.0;
200: VecAXPY(V[2*l],1.0/beta,p);
201: for (j = 0; j <= l; ++j) {
202: VecScale(Z[j],1.0/beta);
203: }
204: VecScale(u,1.0/beta);
205: VecScale(up,1.0/beta);
206: VecScale(upp,1.0/beta);
207: }
209: /* MPI_Wait until the dot products,started l iterations ago,are completed */
210: MPI_Wait(&req(it-l+1),MPI_STATUS_IGNORE);
211: if (it <= 2*l-1) {
212: invbeta2 = 1.0 / (beta * beta);
213: /* scale column 1 up to column l of G with 1/beta^2 */
214: for (j = PetscMax(it-3*l+1,0); j <= it-l+1; ++j) {
215: G(j,it-l+1) *= invbeta2;
216: }
217: }
219: for (j = PetscMax(it-2*l+2,0); j <= it-l; ++j) {
220: sum_dummy = 0.0;
221: for (k = PetscMax(it-3*l+1,0); k <= j-1; ++k) {
222: sum_dummy = sum_dummy + G(k,j) * G(k,it-l+1);
223: }
224: G(j,it-l+1) = (G(j,it-l+1) - sum_dummy) / G(j,j);
225: }
227: sum_dummy = 0.0;
228: for (k = PetscMax(it-3*l+1,0); k <= it-l; ++k) {
229: sum_dummy = sum_dummy + G(k,it-l+1) * G(k,it-l+1);
230: }
232: /* Breakdown check */
233: tmp = PetscRealPart(G(it-l+1,it-l+1) - sum_dummy);
234: if (tmp < 0) {
235: PetscPrintf(comm,"sqrt breakdown in iteration %d: value is %e. Iteration was restarted.\n",ksp->its+1,tmp);
236: /* End hanging dot-products in the pipeline before exiting for-loop */
237: start = it-l+2;
238: end = PetscMin(it+1,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
239: for (i = start; i < end; ++i) {
240: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
241: }
242: break;
243: }
244: G(it-l+1,it-l+1) = PetscSqrtReal(tmp);
246: if (it < 2*l) {
247: if (it == l) {
248: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)) / G(it-l,it-l);
249: } else {
250: gamma(it-l) = (G(it-l,it-l+1) + sigma(it-l) * G(it-l,it-l)
251: - delta(it-l-1) * G(it-l-1,it-l)) / G(it-l,it-l);
252: }
253: delta(it-l) = G(it-l+1,it-l+1) / G(it-l,it-l);
254: } else {
255: gamma(it-l) = (G(it-l,it-l) * gamma(it-2*l)
256: + G(it-l,it-l+1) * delta(it-2*l)
257: - G(it-l-1,it-l) * delta(it-l-1)) / G(it-l,it-l);
258: delta(it-l) = (G(it-l+1,it-l+1) * delta(it-2*l)) / G(it-l,it-l);
259: }
261: /* -------------------------------------------- */
262: /* Recursively compute the next V and Z vectors */
263: /* -------------------------------------------- */
264: /* Recurrence V vectors */
265: if (it < 3*l) {
266: VecAXPY(V[3*l-it-1],1.0/G(it-l+1,it-l+1),Z[l]);
267: for (j = 0; j <= it-l; ++j) {
268: alpha(it-l-j) = -G(j,it-l+1)/G(it-l+1,it-l+1);
269: }
270: VecMAXPY(V[3*l-it-1],it-l+1,&alpha(0),&V[3*l-it]);
271: } else {
272: /* Shift the V vector pointers */
273: temp = V[2*l];
274: for (i = 2*l; i>0; i--) {
275: V[i] = V[i-1];
276: }
277: V[0] = temp;
279: VecSet(V[0],0.0);
280: VecAXPY(V[0],1.0/G(it-l+1,it-l+1),Z[l]);
281: for (j = 0; j < 2*l; ++j) {
282: k = (it-3*l+1)+j;
283: alpha(2*l-1-j) = -G(k,it-l+1)/G(it-l+1,it-l+1);
284: }
285: VecMAXPY(V[0],2*l,&alpha(0),&V[1]);
286: }
287: /* Recurrence Z and U vectors */
288: if (it <= l) {
289: VecAXPY(Z[0],-gamma(it-l),Z[1]);
290: VecAXPY(u,-gamma(it-l),up);
291: } else {
292: alpha(0) = -delta(it-l-1);
293: alpha(1) = -gamma(it-l);
295: temp2[0] = (l==1) ? z_2 : Z[2];
296: temp2[1] = Z[1];
297: VecMAXPY(Z[0],2,&alpha(0),temp2);
299: temp2[0] = upp;
300: temp2[1] = up;
301: VecMAXPY(u,2,&alpha(0),temp2);
302: }
303: VecScale(Z[0],1.0/delta(it-l));
304: VecScale(u,1.0/delta(it-l));
305: }
307: /* ---------------------------------------- */
308: /* Compute and communicate the dot products */
309: /* ---------------------------------------- */
310: if (it < l) {
311: /* dot-product (Z_{it+1},z_j) */
312: for (j = 0; j < it+2; ++j) {
313: (*u->ops->dot_local)(u,Z[l-j],&G(j,it+1));
314: }
315: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,it+1),it+2,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
316: } else if ((it >= l) && (it < max_it)) {
317: start = PetscMax(0,it-2*l+1);
318: middle = it-l+2;
319: end = it+2;
320: for (j = start; j < middle; ++j) { /* dot-product (Z_{it+1},v_j) */
321: temp = (it < 3*l) ? plcg->V[2*l-j] : plcg->V[it-l+1-j];
322: (*u->ops->dot_local)(u,temp,&G(j,it+1));
323: }
324: for (j = middle; j < end; ++j) { /* dot-product (Z_{it+1},z_j) */
325: (*u->ops->dot_local)(u,plcg->Z[it+1-j],&G(j,it+1));
326: }
327: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(start,it+1),end-start,MPIU_SCALAR,MPIU_SUM,comm,&req(it+1));
328: }
330: /* ----------------------------------------- */
331: /* Compute solution vector and residual norm */
332: /* ----------------------------------------- */
333: if (it >= l) {
334: if (it == l) {
335: if (ksp->its != 0) {
336: ++ ksp->its;
337: }
338: eta = gamma(0);
339: zeta = beta;
340: VecCopy(V[2*l],p);
341: VecScale(p,1.0/eta);
342: VecAXPY(x,zeta,p);
344: dp = beta;
345: ksp->rnorm = dp;
346: KSPLogResidualHistory(ksp,dp);
347: KSPMonitor(ksp,ksp->its,dp);
348: (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);
349: } else if (it > l) {
350: k = it-l;
351: ++ ksp->its;
352: lambda = delta(k-1) / eta;
353: eta = gamma(k) - lambda * delta(k-1);
354: zeta = -lambda * zeta;
355: VecScale(p,-delta(k-1)/eta);
356: VecAXPY(p,1.0/eta,(it < 3*l) ? V[3*l-it] : V[1]);
357: VecAXPY(x,zeta,p);
359: dp = PetscAbsScalar(zeta);
360: ksp->rnorm = dp;
361: KSPLogResidualHistory(ksp,dp);
362: KSPMonitor(ksp,ksp->its,dp);
363: (*ksp->converged)(ksp,ksp->its,dp,&ksp->reason,ksp->cnvP);
364: }
365: if (ksp->its >= max_it-1) {
366: ksp->reason = KSP_DIVERGED_ITS;
367: }
368: if (ksp->reason) {
369: /* End hanging dot-products in the pipeline before exiting for-loop */
370: start = it-l+2;
371: end = PetscMin(it+2,max_it+1); /* !warning! 'it' can actually be greater than 'max_it' */
372: for (i = start; i < end; ++i) {
373: MPI_Wait(&req(i),MPI_STATUS_IGNORE);
374: }
375: break;
376: }
377: }
378: } /* End inner for loop */
379: return(0);
380: }
382: static PetscErrorCode KSPSolve_ReInitData_PIPELCG(KSP ksp)
383: {
384: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
385: PetscInt ierr=0,i=0,j=0,l=plcg->l,max_it=ksp->max_it;
388: VecSet(plcg->up,0.0);
389: VecSet(plcg->upp,0.0);
390: for (i = 0; i < l+1; ++i) {
391: VecSet(plcg->Z[i],0.0);
392: }
393: for (i = 0; i < (2*l+1); ++i) {
394: VecSet(plcg->V[i],0.0);
395: }
396: for (j = 0; j < (max_it+1); ++j) {
397: gamma(j) = 0.0;
398: delta(j) = 0.0;
399: for (i = 0; i < (2*l+1); ++i) {
400: G_noshift(i,j) = 0.0;
401: }
402: }
403: return(0);
404: }
406: /**
407: * KSPSolve_PIPELCG - This routine actually applies the pipelined(l) conjugate gradient method
408: *
409: * Input Parameter:
410: * ksp - the Krylov space object that was set to use conjugate gradient,by,for
411: * example,KSPCreate(MPI_Comm,KSP *ksp); KSPSetType(ksp,KSPCG);
412: */
413: static PetscErrorCode KSPSolve_PIPELCG(KSP ksp)
414: {
415: PetscErrorCode ierr=0;
416: KSP_CG_PIPE_L *plcg = (KSP_CG_PIPE_L*)ksp->data;
417: Mat A=NULL,Pmat=NULL;
418: Vec b=NULL,x=NULL,p=NULL,u=NULL;
419: PetscInt max_it=ksp->max_it,l=plcg->l;
420: PetscInt i=0,outer_it=0,curr_guess_zero=0;
421: PetscReal lmin=plcg->lmin,lmax=plcg->lmax;
422: PetscBool diagonalscale=PETSC_FALSE,ispcnone=PETSC_FALSE;
423: MPI_Comm comm;
426: comm = PetscObjectComm((PetscObject)ksp);
427: PCGetDiagonalScale(ksp->pc,&diagonalscale);
428: if (diagonalscale) {
429: SETERRQ1(comm,PETSC_ERR_SUP,"Krylov method %s does not support diagonal scaling",((PetscObject)ksp)->type_name);
430: }
432: x = ksp->vec_sol;
433: b = ksp->vec_rhs;
434: p = plcg->p;
435: u = plcg->u;
437: PetscCalloc1((max_it+1)*(2*l+1),&plcg->G);
438: PetscCalloc1(max_it+1,&plcg->gamma);
439: PetscCalloc1(max_it+1,&plcg->delta);
440: PetscCalloc1(max_it+1,&plcg->req);
442: PCGetOperators(ksp->pc,&A,&Pmat);
443: PetscObjectTypeCompare((PetscObject)ksp->pc,PCNONE,&ispcnone);
444: if (ispcnone) {
445: for (i = 0; i < l; ++i) {
446: sigma(i) = (0.5*(lmin+lmax) + (0.5*(lmax-lmin) * PetscCosReal(PETSC_PI*(2.0*i+1.0)/(2.0*l))));
447: }
448: } else {
449: for (i = 0; i < l; ++i) {
450: sigma(i) = 0.0;
451: }
452: }
454: ksp->its = 0;
455: outer_it = 0;
456: curr_guess_zero = !! ksp->guess_zero;
458: while (ksp->its < max_it) { /* OUTER LOOP (gmres-like restart to handle breakdowns) */
459: /* * */
460: /* RESTART LOOP */
461: /* * */
462: if (!curr_guess_zero) {
463: KSP_MatMult(ksp,A,x,u); /* u <- b - Ax */
464: VecAYPX(u,-1.0,b);
465: } else {
466: VecCopy(b,u); /* u <- b (x is 0) */
467: }
468: KSP_PCApply(ksp,u,p); /* p <- Bu */
470: if (outer_it > 0) {
471: /* Re-initialize Z,V,gamma,delta,G,u,up,upp after restart occurred */
472: KSPSolve_ReInitData_PIPELCG(ksp);
473: }
475: (*u->ops->dot_local)(u,p,&G(0,0));
476: MPIPetsc_Iallreduce(MPI_IN_PLACE,&G(0,0),1,MPIU_SCALAR,MPIU_SUM,comm,&req(0));
477: VecCopy(p,plcg->Z[l]);
479: KSPSolve_InnerLoop_PIPELCG(ksp);
481: if (ksp->reason) break; /* convergence or divergence */
482: ++ outer_it;
483: curr_guess_zero = 0;
484: }
486: if (ksp->its >= max_it-1) {
487: ksp->reason = KSP_DIVERGED_ITS;
488: }
489: PetscFree(plcg->G);
490: PetscFree(plcg->gamma);
491: PetscFree(plcg->delta);
492: PetscFree(plcg->req);
493: return(0);
494: }
496: /*MC
497: KSPPIPELCG - Pipelined conjugate gradient method with deeper pipelines.
498: M*/
499: PETSC_EXTERN
500: PetscErrorCode KSPCreate_PIPELCG(KSP ksp)
501: {
503: KSP_CG_PIPE_L *plcg = NULL;
506: PetscNewLog(ksp,&plcg);
507: ksp->data = (void*)plcg;
509: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_LEFT,2);
510: KSPSetSupportedNorm(ksp,KSP_NORM_NATURAL,PC_LEFT,2);
512: ksp->ops->setup = KSPSetUp_PIPELCG;
513: ksp->ops->solve = KSPSolve_PIPELCG;
514: ksp->ops->reset = KSPReset_PIPELCG;
515: ksp->ops->destroy = KSPDestroy_PIPELCG;
516: ksp->ops->view = KSPView_PIPELCG;
517: ksp->ops->setfromoptions = KSPSetFromOptions_PIPELCG;
518: ksp->ops->buildsolution = KSPBuildSolutionDefault;
519: ksp->ops->buildresidual = KSPBuildResidualDefault;
520: return(0);
521: }