Actual source code: sacusp.cu

petsc-3.8.4 2018-03-24
Report Typos and Errors

  2: /*  -------------------------------------------------------------------- */

  4: /*
  5:    Include files needed for the CUSP Smoothed Aggregation preconditioner:
  6:      pcimpl.h - private include file intended for use by all preconditioners
  7: */
  8: #define PETSC_SKIP_SPINLOCK
  9:  #include <petsc/private/pcimpl.h>
 10:  #include <../src/mat/impls/aij/seq/aij.h>
 11: #include <cusp/monitor.h>
 12: #include <cusp/version.h>
 13: #if CUSP_VERSION >= 400
 14: #include <cusp/precond/aggregation/smoothed_aggregation.h>
 15: #define cuspsaprecond cusp::precond::aggregation::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
 16: #else
 17: #include <cusp/precond/smoothed_aggregation.h>
 18: #define cuspsaprecond cusp::precond::smoothed_aggregation<PetscInt,PetscScalar,cusp::device_memory>
 19: #endif
 20:  #include <../src/vec/vec/impls/dvecimpl.h>
 21:  #include <../src/mat/impls/aij/seq/seqcusp/cuspmatimpl.h>
 22:  #include <../src/vec/vec/impls/seq/seqcusp/cuspvecimpl.h>

 24: /*
 25:    Private context (data structure) for the SACUSP preconditioner.
 26: */
 27: typedef struct {
 28:   cuspsaprecond * SACUSP;
 29:   /*int cycles; */
 30: } PC_SACUSP;

 32: /*
 33: static PetscErrorCode PCSACUSPSetCycles(PC pc, int n)
 34: {
 35:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;

 38:   sac->cycles = n;
 39:   return(0);

 41:   }*/

 43: /* -------------------------------------------------------------------------- */
 44: /*
 45:    PCSetUp_SACUSP - Prepares for the use of the SACUSP preconditioner
 46:                     by setting data structures and options.

 48:    Input Parameter:
 49: .  pc - the preconditioner context

 51:    Application Interface Routine: PCSetUp()

 53:    Notes:
 54:    The interface routine PCSetUp() is not usually called directly by
 55:    the user, but instead is called by PCApply() if necessary.
 56: */
 57: static PetscErrorCode PCSetUp_SACUSP(PC pc)
 58: {
 59:   PC_SACUSP      *sa = (PC_SACUSP*)pc->data;
 60:   PetscBool      flg = PETSC_FALSE;
 62: #if !defined(PETSC_USE_COMPLEX)
 63:   // protect these in order to avoid compiler warnings. This preconditioner does
 64:   // not work for complex types.
 65:   Mat_SeqAIJCUSP *gpustruct;
 66: #endif

 69:   PetscObjectTypeCompare((PetscObject)pc->pmat,MATSEQAIJCUSP,&flg);
 70:   if (!flg) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Currently only handles CUSP matrices");
 71:   if (pc->setupcalled != 0) {
 72:     try {
 73:       delete sa->SACUSP;
 74:     } catch(char *ex) {
 75:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
 76:     }
 77:   }
 78:   try {
 79: #if defined(PETSC_USE_COMPLEX)
 80:     sa->SACUSP = 0;CHKERRQ(1); /* TODO */
 81: #else
 82:     MatCUSPCopyToGPU(pc->pmat);
 83:     gpustruct = (Mat_SeqAIJCUSP*)(pc->pmat->spptr);
 84: 
 85:     if (gpustruct->format==MAT_CUSP_ELL) {
 86:       CUSPMATRIXELL *mat = (CUSPMATRIXELL*)gpustruct->mat;
 87:       sa->SACUSP = new cuspsaprecond(*mat);
 88:     } else if (gpustruct->format==MAT_CUSP_DIA) {
 89:       CUSPMATRIXDIA *mat = (CUSPMATRIXDIA*)gpustruct->mat;
 90:       sa->SACUSP = new cuspsaprecond(*mat);
 91:     } else {
 92:       CUSPMATRIX *mat = (CUSPMATRIX*)gpustruct->mat;
 93:       sa->SACUSP = new cuspsaprecond(*mat);
 94:     }
 95: #endif

 97:   } catch(char *ex) {
 98:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
 99:   }
100:   /*PetscOptionsInt("-pc_sacusp_cycles","Number of v-cycles to perform","PCSACUSPSetCycles",sa->cycles,
101:     &sa->cycles,NULL);*/
102:   return(0);
103: }

105: static PetscErrorCode PCApplyRichardson_SACUSP(PC pc, Vec b, Vec y, Vec w,PetscReal rtol, PetscReal abstol, PetscReal dtol, PetscInt its, PetscBool guesszero,PetscInt *outits,PCRichardsonConvergedReason *reason)
106: {
107: #if !defined(PETSC_USE_COMPLEX)
108:   // protect these in order to avoid compiler warnings. This preconditioner does
109:   // not work for complex types.
110:   PC_SACUSP *sac = (PC_SACUSP*)pc->data;
111: #endif
113:   CUSPARRAY      *barray,*yarray;

116:   /* how to incorporate dtol, guesszero, w?*/
118:   VecCUSPGetArrayRead(b,&barray);
119:   VecCUSPGetArrayReadWrite(y,&yarray);
120: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
121:   cusp::monitor<PetscReal> monitor(*barray,its,rtol,abstol);
122: #else
123:   cusp::default_monitor<PetscReal> monitor(*barray,its,rtol,abstol);
124: #endif
125: #if defined(PETSC_USE_COMPLEX)
126:   CHKERRQ(1);
127:   /* TODO */
128: #else
129:   sac->SACUSP->solve(*barray,*yarray,monitor);
130:   *outits = monitor.iteration_count();
131:   if (monitor.converged()) *reason = PCRICHARDSON_CONVERGED_RTOL; /* how to discern between converging from RTOL or ATOL?*/
132:   else *reason = PCRICHARDSON_CONVERGED_ITS;
133: #endif
134:   PetscObjectStateIncrease((PetscObject)y);
135:   VecCUSPRestoreArrayRead(b,&barray);
136:   VecCUSPRestoreArrayReadWrite(y,&yarray);
137:   return(0);
138: }

140: /* -------------------------------------------------------------------------- */
141: /*
142:    PCApply_SACUSP - Applies the SACUSP preconditioner to a vector.

144:    Input Parameters:
145: .  pc - the preconditioner context
146: .  x - input vector

148:    Output Parameter:
149: .  y - output vector

151:    Application Interface Routine: PCApply()
152:  */
153: static PetscErrorCode PCApply_SACUSP(PC pc,Vec x,Vec y)
154: {
155:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;
157:   PetscBool      flg1,flg2;
158:   CUSPARRAY      *xarray=NULL,*yarray=NULL;

161:   /*how to apply a certain fixed number of iterations?*/
162:   PetscObjectTypeCompare((PetscObject)x,VECSEQCUSP,&flg1);
163:   PetscObjectTypeCompare((PetscObject)y,VECSEQCUSP,&flg2);
164:   if (!(flg1 && flg2)) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP, "Currently only handles CUSP vectors");
165:   if (!sac->SACUSP) {
166:     PCSetUp_SACUSP(pc);
167:   }
168:   VecSet(y,0.0);
169:   VecCUSPGetArrayRead(x,&xarray);
170:   VecCUSPGetArrayWrite(y,&yarray);
171:   try {
172: #if defined(PETSC_USE_COMPLEX)

174: #else
175:     cusp::multiply(*sac->SACUSP,*xarray,*yarray);
176: #endif
177:   } catch(char * ex) {
178:     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
179:   }
180:   VecCUSPRestoreArrayRead(x,&xarray);
181:   VecCUSPRestoreArrayWrite(y,&yarray);
182:   PetscObjectStateIncrease((PetscObject)y);
183:   return(0);
184: }
185: /* -------------------------------------------------------------------------- */
186: /*
187:    PCDestroy_SACUSP - Destroys the private context for the SACUSP preconditioner
188:    that was created with PCCreate_SACUSP().

190:    Input Parameter:
191: .  pc - the preconditioner context

193:    Application Interface Routine: PCDestroy()
194: */
195: static PetscErrorCode PCDestroy_SACUSP(PC pc)
196: {
197:   PC_SACUSP      *sac = (PC_SACUSP*)pc->data;

201:   if (sac->SACUSP) {
202:     try {
203:       delete sac->SACUSP;
204:     } catch(char * ex) {
205:       SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error: %s", ex);
206:     }
207:   }

209:   /*
210:       Free the private data structure that was hanging off the PC
211:   */
212:   PetscFree(pc->data);
213:   return(0);
214: }

216: static PetscErrorCode PCSetFromOptions_SACUSP(PetscOptionItems *PetscOptionsObject,PC pc)
217: {

221:   PetscOptionsHead(PetscOptionsObject,"SACUSP options");
222:   PetscOptionsTail();
223:   return(0);
224: }

226: /* -------------------------------------------------------------------------- */


229: /*MC
230:      PCSACUSP  - A smoothed agglomeration algorithm that runs on the Nvidia GPU.


233:     http://research.nvidia.com/sites/default/files/publications/nvr-2011-002.pdf

235:    Level: advanced

237: .seealso:  PCCreate(), PCSetType(), PCType (for list of available types), PC

239: M*/

241: PETSC_EXTERN PetscErrorCode PCCreate_SACUSP(PC pc)
242: {
243:   PC_SACUSP      *sac;

247:   /*
248:      Creates the private data structure for this preconditioner and
249:      attach it to the PC object.
250:   */
251:   PetscNewLog(pc,&sac);
252:   pc->data = (void*)sac;

254:   /*
255:      Initialize the pointer to zero
256:      Initialize number of v-cycles to default (1)
257:   */
258:   sac->SACUSP = 0;
259:   /*sac->cycles=1;*/


262:   /*
263:       Set the pointers for the functions that are provided above.
264:       Now when the user-level routines (such as PCApply(), PCDestroy(), etc.)
265:       are called, they will automatically call these functions.  Note we
266:       choose not to provide a couple of these functions since they are
267:       not needed.
268:   */
269:   pc->ops->apply               = PCApply_SACUSP;
270:   pc->ops->applytranspose      = 0;
271:   pc->ops->setup               = PCSetUp_SACUSP;
272:   pc->ops->destroy             = PCDestroy_SACUSP;
273:   pc->ops->setfromoptions      = PCSetFromOptions_SACUSP;
274:   pc->ops->view                = 0;
275:   pc->ops->applyrichardson     = PCApplyRichardson_SACUSP;
276:   pc->ops->applysymmetricleft  = 0;
277:   pc->ops->applysymmetricright = 0;
278:   return(0);
279: }