Actual source code: veccuda.c

petsc-3.8.4 2018-03-24
Report Typos and Errors
  1: /*
  2:  Implementation of the sequential cuda vectors.

  4:  This file contains the code that can be compiled with a C
  5:  compiler.  The companion file veccuda2.cu contains the code that
  6:  must be compiled with nvcc or a C++ compiler.
  7:  */

  9: #define PETSC_SKIP_SPINLOCK

 11: #include <petscconf.h>
 12: #include <petsc/private/vecimpl.h>          /*I <petscvec.h> I*/
 13:  #include <../src/vec/vec/impls/dvecimpl.h>
 14:  #include <../src/vec/vec/impls/seq/seqcuda/cudavecimpl.h>

 16: /*
 17:     Allocates space for the vector array on the Host if it does not exist.
 18:     Does NOT change the PetscCUDAFlag for the vector
 19:     Does NOT zero the CUDA array
 20:  */
 21: PetscErrorCode VecCUDAAllocateCheckHost(Vec v)
 22: {
 24:   PetscScalar    *array;
 25:   Vec_Seq        *s = (Vec_Seq*)v->data;
 26:   PetscInt       n = v->map->n;

 29:   if (!s) {
 30:     PetscNewLog((PetscObject)v,&s);
 31:     v->data = s;
 32:   }
 33:   if (!s->array) {
 34:     PetscMalloc1(n,&array);
 35:     PetscLogObjectMemory((PetscObject)v,n*sizeof(PetscScalar));
 36:     s->array           = array;
 37:     s->array_allocated = array;
 38:     if (v->valid_GPU_array == PETSC_CUDA_UNALLOCATED) {
 39:       v->valid_GPU_array = PETSC_CUDA_CPU;
 40:     }
 41:   }
 42:   return(0);
 43: }

 45: PetscErrorCode VecCopy_SeqCUDA_Private(Vec xin,Vec yin)
 46: {
 47:   PetscScalar       *ya;
 48:   const PetscScalar *xa;
 49:   PetscErrorCode    ierr;

 52:   VecCUDAAllocateCheckHost(xin);
 53:   VecCUDAAllocateCheckHost(yin);
 54:   if (xin != yin) {
 55:     VecGetArrayRead(xin,&xa);
 56:     VecGetArray(yin,&ya);
 57:     PetscMemcpy(ya,xa,xin->map->n*sizeof(PetscScalar));
 58:     VecRestoreArrayRead(xin,&xa);
 59:     VecRestoreArray(yin,&ya);
 60:   }
 61:   return(0);
 62: }

 64: PetscErrorCode VecSetRandom_SeqCUDA_Private(Vec xin,PetscRandom r)
 65: {
 67:   PetscInt       n = xin->map->n,i;
 68:   PetscScalar    *xx;

 71:   VecGetArray(xin,&xx);
 72:   for (i=0; i<n; i++) { PetscRandomGetValue(r,&xx[i]); }
 73:   VecRestoreArray(xin,&xx);
 74:   return(0);
 75: }

 77: PetscErrorCode VecDestroy_SeqCUDA_Private(Vec v)
 78: {
 79:   Vec_Seq        *vs = (Vec_Seq*)v->data;

 83:   PetscObjectSAWsViewOff(v);
 84: #if defined(PETSC_USE_LOG)
 85:   PetscLogObjectState((PetscObject)v,"Length=%D",v->map->n);
 86: #endif
 87:   if (vs) {
 88:     if (vs->array_allocated) { PetscFree(vs->array_allocated); }
 89:     PetscFree(vs);
 90:   }
 91:   return(0);
 92: }

 94: PetscErrorCode VecResetArray_SeqCUDA_Private(Vec vin)
 95: {
 96:   Vec_Seq *v = (Vec_Seq*)vin->data;

 99:   v->array         = v->unplacedarray;
100:   v->unplacedarray = 0;
101:   return(0);
102: }

104: PetscErrorCode VecCUDAAllocateCheck_Public(Vec v)
105: {

109:   VecCUDAAllocateCheck(v);
110:   return(0);
111: }

113: PetscErrorCode VecCUDACopyToGPU_Public(Vec v)
114: {

118:   VecCUDACopyToGPU(v);
119:   return(0);
120: }

122: /*
123:     VecCUDACopyToGPUSome_Public - Copies certain entries down to the GPU from the CPU of a vector

125:    Input Parameters:
126: .    v - the vector
127: .    indices - the requested indices, this should be created with CUDAIndicesCreate()

129: */
130: PetscErrorCode VecCUDACopyToGPUSome_Public(Vec v,PetscCUDAIndices ci)
131: {

135:   VecCUDACopyToGPUSome(v,ci);
136:   return(0);
137: }

139: /*
140:   VecCUDACopyFromGPUSome_Public - Copies certain entries up to the CPU from the GPU of a vector

142:   Input Parameters:
143:  +    v - the vector
144:  -    indices - the requested indices, this should be created with CUDAIndicesCreate()
145: */
146: PetscErrorCode VecCUDACopyFromGPUSome_Public(Vec v,PetscCUDAIndices ci)
147: {

151:   VecCUDACopyFromGPUSome(v,ci);
152:   return(0);
153: }

155: PetscErrorCode VecSetRandom_SeqCUDA(Vec xin,PetscRandom r)
156: {

160:   VecSetRandom_SeqCUDA_Private(xin,r);
161:   xin->valid_GPU_array = PETSC_CUDA_CPU;
162:   return(0);
163: }

165: PetscErrorCode VecResetArray_SeqCUDA(Vec vin)
166: {

170:   VecCUDACopyFromGPU(vin);
171:   VecResetArray_SeqCUDA_Private(vin);
172:   vin->valid_GPU_array = PETSC_CUDA_CPU;
173:   return(0);
174: }

176: PetscErrorCode VecPlaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
177: {

181:   VecCUDACopyFromGPU(vin);
182:   VecPlaceArray_Seq(vin,a);
183:   vin->valid_GPU_array = PETSC_CUDA_CPU;
184:   return(0);
185: }

187: PetscErrorCode VecReplaceArray_SeqCUDA(Vec vin,const PetscScalar *a)
188: {

192:   VecCUDACopyFromGPU(vin);
193:   VecReplaceArray_Seq(vin,a);
194:   vin->valid_GPU_array = PETSC_CUDA_CPU;
195:   return(0);
196: }

198: /*@
199:  VecCreateSeqCUDA - Creates a standard, sequential array-style vector.

201:  Collective on MPI_Comm

203:  Input Parameter:
204:  +  comm - the communicator, should be PETSC_COMM_SELF
205:  -  n - the vector length

207:  Output Parameter:
208:  .  V - the vector

210:  Notes:
211:  Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
212:  same type as an existing vector.

214:  Level: intermediate

216:  Concepts: vectors^creating sequential

218:  .seealso: VecCreateMPI(), VecCreate(), VecDuplicate(), VecDuplicateVecs(), VecCreateGhost()
219:  @*/
220: PetscErrorCode VecCreateSeqCUDA(MPI_Comm comm,PetscInt n,Vec *v)
221: {

225:   VecCreate(comm,v);
226:   VecSetSizes(*v,n,n);
227:   VecSetType(*v,VECSEQCUDA);
228:   return(0);
229: }

231: PetscErrorCode VecDuplicate_SeqCUDA(Vec win,Vec *V)
232: {

236:   VecCreateSeqCUDA(PetscObjectComm((PetscObject)win),win->map->n,V);
237:   PetscLayoutReference(win->map,&(*V)->map);
238:   PetscObjectListDuplicate(((PetscObject)win)->olist,&((PetscObject)(*V))->olist);
239:   PetscFunctionListDuplicate(((PetscObject)win)->qlist,&((PetscObject)(*V))->qlist);
240:   (*V)->stash.ignorenegidx = win->stash.ignorenegidx;
241:   return(0);
242: }

244: PetscErrorCode VecCreate_SeqCUDA(Vec V)
245: {

249:   PetscLayoutSetUp(V->map);
250:   VecCUDAAllocateCheck(V);
251:   V->valid_GPU_array = PETSC_CUDA_GPU;
252:   VecCreate_SeqCUDA_Private(V,((Vec_CUDA*)V->spptr)->GPUarray_allocated);
253:   VecSet(V,0.0);
254:   return(0);
255: }

257: /*@C
258:    VecCreateSeqCUDAWithArray - Creates a CUDA sequential array-style vector,
259:    where the user provides the array space to store the vector values. The array
260:    provided must be a GPU array.

262:    Collective on MPI_Comm

264:    Input Parameter:
265: +  comm - the communicator, should be PETSC_COMM_SELF
266: .  bs - the block size
267: .  n - the vector length
268: -  array - GPU memory where the vector elements are to be stored.

270:    Output Parameter:
271: .  V - the vector

273:    Notes:
274:    Use VecDuplicate() or VecDuplicateVecs() to form additional vectors of the
275:    same type as an existing vector.

277:    If the user-provided array is NULL, then VecCUDAPlaceArray() can be used
278:    at a later stage to SET the array for storing the vector values.

280:    PETSc does NOT free the array when the vector is destroyed via VecDestroy().
281:    The user should not free the array until the vector is destroyed.

283:    Level: intermediate

285:    Concepts: vectors^creating with array

287: .seealso: VecCreateMPICUDAWithArray(), VecCreate(), VecDuplicate(), VecDuplicateVecs(),
288:           VecCreateGhost(), VecCreateSeq(), VecCUDAPlaceArray(), VecCreateSeqWithArray(),
289:           VecCreateMPIWithArray()
290: @*/
291: PetscErrorCode  VecCreateSeqCUDAWithArray(MPI_Comm comm,PetscInt bs,PetscInt n,const PetscScalar array[],Vec *V)
292: {
294:   PetscMPIInt    size;

297:   VecCreate(comm,V);
298:   VecSetSizes(*V,n,n);
299:   VecSetBlockSize(*V,bs);
300:   MPI_Comm_size(comm,&size);
301:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQ on more than one process");
302:   VecCreate_SeqCUDA_Private(*V,array);
303:   return(0);
304: }

306: PetscErrorCode VecCreate_SeqCUDA_Private(Vec V,const PetscScalar *array)
307: {
309:   cudaError_t    err;
310:   Vec_CUDA       *veccuda;
311:   PetscMPIInt    size;

314:   MPI_Comm_size(PetscObjectComm((PetscObject)V),&size);
315:   if (size > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot create VECSEQCUDA on more than one process");
316:   VecCreate_Seq_Private(V,0);
317:   PetscObjectChangeTypeName((PetscObject)V,VECSEQCUDA);

319:   V->ops->dot                    = VecDot_SeqCUDA;
320:   V->ops->norm                   = VecNorm_SeqCUDA;
321:   V->ops->tdot                   = VecTDot_SeqCUDA;
322:   V->ops->scale                  = VecScale_SeqCUDA;
323:   V->ops->copy                   = VecCopy_SeqCUDA;
324:   V->ops->set                    = VecSet_SeqCUDA;
325:   V->ops->swap                   = VecSwap_SeqCUDA;
326:   V->ops->axpy                   = VecAXPY_SeqCUDA;
327:   V->ops->axpby                  = VecAXPBY_SeqCUDA;
328:   V->ops->axpbypcz               = VecAXPBYPCZ_SeqCUDA;
329:   V->ops->pointwisemult          = VecPointwiseMult_SeqCUDA;
330:   V->ops->pointwisedivide        = VecPointwiseDivide_SeqCUDA;
331:   V->ops->setrandom              = VecSetRandom_SeqCUDA;
332:   V->ops->dot_local              = VecDot_SeqCUDA;
333:   V->ops->tdot_local             = VecTDot_SeqCUDA;
334:   V->ops->norm_local             = VecNorm_SeqCUDA;
335:   V->ops->mdot_local             = VecMDot_SeqCUDA;
336:   V->ops->maxpy                  = VecMAXPY_SeqCUDA;
337:   V->ops->mdot                   = VecMDot_SeqCUDA;
338:   V->ops->aypx                   = VecAYPX_SeqCUDA;
339:   V->ops->waxpy                  = VecWAXPY_SeqCUDA;
340:   V->ops->dotnorm2               = VecDotNorm2_SeqCUDA;
341:   V->ops->placearray             = VecPlaceArray_SeqCUDA;
342:   V->ops->replacearray           = VecReplaceArray_SeqCUDA;
343:   V->ops->resetarray             = VecResetArray_SeqCUDA;
344:   V->ops->destroy                = VecDestroy_SeqCUDA;
345:   V->ops->duplicate              = VecDuplicate_SeqCUDA;
346:   V->ops->conjugate              = VecConjugate_SeqCUDA;
347:   V->ops->getlocalvector         = VecGetLocalVector_SeqCUDA;
348:   V->ops->restorelocalvector     = VecRestoreLocalVector_SeqCUDA;
349:   V->ops->getlocalvectorread     = VecGetLocalVector_SeqCUDA;
350:   V->ops->restorelocalvectorread = VecRestoreLocalVector_SeqCUDA;

352:   /* Later, functions check for the Vec_CUDA structure existence, so do not create it without array */
353:   if (array) {
354:     if (!V->spptr) {
355:       PetscMalloc(sizeof(Vec_CUDA),&V->spptr);
356:       veccuda = (Vec_CUDA*)V->spptr;
357:       err = cudaStreamCreate(&veccuda->stream);CHKERRCUDA(err);
358:       veccuda->GPUarray_allocated = 0;
359:       veccuda->hostDataRegisteredAsPageLocked = PETSC_FALSE;
360:       V->valid_GPU_array = PETSC_CUDA_UNALLOCATED;
361:     }
362:     veccuda = (Vec_CUDA*)V->spptr;
363:     veccuda->GPUarray = (PetscScalar*)array;
364:   }
365:   return(0);
366: }