Actual source code: cuspvecimpl.h

petsc-3.8.4 2018-03-24
Report Typos and Errors

  4: #if defined(__CUDACC__)

  6:  #include <petsccusp.h>
  7:  #include <petsc/private/vecimpl.h>

  9: #include <algorithm>
 10: #include <vector>
 11: #include <string>

 13: #if defined(CUSP_VERSION) && CUSP_VERSION >= 500
 14: #include <cusp/blas/blas.h>
 15: #else
 16: #include <cusp/blas.h>
 17: #endif
 18: #include <thrust/host_vector.h>
 19: #include <thrust/device_vector.h>
 20: #include <thrust/iterator/constant_iterator.h>
 21: #include <thrust/transform.h>
 22: #include <thrust/iterator/permutation_iterator.h>

 24: #define CUSPARRAY cusp::array1d<PetscScalar,cusp::device_memory>
 25: #define CUSPARRAYCPU cusp::array1d<PetscScalar,cusp::host_memory>
 26: #define CUSPINTARRAYGPU cusp::array1d<PetscInt,cusp::device_memory>
 27: #define CUSPINTARRAYCPU cusp::array1d<PetscInt,cusp::host_memory>
 28: #define CHKERRCUSP(err) if (((int)err) != (int)CUBLAS_STATUS_SUCCESS) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"CUSP error %d",err)
 29: #define VecCUSPCastToRawPtr(x) thrust::raw_pointer_cast(&(x)[0])
 30: #define WaitForGPU() PetscCUSPSynchronize ? cudaThreadSynchronize() : 0

 32: struct Vec_CUSP {
 33:   CUSPARRAY *GPUarray;        /* this always holds the GPU data */
 34:   cudaStream_t stream;        /* A stream for doing asynchronous data transfers */
 35:   PetscBool hostDataRegisteredAsPageLocked;
 36: };

 38: #endif


 41: #include <cuda_runtime.h>

 43: PETSC_INTERN PetscErrorCode VecDotNorm2_SeqCUSP(Vec,Vec,PetscScalar*, PetscScalar*);
 44: PETSC_INTERN PetscErrorCode VecPointwiseDivide_SeqCUSP(Vec,Vec,Vec);
 45: PETSC_INTERN PetscErrorCode VecWAXPY_SeqCUSP(Vec,PetscScalar,Vec,Vec);
 46: PETSC_INTERN PetscErrorCode VecMDot_SeqCUSP(Vec,PetscInt,const Vec[],PetscScalar*);
 47: PETSC_EXTERN PetscErrorCode VecSet_SeqCUSP(Vec,PetscScalar);
 48: PETSC_INTERN PetscErrorCode VecMAXPY_SeqCUSP(Vec,PetscInt,const PetscScalar*,Vec*);
 49: PETSC_INTERN PetscErrorCode VecAXPBYPCZ_SeqCUSP(Vec,PetscScalar,PetscScalar,PetscScalar,Vec,Vec);
 50: PETSC_INTERN PetscErrorCode VecPointwiseMult_SeqCUSP(Vec,Vec,Vec);
 51: PETSC_INTERN PetscErrorCode VecPlaceArray_SeqCUSP(Vec,const PetscScalar*);
 52: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP(Vec);
 53: PETSC_INTERN PetscErrorCode VecReplaceArray_SeqCUSP(Vec,const PetscScalar*);
 54: PETSC_INTERN PetscErrorCode VecDot_SeqCUSP(Vec,Vec,PetscScalar*);
 55: PETSC_INTERN PetscErrorCode VecTDot_SeqCUSP(Vec,Vec,PetscScalar*);
 56: PETSC_INTERN PetscErrorCode VecScale_SeqCUSP(Vec,PetscScalar);
 57: PETSC_EXTERN PetscErrorCode VecCopy_SeqCUSP(Vec,Vec);
 58: PETSC_INTERN PetscErrorCode VecSwap_SeqCUSP(Vec,Vec);
 59: PETSC_INTERN PetscErrorCode VecAXPY_SeqCUSP(Vec,PetscScalar,Vec);
 60: PETSC_INTERN PetscErrorCode VecAXPBY_SeqCUSP(Vec,PetscScalar,PetscScalar,Vec);
 61: PETSC_INTERN PetscErrorCode VecDuplicate_SeqCUSP(Vec,Vec*);
 62: PETSC_INTERN PetscErrorCode VecConjugate_SeqCUSP(Vec xin);
 63: PETSC_INTERN PetscErrorCode VecNorm_SeqCUSP(Vec,NormType,PetscReal*);
 64: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU(Vec);
 65: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck(Vec);
 66: PETSC_EXTERN PetscErrorCode VecCreate_SeqCUSP(Vec);
 67: PETSC_EXTERN PetscErrorCode VecView_Seq(Vec,PetscViewer);
 68: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP(Vec);
 69: PETSC_INTERN PetscErrorCode VecAYPX_SeqCUSP(Vec,PetscScalar,Vec);
 70: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP(Vec,PetscRandom);
 71: PETSC_INTERN PetscErrorCode VecGetLocalVector_SeqCUSP(Vec,Vec);
 72: PETSC_INTERN PetscErrorCode VecRestoreLocalVector_SeqCUSP(Vec,Vec);
 73: PETSC_INTERN PetscErrorCode VecCopy_SeqCUSP_Private(Vec xin,Vec yin);
 74: PETSC_INTERN PetscErrorCode VecSetRandom_SeqCUSP_Private(Vec xin,PetscRandom r);
 75: PETSC_INTERN PetscErrorCode VecDestroy_SeqCUSP_Private(Vec v);
 76: PETSC_INTERN PetscErrorCode VecResetArray_SeqCUSP_Private(Vec vin);
 77: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPU_Public(Vec);
 78: PETSC_INTERN PetscErrorCode VecCUSPAllocateCheck_Public(Vec);
 79: PETSC_INTERN PetscErrorCode VecCUSPCopyToGPUSome(Vec v, PetscCUSPIndices ci);
 80: PETSC_INTERN PetscErrorCode VecCUSPCopyFromGPUSome(Vec v, PetscCUSPIndices ci);


 83: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_PtoP(PetscInt, PetscInt*,PetscInt, PetscInt*,PetscCUSPIndices*);
 84: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesCreate_StoS(PetscInt,PetscInt,PetscInt,PetscInt,PetscInt,PetscInt*,PetscInt*,PetscCUSPIndices*);
 85: PETSC_INTERN PetscErrorCode VecScatterCUSPIndicesDestroy(PetscCUSPIndices*);
 86: PETSC_INTERN PetscErrorCode VecScatterCUSP_StoS(Vec,Vec,PetscCUSPIndices,InsertMode,ScatterMode);

 88: typedef enum {VEC_SCATTER_CUSP_STOS, VEC_SCATTER_CUSP_PTOP} VecCUSPScatterType;
 89: typedef enum {VEC_SCATTER_CUSP_GENERAL, VEC_SCATTER_CUSP_STRIDED} VecCUSPSequentialScatterMode;

 91: struct  _p_VecScatterCUSPIndices_PtoP {
 92:   PetscInt ns;
 93:   PetscInt sendLowestIndex;
 94:   PetscInt nr;
 95:   PetscInt recvLowestIndex;
 96: };

 98: struct  _p_VecScatterCUSPIndices_StoS {
 99:   /* from indices data */
100:   PetscInt *fslots;
101:   PetscInt fromFirst;
102:   PetscInt fromStep;
103:   VecCUSPSequentialScatterMode fromMode;

105:   /* to indices data */
106:   PetscInt *tslots;
107:   PetscInt toFirst;
108:   PetscInt toStep;
109:   VecCUSPSequentialScatterMode toMode;

111:   PetscInt n;
112:   PetscInt MAX_BLOCKS;
113:   PetscInt MAX_CORESIDENT_THREADS;
114:   cudaStream_t stream;
115: };

117: struct  _p_PetscCUSPIndices {
118:   void * scatter;
119:   VecCUSPScatterType scatterType;
120: };

122: #endif