Actual source code: land_tensors.h

  1: #define LANDAU_INVSQRT(q) (1./PetscSqrtReal(q))

  3: #if defined(__CUDA_ARCH__)
  4: #define PETSC_DEVICE_FUNC_DECL __device__
  5: #elif defined(KOKKOS_INLINE_FUNCTION)
  6: #define PETSC_DEVICE_FUNC_DECL KOKKOS_INLINE_FUNCTION
  7: #else
  8: #define PETSC_DEVICE_FUNC_DECL static
  9: #endif

 11: #if LANDAU_DIM==2
 12: /* elliptic functions
 13:  */
 14: PETSC_DEVICE_FUNC_DECL PetscReal polevl_10(PetscReal x, const PetscReal coef[])
 15: {
 16:   PetscReal ans;
 17:   PetscInt  i;
 18:   ans = coef[0];
 19:   for (i=1; i<11; i++) ans = ans * x + coef[i];
 20:   return(ans);
 21: }
 22: PETSC_DEVICE_FUNC_DECL PetscReal polevl_9(PetscReal x, const PetscReal coef[])
 23: {
 24:   PetscReal ans;
 25:   PetscInt  i;
 26:   ans = coef[0];
 27:   for (i=1; i<10; i++) ans = ans * x + coef[i];
 28:   return(ans);
 29: }
 30: /*
 31:  *      Complete elliptic integral of the second kind
 32:  */
 33: PETSC_DEVICE_FUNC_DECL void ellipticE(PetscReal x,PetscReal *ret)
 34: {
 35: #if defined(PETSC_USE_REAL_SINGLE)
 36:   static const PetscReal P2[] = {
 37:     1.53552577301013293365E-4F,
 38:     2.50888492163602060990E-3F,
 39:     8.68786816565889628429E-3F,
 40:     1.07350949056076193403E-2F,
 41:     7.77395492516787092951E-3F,
 42:     7.58395289413514708519E-3F,
 43:     1.15688436810574127319E-2F,
 44:     2.18317996015557253103E-2F,
 45:     5.68051945617860553470E-2F,
 46:     4.43147180560990850618E-1F,
 47:     1.00000000000000000299E0F
 48:   };
 49:   static const PetscReal Q2[] = {
 50:     3.27954898576485872656E-5F,
 51:     1.00962792679356715133E-3F,
 52:     6.50609489976927491433E-3F,
 53:     1.68862163993311317300E-2F,
 54:     2.61769742454493659583E-2F,
 55:     3.34833904888224918614E-2F,
 56:     4.27180926518931511717E-2F,
 57:     5.85936634471101055642E-2F,
 58:     9.37499997197644278445E-2F,
 59:     2.49999999999888314361E-1F
 60:   };
 61: #else
 62:   static const PetscReal P2[] = {
 63:     1.53552577301013293365E-4,
 64:     2.50888492163602060990E-3,
 65:     8.68786816565889628429E-3,
 66:     1.07350949056076193403E-2,
 67:     7.77395492516787092951E-3,
 68:     7.58395289413514708519E-3,
 69:     1.15688436810574127319E-2,
 70:     2.18317996015557253103E-2,
 71:     5.68051945617860553470E-2,
 72:     4.43147180560990850618E-1,
 73:     1.00000000000000000299E0
 74:   };
 75:   static const PetscReal Q2[] = {
 76:     3.27954898576485872656E-5,
 77:     1.00962792679356715133E-3,
 78:     6.50609489976927491433E-3,
 79:     1.68862163993311317300E-2,
 80:     2.61769742454493659583E-2,
 81:     3.34833904888224918614E-2,
 82:     4.27180926518931511717E-2,
 83:     5.85936634471101055642E-2,
 84:     9.37499997197644278445E-2,
 85:     2.49999999999888314361E-1
 86:   };
 87: #endif
 88:   x = 1 - x; /* where m = 1 - m1 */
 89:   *ret = polevl_10(x,P2) - PetscLogReal(x) * (x * polevl_9(x,Q2));
 90: }
 91: /*
 92:  *      Complete elliptic integral of the first kind
 93:  */
 94: PETSC_DEVICE_FUNC_DECL void ellipticK(PetscReal x,PetscReal *ret)
 95: {
 96: #if defined(PETSC_USE_REAL_SINGLE)
 97:   static const PetscReal P1[] =
 98:     {
 99:       1.37982864606273237150E-4F,
100:       2.28025724005875567385E-3F,
101:       7.97404013220415179367E-3F,
102:       9.85821379021226008714E-3F,
103:       6.87489687449949877925E-3F,
104:       6.18901033637687613229E-3F,
105:       8.79078273952743772254E-3F,
106:       1.49380448916805252718E-2F,
107:       3.08851465246711995998E-2F,
108:       9.65735902811690126535E-2F,
109:       1.38629436111989062502E0F
110:     };
111:   static const PetscReal Q1[] =
112:     {
113:       2.94078955048598507511E-5F,
114:       9.14184723865917226571E-4F,
115:       5.94058303753167793257E-3F,
116:       1.54850516649762399335E-2F,
117:       2.39089602715924892727E-2F,
118:       3.01204715227604046988E-2F,
119:       3.73774314173823228969E-2F,
120:       4.88280347570998239232E-2F,
121:       7.03124996963957469739E-2F,
122:       1.24999999999870820058E-1F,
123:       4.99999999999999999821E-1F
124:     };
125: #else
126:   static const PetscReal P1[] =
127:     {
128:       1.37982864606273237150E-4,
129:       2.28025724005875567385E-3,
130:       7.97404013220415179367E-3,
131:       9.85821379021226008714E-3,
132:       6.87489687449949877925E-3,
133:       6.18901033637687613229E-3,
134:       8.79078273952743772254E-3,
135:       1.49380448916805252718E-2,
136:       3.08851465246711995998E-2,
137:       9.65735902811690126535E-2,
138:       1.38629436111989062502E0
139:     };
140:   static const PetscReal Q1[] =
141:     {
142:       2.94078955048598507511E-5,
143:       9.14184723865917226571E-4,
144:       5.94058303753167793257E-3,
145:       1.54850516649762399335E-2,
146:       2.39089602715924892727E-2,
147:       3.01204715227604046988E-2,
148:       3.73774314173823228969E-2,
149:       4.88280347570998239232E-2,
150:       7.03124996963957469739E-2,
151:       1.24999999999870820058E-1,
152:       4.99999999999999999821E-1
153:     };
154: #endif
155:   x = 1 - x; /* where m = 1 - m1 */
156:   *ret = polevl_10(x,P1) - PetscLogReal(x) * polevl_10(x,Q1);
157: }
158: /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
159: PETSC_DEVICE_FUNC_DECL void LandauTensor2D(const PetscReal x[], const PetscReal rp, const PetscReal zp, PetscReal Ud[][2], PetscReal Uk[][2], const PetscReal mask)
160: {
161:   PetscReal l,s,r=x[0],z=x[1],i1func,i2func,i3func,ks,es,pi4pow,sqrt_1s,r2,rp2,r2prp2,zmzp,zmzp2,tt;
162:   //PetscReal mask /* = !!(r!=rp || z!=zp) */;
163:   /* !!(zmzp2 > 1.e-12 || (r-rp) >  1.e-12 || (r-rp) < -1.e-12); */
164:   r2=PetscSqr(r);
165:   zmzp=z-zp;
166:   rp2=PetscSqr(rp);
167:   zmzp2=PetscSqr(zmzp);
168:   r2prp2=r2+rp2;
169:   l = r2 + rp2 + zmzp2;
170:   /* if      (zmzp2 >  PETSC_SMALL) mask = 1; */
171:   /* else if ((tt=(r-rp)) >  PETSC_SMALL) mask = 1; */
172:   /* else if  (tt         < -PETSC_SMALL) mask = 1; */
173:   /* else mask = 0; */
174:   s = mask*2*r*rp/l; /* mask for vectorization */
175:   tt = 1./(1+s);
176:   pi4pow = 4*PETSC_PI*LANDAU_INVSQRT(PetscSqr(l)*l);
177:   sqrt_1s = PetscSqrtReal(1.+s);
178:   /* sp.ellipe(2.*s/(1.+s)) */
179:   ellipticE(2*s*tt,&es); /* 44 flops * 2 + 75 = 163 flops including 2 logs, 1 sqrt, 1 pow, 21 mult */
180:   /* sp.ellipk(2.*s/(1.+s)) */
181:   ellipticK(2*s*tt,&ks); /* 44 flops + 75 in rest, 21 mult */
182:   /* mask is needed here just for single precision */
183:   i2func = 2./((1-s)*sqrt_1s) * es;
184:   i1func = 4./(PetscSqr(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * mask * (ks - (1.+s) * es);
185:   i3func = 2./((1-s)*(s)*sqrt_1s + PETSC_MACHINE_EPSILON) * (es - (1-s) * ks);
186:   Ud[0][0]=                   -pi4pow*(rp2*i1func+PetscSqr(zmzp)*i2func);
187:   Ud[0][1]=Ud[1][0]=Uk[0][1]=  pi4pow*(zmzp)*(r*i2func-rp*i3func);
188:   Uk[1][1]=Ud[1][1]=          -pi4pow*((r2prp2)*i2func-2*r*rp*i3func)*mask;
189:   Uk[0][0]=                   -pi4pow*(zmzp2*i3func+r*rp*i1func);
190:   Uk[1][0]=                    pi4pow*(zmzp)*(r*i3func-rp*i2func); /* 48 mults + 21 + 21 = 90 mults and divs */
191: }
192: #else
193: /* integration point functions */
194: /* Evaluates the tensor U=(I-(x-y)(x-y)/(x-y)^2)/|x-y| at point x,y */
195: /* if x==y we will return zero. This is not the correct result */
196: /* since the tensor diverges for x==y but when integrated */
197: /* the divergent part is antisymmetric and vanishes. This is not  */
198: /* trivial, but can be proven. */
199: PETSC_DEVICE_FUNC_DECL void LandauTensor3D(const PetscReal x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask)
200: {
201:   PetscReal dx[3],inorm3,inorm,inorm2,norm2,x2[] = {xp,yp,zp};
202:   PetscInt  d;
203:   for (d = 0, norm2 = PETSC_MACHINE_EPSILON; d < 3; ++d) {
204:     dx[d] = x2[d] - x1[d];
205:     norm2 += dx[d] * dx[d];
206:   }
207:   inorm2 = mask/norm2;
208:   inorm = PetscSqrtReal(inorm2);
209:   inorm3 = inorm2*inorm;
210:   for (d = 0; d < 3; ++d) U[d][d] = -(inorm - inorm3 * dx[d] * dx[d]);
211:   U[1][0] = U[0][1] = inorm3 * dx[0] * dx[1];
212:   U[1][2] = U[2][1] = inorm3 * dx[2] * dx[1];
213:   U[2][0] = U[0][2] = inorm3 * dx[0] * dx[2];
214: }
215: /* Relativistic form */
216: #define GAMMA3(_x,_c02) PetscSqrtReal(1.0 + ((_x[0]*_x[0]) + (_x[1]*_x[1]) + (_x[2]*_x[2]))/(_c02))
217: PETSC_DEVICE_FUNC_DECL void LandauTensor3DRelativistic(const PetscReal a_x1[], const PetscReal xp, const PetscReal yp, const PetscReal zp, PetscReal U[][3], PetscReal mask, PetscReal c0)
218: {
219:   const PetscReal x2[3] = {xp,yp,zp}, x1[3] = {a_x1[0],a_x1[1],a_x1[2]}, c02 = c0*c0, g1 = GAMMA3(x1,c02), g2 = GAMMA3(x2,c02);
220:   PetscReal       fact, u1u2, diff[3], udiff2,u12,u22,wsq,rsq, tt;
221:   PetscInt        i,j;

223:   if (mask==0.0) {
224:     for (i = 0; i < 3; ++i) {
225:       for (j = 0; j < 3; ++j) {
226:         U[i][j] = 0;
227:       }
228:     }
229:   } else {
230:     for (i = 0, u1u2 = u12 = u22 = udiff2 = 0; i < 3; ++i) {
231:       diff[i] = x1[i] - x2[i];
232:       udiff2 += diff[i] * diff[i];
233:       u12 += x1[i]*x1[i];
234:       u22 += x2[i]*x2[i];
235:       u1u2 += x1[i]*x2[i];
236:     }
237:     tt = 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02; // these two terms are about the same with opposite sign
238:     wsq = udiff2 + tt;
239:     //wsq = udiff2 + 2.*u1u2*(1.-g1*g2) + (u12*u22 + u1u2*u1u2)/c02;
240:     rsq = 1.+wsq/c02;
241:     fact = -rsq/(g1*g2*PetscSqrtReal(wsq)); /* flip sign. papers use du/dt = C, PETSc uses form G(u) = du/dt - C(u) = 0 */
242:     for (i = 0; i < 3; ++i) {
243:       for (j = 0; j < 3; ++j) {
244:         U[i][j] = fact * ( -diff[i]*diff[j]/wsq + (PetscSqrtReal(rsq)-1.)*(x1[i]*x2[j] + x1[j]*x2[i])/wsq);
245:       }
246:       U[i][i] += fact;
247:     }
248:   }
249: }

251: #endif