Actual source code: baijsolvnat3.c
1: #include <../src/mat/impls/baij/seq/baij.h>
2: #include <petsc/private/kernels/blockinvert.h>
4: /*
5: Special case where the matrix was ILU(0) factored in the natural
6: ordering. This eliminates the need for the column and row permutation.
7: */
8: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering_inplace(Mat A,Vec bb,Vec xx)
9: {
10: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
11: const PetscInt n =a->mbs,*ai=a->i,*aj=a->j;
12: const PetscInt *diag = a->diag,*vi;
13: const MatScalar *aa =a->a,*v;
14: PetscScalar *x,s1,s2,s3,x1,x2,x3;
15: const PetscScalar *b;
16: PetscInt jdx,idt,idx,nz,i;
18: VecGetArrayRead(bb,&b);
19: VecGetArray(xx,&x);
21: /* forward solve the lower triangular */
22: idx = 0;
23: x[0] = b[0]; x[1] = b[1]; x[2] = b[2];
24: for (i=1; i<n; i++) {
25: v = aa + 9*ai[i];
26: vi = aj + ai[i];
27: nz = diag[i] - ai[i];
28: idx += 3;
29: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];
30: while (nz--) {
31: jdx = 3*(*vi++);
32: x1 = x[jdx];x2 = x[1+jdx];x3 = x[2+jdx];
33: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
34: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
35: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
36: v += 9;
37: }
38: x[idx] = s1;
39: x[1+idx] = s2;
40: x[2+idx] = s3;
41: }
42: /* backward solve the upper triangular */
43: for (i=n-1; i>=0; i--) {
44: v = aa + 9*diag[i] + 9;
45: vi = aj + diag[i] + 1;
46: nz = ai[i+1] - diag[i] - 1;
47: idt = 3*i;
48: s1 = x[idt]; s2 = x[1+idt];
49: s3 = x[2+idt];
50: while (nz--) {
51: idx = 3*(*vi++);
52: x1 = x[idx]; x2 = x[1+idx];x3 = x[2+idx];
53: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
54: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
55: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
56: v += 9;
57: }
58: v = aa + 9*diag[i];
59: x[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3;
60: x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
61: x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
62: }
64: VecRestoreArrayRead(bb,&b);
65: VecRestoreArray(xx,&x);
66: PetscLogFlops(2.0*9*(a->nz) - 3.0*A->cmap->n);
67: return 0;
68: }
70: PetscErrorCode MatSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
71: {
72: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
73: const PetscInt n =a->mbs,*vi,*ai=a->i,*aj=a->j,*adiag=a->diag;
74: PetscInt i,k,nz,idx,jdx,idt;
75: const PetscInt bs = A->rmap->bs,bs2 = a->bs2;
76: const MatScalar *aa=a->a,*v;
77: PetscScalar *x;
78: const PetscScalar *b;
79: PetscScalar s1,s2,s3,x1,x2,x3;
81: VecGetArrayRead(bb,&b);
82: VecGetArray(xx,&x);
83: /* forward solve the lower triangular */
84: idx = 0;
85: x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
86: for (i=1; i<n; i++) {
87: v = aa + bs2*ai[i];
88: vi = aj + ai[i];
89: nz = ai[i+1] - ai[i];
90: idx = bs*i;
91: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];
92: for (k=0; k<nz; k++) {
93: jdx = bs*vi[k];
94: x1 = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
95: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
96: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
97: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
99: v += bs2;
100: }
102: x[idx] = s1;
103: x[1+idx] = s2;
104: x[2+idx] = s3;
105: }
107: /* backward solve the upper triangular */
108: for (i=n-1; i>=0; i--) {
109: v = aa + bs2*(adiag[i+1]+1);
110: vi = aj + adiag[i+1]+1;
111: nz = adiag[i] - adiag[i+1]-1;
112: idt = bs*i;
113: s1 = x[idt]; s2 = x[1+idt];s3 = x[2+idt];
115: for (k=0; k<nz; k++) {
116: idx = bs*vi[k];
117: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
118: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
119: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
120: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
122: v += bs2;
123: }
124: /* x = inv_diagonal*x */
125: x[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3;
126: x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
127: x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
129: }
131: VecRestoreArrayRead(bb,&b);
132: VecRestoreArray(xx,&x);
133: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
134: return 0;
135: }
137: PetscErrorCode MatForwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
138: {
139: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
140: const PetscInt n =a->mbs,*vi,*ai=a->i,*aj=a->j;
141: PetscInt i,k,nz,idx,jdx;
142: const PetscInt bs = A->rmap->bs,bs2 = a->bs2;
143: const MatScalar *aa=a->a,*v;
144: PetscScalar *x;
145: const PetscScalar *b;
146: PetscScalar s1,s2,s3,x1,x2,x3;
148: VecGetArrayRead(bb,&b);
149: VecGetArray(xx,&x);
150: /* forward solve the lower triangular */
151: idx = 0;
152: x[0] = b[idx]; x[1] = b[1+idx];x[2] = b[2+idx];
153: for (i=1; i<n; i++) {
154: v = aa + bs2*ai[i];
155: vi = aj + ai[i];
156: nz = ai[i+1] - ai[i];
157: idx = bs*i;
158: s1 = b[idx];s2 = b[1+idx];s3 = b[2+idx];
159: for (k=0; k<nz; k++) {
160: jdx = bs*vi[k];
161: x1 = x[jdx];x2 = x[1+jdx]; x3 =x[2+jdx];
162: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
163: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
164: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
166: v += bs2;
167: }
169: x[idx] = s1;
170: x[1+idx] = s2;
171: x[2+idx] = s3;
172: }
174: VecRestoreArrayRead(bb,&b);
175: VecRestoreArray(xx,&x);
176: PetscLogFlops(1.0*bs2*(a->nz) - bs*A->cmap->n);
177: return 0;
178: }
180: PetscErrorCode MatBackwardSolve_SeqBAIJ_3_NaturalOrdering(Mat A,Vec bb,Vec xx)
181: {
182: Mat_SeqBAIJ *a = (Mat_SeqBAIJ*)A->data;
183: const PetscInt n =a->mbs,*vi,*aj=a->j,*adiag=a->diag;
184: PetscInt i,k,nz,idx,idt;
185: const PetscInt bs = A->rmap->bs,bs2 = a->bs2;
186: const MatScalar *aa=a->a,*v;
187: PetscScalar *x;
188: const PetscScalar *b;
189: PetscScalar s1,s2,s3,x1,x2,x3;
191: VecGetArrayRead(bb,&b);
192: VecGetArray(xx,&x);
194: /* backward solve the upper triangular */
195: for (i=n-1; i>=0; i--) {
196: v = aa + bs2*(adiag[i+1]+1);
197: vi = aj + adiag[i+1]+1;
198: nz = adiag[i] - adiag[i+1]-1;
199: idt = bs*i;
200: s1 = b[idt]; s2 = b[1+idt];s3 = b[2+idt];
202: for (k=0; k<nz; k++) {
203: idx = bs*vi[k];
204: x1 = x[idx]; x2 = x[1+idx]; x3 = x[2+idx];
205: s1 -= v[0]*x1 + v[3]*x2 + v[6]*x3;
206: s2 -= v[1]*x1 + v[4]*x2 + v[7]*x3;
207: s3 -= v[2]*x1 + v[5]*x2 + v[8]*x3;
209: v += bs2;
210: }
211: /* x = inv_diagonal*x */
212: x[idt] = v[0]*s1 + v[3]*s2 + v[6]*s3;
213: x[1+idt] = v[1]*s1 + v[4]*s2 + v[7]*s3;
214: x[2+idt] = v[2]*s1 + v[5]*s2 + v[8]*s3;
216: }
218: VecRestoreArrayRead(bb,&b);
219: VecRestoreArray(xx,&x);
220: PetscLogFlops(2.0*bs2*(a->nz) - bs*A->cmap->n);
221: return 0;
222: }