1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: This file is an adaptation of several subroutines from FILTLAN, the
12: Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
14: More information at:
15: https://www-users.cs.umn.edu/~saad/software/filtlan
17: References:
19: [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20: eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21: */
23: #include <slepc/private/stimpl.h> 24: #include "filter.h"
26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
30: /* ////////////////////////////////////////////////////////////////////////////
31: // Newton - Hermite Polynomial Interpolation
32: //////////////////////////////////////////////////////////////////////////// */
34: /*
35: FILTLAN function NewtonPolynomial
37: build P(z) by Newton's divided differences in the form
38: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39: such that P(x(i)) = y(i) for i=1,...,n, where
40: x,y are input vectors of length n, and a is the output vector of length n
41: if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42: and the Hermite polynomial interpolation is applied
43: in general, if there are k x(i)'s with the same value x0, then
44: the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45: */
46: static inline PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) 47: {
48: PetscReal d,*sx=x,*sy=y;
49: PetscInt j,k;
51: PetscArraycpy(sf,sy,n);
53: /* apply Newton's finite difference method */
54: sa[0] = sf[0];
55: for (j=1;j<n;j++) {
56: for (k=n-1;k>=j;k--) {
57: d = sx[k]-sx[k-j];
58: if (PetscUnlikely(d == 0.0)) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
59: else sf[k] = (sf[k]-sf[k-1]) / d;
60: }
61: sa[j] = sf[j];
62: }
63: return 0;
64: }
66: /*
67: FILTLAN function HermiteBaseFilterInChebyshevBasis
69: compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
70: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
72: The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
73: P_j(z) a Hermite interpolating polynomial
75: input:
76: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
77: HiLowFlags determines the shape of the base filter P(z)
78: Consider the j-th interval [intv(j), intv(j+1)]
79: HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
80: ==0, P(z)==0 for z in [intv(j), intv(j+1)]
81: ==-1, [intv(j), intv(j+1)] is a transition interval;
82: P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
83: baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
84: to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
85: end points z=intv(j) for i=1,...,baseDeg
87: output:
88: P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
89: to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
90: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
91: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
92: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
93: with T_i(z) the Chebyshev polynomial of the first kind,
94: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
95: the return matrix is the matrix of Chebyshev coefficients pp just described
97: note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
98: let n be the length of intv; then there are n-1 intervals
99: therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
100: */
101: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)102: {
103: PetscInt m,ii,jj;
104: PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
105: const PetscInt *hilo = HighLowFlags;
107: m = 2*baseDeg+2;
108: jj = npoints-1; /* jj is initialized as the number of intervals */
109: PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
110: sbf = baseFilter;
112: while (jj--) { /* the main loop to compute the Chebyshev coefficients */
114: flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
115: if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
117: flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
118: flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
120: /* two pointers for traversing x[] and y[] */
121: sx = px;
122: sy = py;
124: /* find the current interval [aa,bb] */
125: aa = *currentPoint++;
126: bb = *currentPoint;
128: /* now left-hand side */
129: ii = baseDeg+1;
130: while (ii--) {
131: *sy++ = flag0;
132: *sx++ = aa;
133: }
135: /* now right-hand side */
136: ii = baseDeg+1;
137: while (ii--) {
138: *sy++ = flag2;
139: *sx++ = bb;
140: }
142: /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
143: FILTLAN_NewtonPolynomial(m,px,py,pp,work);
145: /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
146: P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
148: /* translate the Newton coefficients to the Chebyshev coefficients */
149: FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
150: /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
152: /* copy the Chebyshev coefficients to baseFilter
153: OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
154: sq = qq;
155: ii = 2*baseDeg+2;
156: while (ii--) *sbf++ = *sq++;
158: } else {
160: /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
161: OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
162: *sbf++ = flag;
164: /* the other coefficients are all zero, since P(z) is a constant
165: OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
166: ii = 2*baseDeg+1;
167: while (ii--) *sbf++ = 0.0;
169: /* for the next point */
170: currentPoint++;
171: }
172: }
173: PetscFree5(px,py,pp,qq,work);
174: return 0;
175: }
177: /* ////////////////////////////////////////////////////////////////////////////
178: // Base Filter
179: //////////////////////////////////////////////////////////////////////////// */
181: /*
182: FILTLAN function GetIntervals
184: this routine determines the intervals (including the transition one(s)) by an iterative process
186: frame is a vector consisting of 4 ordered elements:
187: [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
188: [frame(2),frame(3)] is the interval in which the eigenvalues are sought
189: baseDeg is the left-and-right degree of the base filter for each interval
190: polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
191: intv is the output; the j-th interval is [intv(j),intv(j+1))
192: opts is a collection of interval options
194: the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
195: at each end point of intervals
197: the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
198: minimized with s(z) a polynomial of degree up to polyDeg
200: the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
201: y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
203: the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
204: */
205: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)206: {
207: PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
208: PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
209: PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
210: PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
211: PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
212: const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
213: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
214: const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
218: filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
219: if (b == b1) {
221: filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
224: /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
225: halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
226: leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
227: rightDelta = leftDelta; /* initial right shift */
228: opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
229: gridSize = (b-a) / (PetscReal)(opts->numGridPoints);
231: for (i=0;i<6;i++) intv[i] = 0.0;
232: if (filterInfo->filterType == 2) { /* for interior eigenvalues */
233: npoints = 6;
234: intv[0] = a;
235: intv[5] = b;
236: /* intv[1], intv[2], intv[3], and intv[4] to be determined */
237: } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
238: npoints = 4;
239: intv[0] = a;
240: intv[3] = b;
241: /* intv[1], and intv[2] to be determined */
242: }
243: z1 = a1 - leftDelta;
244: z2 = b1 + rightDelta;
245: filterInfo->filterOK = 0; /* not yet found any OK filter */
247: /* allocate matrices */
248: PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);
250: /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
251: intervals[0] = intv[0];
252: intervals[3] = intv[3];
253: intervals[5] = intv[5];
254: intervals[1] = z1;
255: if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
256: intervals[4] = z2;
257: c = (a1+b1) / 2.0;
258: intervals[2] = c - halfPlateau;
259: intervals[3] = c + halfPlateau;
260: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
261: intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
262: }
264: /* the main loop */
265: for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
266: if (z1 <= a) { /* outer loop updates z1 and z2 */
267: z1 = a;
268: leftBoundaryMet = PETSC_TRUE;
269: }
270: if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
271: if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
272: z2 = b;
273: rightBoundaryMet = PETSC_TRUE;
274: }
275: /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
276: /* [z1, c-h] and [c+h, z2] are transition interval */
277: intv[1] = z1;
278: intv[4] = z2;
279: c1 = z1 + halfPlateau;
280: intv[2] = z1; /* i.e. c1 - halfPlateau */
281: intv[3] = c1 + halfPlateau;
282: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
283: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
284: /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
285: c2 = z2 - halfPlateau;
286: intv[2] = c2 - halfPlateau;
287: intv[3] = z2; /* i.e. c2 + halfPlateau */
288: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290: fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
291: yLimitGap = PETSC_MAX_REAL;
292: ii = opts->maxInnerIter;
293: while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
294: /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
295: c = (c1+c2) / 2.0;
296: intv[2] = c - halfPlateau;
297: intv[3] = c + halfPlateau;
298: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
299: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
300: fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
301: if (fc*fc2 < 0.0) {
302: c1 = c;
303: /* fc1 = fc; */
304: } else {
305: c2 = c;
306: fc2 = fc;
307: }
308: yLimitGap = PetscAbsReal(fc);
309: }
310: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
311: intv[1] = z1;
312: intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
313: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
314: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
315: }
316: /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
317: expanded in the `translated' Chebyshev basis */
318: /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
319: yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
320: yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
321: yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
322: ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
323: x = a1;
324: while ((x+=gridSize) < b1) {
325: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
326: if (y < yLimit) yLimit = y;
327: if (y > ySummit) ySummit = y;
328: }
329: /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
330: stepLeft = PETSC_FALSE;
331: stepRight = PETSC_FALSE;
332: if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
333: /* very bad, step to see what will happen */
334: stepLeft = PETSC_TRUE;
335: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
336: } else if (filterInfo->filterType == 2) {
337: if (yLeftLimit < yRightLimit) {
338: if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
339: } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
340: }
341: if (!stepLeft) {
342: yLeftBottom = yLeftLimit;
343: x = a1;
344: while ((x-=gridSize) >= a) {
345: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
346: if (y < yLeftBottom) yLeftBottom = y;
347: else if (y > yLeftBottom) break;
348: }
349: yLeftSummit = yLeftBottom;
350: while ((x-=gridSize) >= a) {
351: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352: if (y > yLeftSummit) {
353: yLeftSummit = y;
354: if (yLeftSummit > yLimit*opts->yRippleLimit) {
355: stepLeft = PETSC_TRUE;
356: break;
357: }
358: }
359: if (y < yLeftBottom) yLeftBottom = y;
360: }
361: }
362: if (filterInfo->filterType == 2 && !stepRight) {
363: yRightBottom = yRightLimit;
364: x = b1;
365: while ((x+=gridSize) <= b) {
366: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
367: if (y < yRightBottom) yRightBottom = y;
368: else if (y > yRightBottom) break;
369: }
370: yRightSummit = yRightBottom;
371: while ((x+=gridSize) <= b) {
372: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373: if (y > yRightSummit) {
374: yRightSummit = y;
375: if (yRightSummit > yLimit*opts->yRippleLimit) {
376: stepRight = PETSC_TRUE;
377: break;
378: }
379: }
380: if (y < yRightBottom) yRightBottom = y;
381: }
382: }
383: if (!stepLeft && !stepRight) {
384: if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
385: else bottom = yLeftBottom;
386: qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
387: if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
388: /* found the first OK filter or a better filter */
389: for (i=0;i<6;i++) intervals[i] = intv[i];
390: filterInfo->filterOK = 1;
391: filterInfo->filterQualityIndex = qIndex;
392: filterInfo->numIter = numIter;
393: filterInfo->yLimit = yLimit;
394: filterInfo->ySummit = ySummit;
395: filterInfo->numLeftSteps = numLeftSteps;
396: filterInfo->yLeftSummit = yLeftSummit;
397: filterInfo->yLeftBottom = yLeftBottom;
398: if (filterInfo->filterType == 2) {
399: filterInfo->yLimitGap = yLimitGap;
400: filterInfo->numRightSteps = numRightSteps;
401: filterInfo->yRightSummit = yRightSummit;
402: filterInfo->yRightBottom = yRightBottom;
403: }
404: numMoreLooked = 0;
405: } else if (++numMoreLooked == numLookMore) {
406: /* filter has been optimal */
407: filterInfo->filterOK = 2;
408: break;
409: }
410: /* try stepping further to see whether it can improve */
411: stepLeft = PETSC_TRUE;
412: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
413: }
414: /* check whether we can really "step" */
415: if (leftBoundaryMet) {
416: if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
417: if (stepLeft) {
418: /* cannot step left, so try stepping right */
419: stepLeft = PETSC_FALSE;
420: stepRight = PETSC_TRUE;
421: }
422: }
423: if (rightBoundaryMet && stepRight) {
424: /* cannot step right, so try stepping left */
425: stepRight = PETSC_FALSE;
426: stepLeft = PETSC_TRUE;
427: }
428: /* now "step" */
429: if (stepLeft) {
430: numLeftSteps++;
431: if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
432: z1 -= leftDelta;
433: }
434: if (stepRight) {
435: numRightSteps++;
436: rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
437: z2 += rightDelta;
438: }
439: if (filterInfo->filterType == 2) {
440: /* shrink the "plateau" of the (dual) base filter */
441: if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
442: else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
443: }
444: }
447: filterInfo->totalNumIter = numIter;
448: PetscFree2(polyFilter,baseFilter);
449: return 0;
450: }
452: /* ////////////////////////////////////////////////////////////////////////////
453: // Chebyshev Polynomials
454: //////////////////////////////////////////////////////////////////////////// */
456: /*
457: FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
459: translate the coefficients of a Newton polynomial to the coefficients
460: in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
462: input:
463: a Newton polynomial defined by vectors a and x:
464: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
465: the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
466: where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
467: note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
469: output:
470: a vector q containing the Chebyshev coefficients:
471: P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
472: */
473: static inline PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)474: {
475: PetscInt m,mm;
476: PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
478: sa = a+n; /* pointers for traversing a and x */
479: sx = x+n-1;
480: *q = *--sa; /* set q[0] = a(n) */
482: c = (aa+bb)/2.0;
483: h = (bb-aa)/2.0;
484: h2 = h/2.0;
486: for (m=1;m<=n-1;m++) { /* the main loop for translation */
488: /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
489: mm = m;
490: sq = q;
491: sq2 = q2;
492: c2 = c-(*--sx);
493: while (mm--) *(sq2++) = c2*(*sq++);
494: *sq2 = 0.0; /* q2[m] = 0.0 */
495: *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
497: /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
498: mm = m-1;
499: sq2 = q2;
500: sq = q+1;
501: while (mm--) *(sq2++) += h2*(*sq++);
503: /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
504: mm = m-1;
505: sq2 = q2+2;
506: sq = q+1;
507: while (mm--) *(sq2++) += h2*(*sq++);
509: /* compute q[0:m] = q2[0:m] */
510: mm = m+1;
511: sq2 = q2;
512: sq = q;
513: while (mm--) *sq++ = *sq2++;
514: *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
515: }
516: return 0;
517: }
519: /*
520: FILTLAN function PolynomialEvaluationInChebyshevBasis
522: evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
523: the `translated' (i.e. scale-and-shift) Chebyshev polynomials
525: input:
526: c is a vector of Chebyshev coefficients which defines the polynomial
527: P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
528: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
529: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
530: note that T_i(z) is the Chebyshev polynomial of the first kind,
531: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
533: output:
534: the evaluated value of P(z) at z=z0
535: */
536: static inline PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)537: {
538: PetscInt ii,deg1;
539: PetscReal y,zz,t0,t1,t2,*sc;
541: deg1 = m;
542: if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
543: else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
545: /* compute y = P(z0), where we utilize the Chebyshev recursion */
546: sc = pp+(idx-1)*m; /* sc points to column idx of pp */
547: y = *sc++;
548: t1 = 1.0; t2 = zz;
549: ii = deg1-1;
550: while (ii--) {
551: /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
552: the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
553: t0 = 2*zz*t1 - t2;
554: /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
555: t2 = t1;
556: t1 = t0;
557: y += (*sc++)*t0;
558: }
559: return y;
560: }
562: #define basisTranslated PETSC_TRUE563: /*
564: FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
566: evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
567: in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
569: input:
570: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
571: pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
572: in a basis of the `translated' Chebyshev polynomials in each interval
573: the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
574: if basisTranslated == false, then
575: P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
576: where T_i(z) is the Chebyshev polynomial of the first kind,
577: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
578: if basisTranslated == true, then
579: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
580: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
581: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
583: output:
584: the evaluated value of P(z) at z=z0
586: note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
587: if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
588: */
589: static inline PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)590: {
591: PetscReal *sintv,aa,bb,resul;
592: PetscInt idx;
594: /* determine the interval which contains z0 */
595: sintv = &intv[1];
596: idx = 1;
597: if (npoints>2 && z0 >= *sintv) {
598: sintv++;
599: while (++idx < npoints-1) {
600: if (z0 < *sintv) break;
601: sintv++;
602: }
603: }
604: /* idx==1 if npoints<=2; otherwise idx satisfies:
605: intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
606: z0 < intv(idx+1), if idx == 1
607: intv(idx) <= z0, if idx == npoints-1
608: in addition, sintv points to &intv(idx+1) */
609: if (basisTranslated) {
610: /* the basis consists of `translated' Chebyshev polynomials */
611: /* find the interval of concern, [aa,bb] */
612: aa = *(sintv-1);
613: bb = *sintv;
614: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
615: } else {
616: /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
617: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
618: }
619: return resul;
620: }
622: /*
623: FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
625: compute the weighted inner product of two piecewise polynomials expanded
626: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
628: pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
629: for z in the j-th interval, P(z) equals
630: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
631: and Q(z) equals
632: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
633: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
634: S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
635: with T_i(z) the Chebyshev polynomial of the first kind
636: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
638: the (scaled) j-th interval inner product is defined by
639: <P_j,Q_j> = (Pi/2)*(pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j)),
640: which comes from the property
641: <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
643: the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
644: which is the return value
646: note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
647: */
648: static inline PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)649: {
650: PetscInt nintv,deg1,i,k;
651: PetscReal *sp,*sq,ans=0.0,ans2;
653: deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
654: if (PetscUnlikely(!deg1)) PetscFunctionReturn(0.0);
655: nintv = PetscMin(pcols,qcols); /* number of intervals */
657: /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
658: compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
659: for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
660: sp = pp+i*ldp;
661: sq = qq+i*ldq;
662: ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
663: for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
664: ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
665: }
666: PetscFunctionReturn(ans*PETSC_PI/2.0);
667: }
669: /*
670: FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
672: compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
673: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
675: P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
677: For z in the j-th interval, P(z) equals
678: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
679: and Q(z) equals
680: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
681: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
682: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
683: with T_i(z) the Chebyshev polynomial of the first kind
684: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
686: the returned matrix is qq which represents Q(z) = z*P(z)
687: */
688: static inline PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)689: {
690: PetscInt i,j;
691: PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
693: for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
694: sp = pp+j*ldp;
695: sq = qq+j*ldq;
696: sp2 = sp;
697: sq2 = sq+1;
698: c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
699: h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
700: h2 = 0.5*h;
701: i = deg1;
702: while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
703: *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
704: *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
705: i = deg1-1;
706: while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
707: tmp = h2*(*sp2++);
708: *(sq2-2) += tmp;
709: *(sq2++) += tmp;
710: }
711: }
712: return 0;
713: }
715: /* ////////////////////////////////////////////////////////////////////////////
716: // Conjugate Residual Method in the Polynomial Space
717: //////////////////////////////////////////////////////////////////////////// */
719: /*
720: B := alpha*A + beta*B
722: A,B are nxk
723: */
724: static inline PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)725: {
726: PetscInt i,j;
728: if (beta==(PetscReal)1.0) {
729: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
730: PetscLogFlops(2.0*n*k);
731: } else {
732: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
733: PetscLogFlops(3.0*n*k);
734: }
735: return 0;
736: }
738: /*
739: FILTLAN function FilteredConjugateResidualPolynomial
741: ** Conjugate Residual Method in the Polynomial Space
743: this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
744: where P(z), the base filter, is the input piecewise polynomial, and
745: Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
746: niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
747: both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
748: and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
750: input:
751: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
752: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
753: the interval weights define the inner product of two continuous functions and then
754: the derived w-norm ||P(z)-Q(z)||_w
755: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
756: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
757: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
758: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
759: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
760: with T_i(z) the Chebyshev polynomial of the first kind,
761: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
763: output:
764: the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
765: and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
766: this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
767: to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
768: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
770: note:
771: 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
772: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
773: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
774: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
775: */
776: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)777: {
778: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
779: PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
781: nintv = m-1;
782: ld = niter+2; /* leading dimension */
783: PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
784: PetscArrayzero(cpol,ld*nintv);
785: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
786: sppol = 2;
787: srpol = 2;
788: scpol = 2;
789: for (j=0;j<nintv;j++) {
790: ppol[j*ld] = 1.0;
791: rpol[j*ld] = 1.0;
792: cpol[j*ld] = 1.0;
793: }
794: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
795: rpol is the r-polynomial (corresponding to the residual vector r in CG)
796: cpol is the "corrected" residual polynomial (result of this function) */
797: sappol = 3;
798: sarpol = 3;
799: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
800: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
801: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
802: for (i=0;i<niter;i++) {
803: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
804: alp0 = rho/den;
805: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
806: alp = (rho-rho1)/den;
807: srpol++;
808: scpol++;
809: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
810: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
811: if (i+1 == niter) break;
812: sarpol++;
813: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
814: rho0 = rho;
815: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
816: bet = rho / rho0;
817: sppol++;
818: sappol++;
819: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
820: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
821: }
822: PetscFree4(ppol,rpol,appol,arpol);
823: return 0;
824: }
826: /*
827: FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
829: this routine employs a conjugate-residual-type algorithm in polynomial space to compute
830: x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
831: P(z) is a given piecewise polynomial, called the base filter,
832: s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
833: and b and x0 are given vectors
835: note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
836: and presented as a matrix of Chebyshev coefficients pp
838: input:
839: A is a sparse matrix
840: x0, b are vectors
841: niter is the number of conjugate-residual iterations
842: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
843: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
844: the interval weights define the inner product of two continuous functions and then
845: the derived w-norm ||P(z)-Q(z)||_w
846: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
847: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
848: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
849: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
850: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
851: with T_i(z) the Chebyshev polynomial of the first kind,
852: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
853: tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
854: than tol, then stop the conjugate-residual iteration
856: output:
857: the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
858: subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
859: in short, z*s(z) approximates 1-P(z)
861: note:
862: 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
863: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
864: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
865: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
866: 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
867: in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
868: z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
869: */
870: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)871: {
872: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
873: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
874: Vec r=work[0],p=work[1],ap=work[2],w=work[3];
875: PetscScalar alpha;
877: ld = niter+3; /* leading dimension */
878: PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
879: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
880: sppol = 2;
881: srpol = 2;
882: scpol = 2;
883: for (j=0;j<nintv;j++) {
884: ppol[j*ld] = 1.0;
885: rpol[j*ld] = 1.0;
886: cpol[j*ld] = 1.0;
887: }
888: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
889: rpol is the r-polynomial (corresponding to the residual vector r in CG)
890: cpol is the "corrected" residual polynomial */
891: sappol = 3;
892: sarpol = 3;
893: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
894: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
895: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
896: rho = rho00;
898: /* corrected CR in vector space */
899: /* we assume x0 is always 0 */
900: VecSet(x,0.0);
901: VecCopy(b,r); /* initial residual r = b-A*x0 */
902: VecCopy(r,p); /* p = r */
903: MatMult(A,p,ap); /* ap = A*p */
905: for (i=0;i<niter;i++) {
906: /* iteration in the polynomial space */
907: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
908: alp0 = rho/den;
909: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
910: alp = (rho-rho1)/den;
911: srpol++;
912: scpol++;
913: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
914: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
915: sarpol++;
916: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
917: rho0 = rho;
918: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
920: /* update x in the vector space */
921: alpha = alp;
922: VecAXPY(x,alpha,p); /* x += alp*p */
923: if (rho < tol*rho00) break;
925: /* finish the iteration in the polynomial space */
926: bet = rho / rho0;
927: sppol++;
928: sappol++;
929: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
930: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
932: /* finish the iteration in the vector space */
933: alpha = -alp0;
934: VecAXPY(r,alpha,ap); /* r -= alp0*ap */
935: alpha = bet;
936: VecAYPX(p,alpha,r); /* p = r + bet*p */
937: MatMult(A,r,w); /* ap = A*r + bet*ap */
938: VecAYPX(ap,alpha,w);
939: }
940: PetscFree5(ppol,rpol,cpol,appol,arpol);
941: return 0;
942: }
944: /*
945: Gateway to FILTLAN for evaluating y=p(A)*x
946: */
947: PetscErrorCode MatMult_FILTLAN(Mat A,Vec x,Vec y)948: {
949: ST st;
950: ST_FILTER *ctx;
951: PetscInt npoints;
953: MatShellGetContext(A,&st);
954: ctx = (ST_FILTER*)st->data;
955: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
956: FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(ctx->T,x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
957: VecCopy(y,st->work[0]);
958: MatMult(ctx->T,st->work[0],y);
959: return 0;
960: }
962: /* Block version of FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct */
963: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(Mat A,Mat B,Mat C,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work,Mat R,Mat P,Mat AP,Mat W)964: {
965: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
966: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
967: PetscScalar alpha;
969: ld = niter+3; /* leading dimension */
970: PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
971: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
972: sppol = 2;
973: srpol = 2;
974: scpol = 2;
975: for (j=0;j<nintv;j++) {
976: ppol[j*ld] = 1.0;
977: rpol[j*ld] = 1.0;
978: cpol[j*ld] = 1.0;
979: }
980: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
981: rpol is the r-polynomial (corresponding to the residual vector r in CG)
982: cpol is the "corrected" residual polynomial */
983: sappol = 3;
984: sarpol = 3;
985: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
986: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
987: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
988: rho = rho00;
990: /* corrected CR in vector space */
991: /* we assume x0 is always 0 */
992: MatZeroEntries(C);
993: MatCopy(B,R,SAME_NONZERO_PATTERN); /* initial residual r = b-A*x0 */
994: MatCopy(R,P,SAME_NONZERO_PATTERN); /* p = r */
995: MatMatMult(A,P,MAT_REUSE_MATRIX,PETSC_DEFAULT,&AP); /* ap = A*p */
997: for (i=0;i<niter;i++) {
998: /* iteration in the polynomial space */
999: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
1000: alp0 = rho/den;
1001: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
1002: alp = (rho-rho1)/den;
1003: srpol++;
1004: scpol++;
1005: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
1006: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
1007: sarpol++;
1008: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
1009: rho0 = rho;
1010: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
1012: /* update x in the vector space */
1013: alpha = alp;
1014: MatAXPY(C,alpha,P,SAME_NONZERO_PATTERN); /* x += alp*p */
1015: if (rho < tol*rho00) break;
1017: /* finish the iteration in the polynomial space */
1018: bet = rho / rho0;
1019: sppol++;
1020: sappol++;
1021: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
1022: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
1024: /* finish the iteration in the vector space */
1025: alpha = -alp0;
1026: MatAXPY(R,alpha,AP,SAME_NONZERO_PATTERN); /* r -= alp0*ap */
1027: alpha = bet;
1028: MatAYPX(P,alpha,R,SAME_NONZERO_PATTERN); /* p = r + bet*p */
1029: MatMatMult(A,R,MAT_REUSE_MATRIX,PETSC_DEFAULT,&W); /* ap = A*r + bet*ap */
1030: MatAYPX(AP,alpha,W,SAME_NONZERO_PATTERN);
1031: }
1032: PetscFree5(ppol,rpol,cpol,appol,arpol);
1033: return 0;
1034: }
1036: /*
1037: Gateway to FILTLAN for evaluating C=p(A)*B
1038: */
1039: PetscErrorCode MatMatMult_FILTLAN(Mat A,Mat B,Mat C,void *pctx)1040: {
1041: ST st;
1042: ST_FILTER *ctx;
1043: PetscInt i,m1,m2,npoints;
1045: MatShellGetContext(A,&st);
1046: ctx = (ST_FILTER*)st->data;
1047: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1048: if (ctx->nW) { /* check if work matrices must be resized */
1049: MatGetSize(B,NULL,&m1);
1050: MatGetSize(ctx->W[0],NULL,&m2);
1051: if (m1!=m2) {
1052: MatDestroyMatrices(ctx->nW,&ctx->W);
1053: ctx->nW = 0;
1054: }
1055: }
1056: if (!ctx->nW) { /* allocate work matrices */
1057: ctx->nW = 4;
1058: PetscMalloc1(ctx->nW,&ctx->W);
1059: for (i=0;i<ctx->nW;i++) MatDuplicate(B,MAT_DO_NOT_COPY_VALUES,&ctx->W[i]);
1060: }
1061: FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProductBlock(ctx->T,B,ctx->W[0],ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work,C,ctx->W[1],ctx->W[2],ctx->W[3]);
1062: MatMatMult(ctx->T,ctx->W[0],MAT_REUSE_MATRIX,PETSC_DEFAULT,&C);
1063: return 0;
1064: }
1066: /*
1067: FILTLAN function PolynomialFilterInterface::setFilter
1069: Creates the shifted (and scaled) matrix and the base filter P(z).
1070: M is a shell matrix whose MatMult() applies the filter.
1071: */
1072: PetscErrorCode STFilter_FILTLAN_setFilter(ST st,Mat *G)1073: {
1074: ST_FILTER *ctx = (ST_FILTER*)st->data;
1075: PetscInt i,npoints,n,m,N,M;
1076: PetscReal frame2[4];
1077: PetscScalar alpha;
1078: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
1080: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1081: /* T = frame[3]*eye(n) - A */
1082: MatDestroy(&ctx->T);
1083: MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T);
1084: MatScale(ctx->T,-1.0);
1085: alpha = ctx->frame[3];
1086: MatShift(ctx->T,alpha);
1087: for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1088: } else { /* it can be a mid-pass filter or a high-pass filter */
1089: if (ctx->frame[0] == 0.0) {
1090: PetscObjectReference((PetscObject)st->A[0]);
1091: MatDestroy(&ctx->T);
1092: ctx->T = st->A[0];
1093: for (i=0;i<4;i++) frame2[i] = ctx->frame[i];
1094: } else {
1095: /* T = A - frame[0]*eye(n) */
1096: MatDestroy(&ctx->T);
1097: MatDuplicate(st->A[0],MAT_COPY_VALUES,&ctx->T);
1098: alpha = -ctx->frame[0];
1099: MatShift(ctx->T,alpha);
1100: for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1101: }
1102: }
1104: /* no need to recompute filter if the parameters did not change */
1105: if (st->state==ST_STATE_INITIAL || ctx->filtch) {
1106: FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1107: /* translate the intervals back */
1108: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
1109: for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1110: } else { /* it can be a mid-pass filter or a high-pass filter */
1111: if (ctx->frame[0] == 0.0) {
1112: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1113: } else {
1114: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1115: }
1116: }
1118: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1119: PetscFree(ctx->baseFilter);
1120: PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1121: FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1122: PetscInfo(st,"Computed value of yLimit = %g\n",(double)ctx->filterInfo->yLimit);
1123: }
1124: ctx->filtch = PETSC_FALSE;
1126: /* create shell matrix*/
1127: if (!*G) {
1128: MatGetSize(ctx->T,&N,&M);
1129: MatGetLocalSize(ctx->T,&n,&m);
1130: MatCreateShell(PetscObjectComm((PetscObject)st),n,m,N,M,st,G);
1131: MatShellSetOperation(*G,MATOP_MULT,(void(*)(void))MatMult_FILTLAN);
1132: MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSE,MATDENSE);
1133: MatShellSetMatProductOperation(*G,MATPRODUCT_AB,NULL,MatMatMult_FILTLAN,NULL,MATDENSECUDA,MATDENSECUDA);
1134: }
1135: return 0;
1136: }