1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
10: /*
11: This file is an adaptation of several subroutines from FILTLAN, the
12: Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.
14: More information at:
15: http://www-users.cs.umn.edu/~saad/software/filtlan
17: References:
19: [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
20: eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
21: */
23: #include <slepc/private/stimpl.h>
24: #include "./filter.h"
26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);
30: /* ////////////////////////////////////////////////////////////////////////////
31: // Newton - Hermite Polynomial Interpolation
32: //////////////////////////////////////////////////////////////////////////// */
34: /*
35: FILTLAN function NewtonPolynomial
37: build P(z) by Newton's divided differences in the form
38: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
39: such that P(x(i)) = y(i) for i=1,...,n, where
40: x,y are input vectors of length n, and a is the output vector of length n
41: if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
42: and the Hermite polynomial interpolation is applied
43: in general, if there are k x(i)'s with the same value x0, then
44: the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
45: */
46: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf) 47: {
49: PetscReal d,*sx=x,*sy=y;
50: PetscInt j,k;
53: PetscMemcpy(sf,sy,n*sizeof(PetscReal));
55: /* apply Newton's finite difference method */
56: sa[0] = sf[0];
57: for (j=1;j<n;j++) {
58: for (k=n-1;k>=j;k--) {
59: d = sx[k]-sx[k-j];
60: if (d == 0.0) sf[k] = 0.0; /* assume that the derivative is 0.0 and apply the Hermite interpolation */
61: else sf[k] = (sf[k]-sf[k-1]) / d;
62: }
63: sa[j] = sf[j];
64: }
65: return(0);
66: }
68: /*
69: FILTLAN function HermiteBaseFilterInChebyshevBasis
71: compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
72: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval
74: The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
75: P_j(z) a Hermite interpolating polynomial
77: input:
78: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
79: HiLowFlags determines the shape of the base filter P(z)
80: Consider the j-th interval [intv(j), intv(j+1)]
81: HighLowFlag[j-1]==1, P(z)==1 for z in [intv(j), intv(j+1)]
82: ==0, P(z)==0 for z in [intv(j), intv(j+1)]
83: ==-1, [intv(j), intv(j+1)] is a transition interval;
84: P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
85: baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
86: to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
87: end points z=intv(j) for i=1,...,baseDeg
89: output:
90: P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
91: to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
92: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
93: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
94: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
95: with T_i(z) the Chebyshev polynomial of the first kind,
96: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
97: the return matrix is the matrix of Chebyshev coefficients pp just described
99: note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
100: let n be the length of intv; then there are n-1 intervals
101: therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
102: */
103: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)104: {
106: PetscInt m,ii,jj;
107: PetscReal flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
108: const PetscInt *hilo = HighLowFlags;
111: m = 2*baseDeg+2;
112: jj = npoints-1; /* jj is initialized as the number of intervals */
113: PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
114: sbf = baseFilter;
116: while (jj--) { /* the main loop to compute the Chebyshev coefficients */
118: flag = (PetscReal)(*hilo++); /* get the flag of the current interval */
119: if (flag == -1.0) { /* flag == -1 means that the current interval is a transition polynomial */
121: flag2 = (PetscReal)(*hilo); /* get flag2, the flag of the next interval */
122: flag0 = 1.0-flag2; /* the flag of the previous interval is 1-flag2 */
124: /* two pointers for traversing x[] and y[] */
125: sx = px;
126: sy = py;
128: /* find the current interval [aa,bb] */
129: aa = *currentPoint++;
130: bb = *currentPoint;
132: /* now left-hand side */
133: ii = baseDeg+1;
134: while (ii--) {
135: *sy++ = flag0;
136: *sx++ = aa;
137: }
139: /* now right-hand side */
140: ii = baseDeg+1;
141: while (ii--) {
142: *sy++ = flag2;
143: *sx++ = bb;
144: }
146: /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
147: FILTLAN_NewtonPolynomial(m,px,py,pp,work);
149: /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
150: P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */
152: /* translate the Newton coefficients to the Chebyshev coefficients */
153: FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
154: /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */
156: /* copy the Chebyshev coefficients to baseFilter
157: OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
158: sq = qq;
159: ii = 2*baseDeg+2;
160: while (ii--) *sbf++ = *sq++;
162: } else {
164: /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
165: OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
166: *sbf++ = flag;
168: /* the other coefficients are all zero, since P(z) is a constant
169: OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
170: ii = 2*baseDeg+1;
171: while (ii--) *sbf++ = 0.0;
173: /* for the next point */
174: currentPoint++;
175: }
176: }
177: PetscFree5(px,py,pp,qq,work);
178: return(0);
179: }
181: /* ////////////////////////////////////////////////////////////////////////////
182: // Base Filter
183: //////////////////////////////////////////////////////////////////////////// */
185: /*
186: FILTLAN function GetIntervals
188: this routine determines the intervals (including the transition one(s)) by an iterative process
190: frame is a vector consisting of 4 ordered elements:
191: [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
192: [frame(2),frame(3)] is the interval in which the eigenvalues are sought
193: baseDeg is the left-and-right degree of the base filter for each interval
194: polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
195: intv is the output; the j-th interval is [intv(j),intv(j+1))
196: opts is a collection of interval options
198: the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
199: at each end point of intervals
201: the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
202: minimized with s(z) a polynomial of degree up to polyDeg
204: the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
205: y in [frame[0],frame[3]] but not in [frame[1],frame[2]]
207: the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
208: */
209: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)210: {
211: PetscErrorCode ierr;
212: PetscReal intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
213: PetscReal yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
214: PetscReal yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
215: PetscInt i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
216: PetscBool leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
217: const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
218: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 }; /* if filterType is 1, only first 3 elements will be used */
219: const PetscInt numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));
222: if (a>a1 || a1>b1 || b1>b) SETERRQ(PETSC_COMM_SELF,1,"Values in the frame vector should be non-decreasing");
223: if (a1 == b1) SETERRQ(PETSC_COMM_SELF,1,"The range of wanted eigenvalues cannot be of size zero");
224: filterInfo->filterType = 2; /* mid-pass filter, for interior eigenvalues */
225: if (b == b1) {
226: if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"A polynomial filter should not cover all eigenvalues");
227: filterInfo->filterType = 1; /* high-pass filter, for largest eigenvalues */
228: } else if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");
230: /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
231: halfPlateau = 0.5*(b1-a1)*opts->initialPlateau; /* half length of the "plateau" of the (dual) base filter */
232: leftDelta = (b1-a1)*opts->initialShiftStep; /* initial left shift */
233: rightDelta = leftDelta; /* initial right shift */
234: opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
235: gridSize = (b-a) / (PetscReal)(opts->numGridPoints);
237: for (i=0;i<6;i++) intv[i] = 0.0;
238: if (filterInfo->filterType == 2) { /* for interior eigenvalues */
239: npoints = 6;
240: intv[0] = a;
241: intv[5] = b;
242: /* intv[1], intv[2], intv[3], and intv[4] to be determined */
243: } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
244: npoints = 4;
245: intv[0] = a;
246: intv[3] = b;
247: /* intv[1], and intv[2] to be determined */
248: }
249: z1 = a1 - leftDelta;
250: z2 = b1 + rightDelta;
251: filterInfo->filterOK = 0; /* not yet found any OK filter */
253: /* allocate matrices */
254: PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);
256: /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
257: intervals[0] = intv[0];
258: intervals[3] = intv[3];
259: intervals[5] = intv[5];
260: intervals[1] = z1;
261: if (filterInfo->filterType == 2) { /* a mid-pass filter for interior eigenvalues */
262: intervals[4] = z2;
263: c = (a1+b1) / 2.0;
264: intervals[2] = c - halfPlateau;
265: intervals[3] = c + halfPlateau;
266: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
267: intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
268: }
270: /* the main loop */
271: for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
272: if (z1 <= a) { /* outer loop updates z1 and z2 */
273: z1 = a;
274: leftBoundaryMet = PETSC_TRUE;
275: }
276: if (filterInfo->filterType == 2) { /* a <= z1 < (a1) */
277: if (z2 >= b) { /* a mid-pass filter for interior eigenvalues */
278: z2 = b;
279: rightBoundaryMet = PETSC_TRUE;
280: }
281: /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
282: /* [z1, c-h] and [c+h, z2] are transition interval */
283: intv[1] = z1;
284: intv[4] = z2;
285: c1 = z1 + halfPlateau;
286: intv[2] = z1; /* i.e. c1 - halfPlateau */
287: intv[3] = c1 + halfPlateau;
288: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290: /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
291: c2 = z2 - halfPlateau;
292: intv[2] = c2 - halfPlateau;
293: intv[3] = z2; /* i.e. c2 + halfPlateau */
294: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
295: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
296: fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
297: yLimitGap = PETSC_MAX_REAL;
298: ii = opts->maxInnerIter;
299: while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
300: /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
301: c = (c1+c2) / 2.0;
302: intv[2] = c - halfPlateau;
303: intv[3] = c + halfPlateau;
304: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
305: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
306: fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
307: if (fc*fc2 < 0.0) {
308: c1 = c;
309: /* fc1 = fc; */
310: } else {
311: c2 = c;
312: fc2 = fc;
313: }
314: yLimitGap = PetscAbsReal(fc);
315: }
316: } else { /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
317: intv[1] = z1;
318: intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
319: FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
320: FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
321: }
322: /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
323: expanded in the `translated' Chebyshev basis */
324: /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
325: yLeftLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
326: yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
327: yLimit = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
328: ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
329: x = a1;
330: while ((x+=gridSize) < b1) {
331: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
332: if (y < yLimit) yLimit = y;
333: if (y > ySummit) ySummit = y;
334: }
335: /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
336: stepLeft = PETSC_FALSE;
337: stepRight = PETSC_FALSE;
338: if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
339: /* very bad, step to see what will happen */
340: stepLeft = PETSC_TRUE;
341: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
342: } else if (filterInfo->filterType == 2) {
343: if (yLeftLimit < yRightLimit) {
344: if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
345: } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
346: }
347: if (!stepLeft) {
348: yLeftBottom = yLeftLimit;
349: x = a1;
350: while ((x-=gridSize) >= a) {
351: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352: if (y < yLeftBottom) yLeftBottom = y;
353: else if (y > yLeftBottom) break;
354: }
355: yLeftSummit = yLeftBottom;
356: while ((x-=gridSize) >= a) {
357: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
358: if (y > yLeftSummit) {
359: yLeftSummit = y;
360: if (yLeftSummit > yLimit*opts->yRippleLimit) {
361: stepLeft = PETSC_TRUE;
362: break;
363: }
364: }
365: if (y < yLeftBottom) yLeftBottom = y;
366: }
367: }
368: if (filterInfo->filterType == 2 && !stepRight) {
369: yRightBottom = yRightLimit;
370: x = b1;
371: while ((x+=gridSize) <= b) {
372: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373: if (y < yRightBottom) yRightBottom = y;
374: else if (y > yRightBottom) break;
375: }
376: yRightSummit = yRightBottom;
377: while ((x+=gridSize) <= b) {
378: y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
379: if (y > yRightSummit) {
380: yRightSummit = y;
381: if (yRightSummit > yLimit*opts->yRippleLimit) {
382: stepRight = PETSC_TRUE;
383: break;
384: }
385: }
386: if (y < yRightBottom) yRightBottom = y;
387: }
388: }
389: if (!stepLeft && !stepRight) {
390: if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
391: else bottom = yLeftBottom;
392: qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
393: if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
394: /* found the first OK filter or a better filter */
395: for (i=0;i<6;i++) intervals[i] = intv[i];
396: filterInfo->filterOK = 1;
397: filterInfo->filterQualityIndex = qIndex;
398: filterInfo->numIter = numIter;
399: filterInfo->yLimit = yLimit;
400: filterInfo->ySummit = ySummit;
401: filterInfo->numLeftSteps = numLeftSteps;
402: filterInfo->yLeftSummit = yLeftSummit;
403: filterInfo->yLeftBottom = yLeftBottom;
404: if (filterInfo->filterType == 2) {
405: filterInfo->yLimitGap = yLimitGap;
406: filterInfo->numRightSteps = numRightSteps;
407: filterInfo->yRightSummit = yRightSummit;
408: filterInfo->yRightBottom = yRightBottom;
409: }
410: numMoreLooked = 0;
411: } else if (++numMoreLooked == numLookMore) {
412: /* filter has been optimal */
413: filterInfo->filterOK = 2;
414: break;
415: }
416: /* try stepping further to see whether it can improve */
417: stepLeft = PETSC_TRUE;
418: if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
419: }
420: /* check whether we can really "step" */
421: if (leftBoundaryMet) {
422: if (filterInfo->filterType == 1 || rightBoundaryMet) break; /* cannot step further, so break the loop */
423: if (stepLeft) {
424: /* cannot step left, so try stepping right */
425: stepLeft = PETSC_FALSE;
426: stepRight = PETSC_TRUE;
427: }
428: }
429: if (rightBoundaryMet && stepRight) {
430: /* cannot step right, so try stepping left */
431: stepRight = PETSC_FALSE;
432: stepLeft = PETSC_TRUE;
433: }
434: /* now "step" */
435: if (stepLeft) {
436: numLeftSteps++;
437: if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
438: z1 -= leftDelta;
439: }
440: if (stepRight) {
441: numRightSteps++;
442: rightDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
443: z2 += rightDelta;
444: }
445: if (filterInfo->filterType == 2) {
446: /* shrink the "plateau" of the (dual) base filter */
447: if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
448: else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
449: }
450: }
451: if (!filterInfo->filterOK) SETERRQ(PETSC_COMM_SELF,1,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");
453: filterInfo->totalNumIter = numIter;
454: PetscFree2(polyFilter,baseFilter);
455: return(0);
456: }
458: /* ////////////////////////////////////////////////////////////////////////////
459: // Chebyshev Polynomials
460: //////////////////////////////////////////////////////////////////////////// */
462: /*
463: FILTLAN function ExpandNewtonPolynomialInChebyshevBasis
465: translate the coefficients of a Newton polynomial to the coefficients
466: in a basis of the `translated' (scale-and-shift) Chebyshev polynomials
468: input:
469: a Newton polynomial defined by vectors a and x:
470: P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
471: the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
472: where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
473: note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2
475: output:
476: a vector q containing the Chebyshev coefficients:
477: P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
478: */
479: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)480: {
481: PetscInt m,mm;
482: PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;
485: sa = a+n; /* pointers for traversing a and x */
486: sx = x+n-1;
487: *q = *--sa; /* set q[0] = a(n) */
489: c = (aa+bb)/2.0;
490: h = (bb-aa)/2.0;
491: h2 = h/2.0;
493: for (m=1;m<=n-1;m++) { /* the main loop for translation */
495: /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
496: mm = m;
497: sq = q;
498: sq2 = q2;
499: c2 = c-(*--sx);
500: while (mm--) *(sq2++) = c2*(*sq++);
501: *sq2 = 0.0; /* q2[m] = 0.0 */
502: *(q2+1) += h*(*q); /* q2[1] = q2[1] + h*q[0] */
504: /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
505: mm = m-1;
506: sq2 = q2;
507: sq = q+1;
508: while (mm--) *(sq2++) += h2*(*sq++);
510: /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
511: mm = m-1;
512: sq2 = q2+2;
513: sq = q+1;
514: while (mm--) *(sq2++) += h2*(*sq++);
516: /* compute q[0:m] = q2[0:m] */
517: mm = m+1;
518: sq2 = q2;
519: sq = q;
520: while (mm--) *sq++ = *sq2++;
521: *q += (*--sa); /* q[0] = q[0] + p[n-m-1] */
522: }
523: return(0);
524: }
526: /*
527: FILTLAN function PolynomialEvaluationInChebyshevBasis
529: evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
530: the `translated' (i.e. scale-and-shift) Chebyshev polynomials
532: input:
533: c is a vector of Chebyshev coefficients which defines the polynomial
534: P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
535: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
536: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
537: note that T_i(z) is the Chebyshev polynomial of the first kind,
538: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
540: output:
541: the evaluated value of P(z) at z=z0
542: */
543: PETSC_STATIC_INLINE PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)544: {
545: PetscInt ii,deg1;
546: PetscReal y,zz,t0,t1,t2,*sc;
549: deg1 = m;
550: if (aa==-1.0 && bb==1.0) zz = z0; /* treat it as a special case to reduce rounding errors */
551: else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);
553: /* compute y = P(z0), where we utilize the Chebyshev recursion */
554: sc = pp+(idx-1)*m; /* sc points to column idx of pp */
555: y = *sc++;
556: t1 = 1.0; t2 = zz;
557: ii = deg1-1;
558: while (ii--) {
559: /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
560: the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
561: t0 = 2*zz*t1 - t2;
562: /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
563: t2 = t1;
564: t1 = t0;
565: y += (*sc++)*t0;
566: }
567: PetscFunctionReturn(y);
568: }
570: #define basisTranslated PETSC_TRUE571: /*
572: FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis
574: evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
575: in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval
577: input:
578: intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
579: pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
580: in a basis of the `translated' Chebyshev polynomials in each interval
581: the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
582: if basisTranslated == false, then
583: P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
584: where T_i(z) is the Chebyshev polynomial of the first kind,
585: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
586: if basisTranslated == true, then
587: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
588: where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
589: c = (intv(j)+intv(j+1)) / 2, h = (intv(j+1)-intv(j)) / 2
591: output:
592: the evaluated value of P(z) at z=z0
594: note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
595: if z0 flies over the last interval, then the polynomial in the last interval will be used to evaluate P(z0)
596: */
597: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)598: {
599: PetscReal *sintv,aa,bb,resul;
600: PetscInt idx;
603: /* determine the interval which contains z0 */
604: sintv = &intv[1];
605: idx = 1;
606: if (npoints>2 && z0 >= *sintv) {
607: sintv++;
608: while (++idx < npoints-1) {
609: if (z0 < *sintv) break;
610: sintv++;
611: }
612: }
613: /* idx==1 if npoints<=2; otherwise idx satisfies:
614: intv(idx) <= z0 < intv(idx+1), if 2 <= idx <= npoints-2
615: z0 < intv(idx+1), if idx == 1
616: intv(idx) <= z0, if idx == npoints-1
617: in addition, sintv points to &intv(idx+1) */
618: if (basisTranslated) {
619: /* the basis consists of `translated' Chebyshev polynomials */
620: /* find the interval of concern, [aa,bb] */
621: aa = *(sintv-1);
622: bb = *sintv;
623: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
624: } else {
625: /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
626: resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
627: }
628: PetscFunctionReturn(resul);
629: }
631: /*
632: FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis
634: compute the weighted inner product of two piecewise polynomials expanded
635: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
637: pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
638: for z in the j-th interval, P(z) equals
639: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
640: and Q(z) equals
641: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
642: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
643: S_i((z-c)/h) = T_i(z), c = (aa+bb)) / 2, h = (bb-aa) / 2,
644: with T_i(z) the Chebyshev polynomial of the first kind
645: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
647: the (scaled) j-th interval inner product is defined by
648: <P_j,Q_j> = (Pi/2)*( pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j) ),
649: which comes from the property
650: <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j
652: the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
653: which is the return value
655: note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
656: */
657: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)658: {
659: PetscInt nintv,deg1,i,k;
660: PetscReal *sp,*sq,ans=0.0,ans2;
663: deg1 = PetscMin(prows,qrows); /* number of effective coefficients, one more than the effective polynomial degree */
664: if (!deg1) PetscFunctionReturn(0.0);
665: nintv = PetscMin(pcols,qcols); /* number of intervals */
667: /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
668: compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
669: for (i=0;i<nintv;i++) { /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
670: sp = pp+i*ldp;
671: sq = qq+i*ldq;
672: ans2 = (*sp) * (*sq); /* the first term pp(1,i)*qq(1,i) is being added twice */
673: for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++); /* add pp(k,i)*qq(k,i) */
674: ans += ans2*intervalWeights[i]; /* compute ans += ans2*intervalWeights(i) */
675: }
676: PetscFunctionReturn(ans*PETSC_PI/2.0);
677: }
679: /*
680: FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX
682: compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
683: in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval
685: P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively
687: For z in the j-th interval, P(z) equals
688: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
689: and Q(z) equals
690: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
691: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
692: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
693: with T_i(z) the Chebyshev polynomial of the first kind
694: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
696: the returned matrix is qq which represents Q(z) = z*P(z)
697: */
698: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)699: {
700: PetscInt i,j;
701: PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;
704: for (j=0;j<nintv;j++) { /* consider interval between intv(j) and intv(j+1) */
705: sp = pp+j*ldp;
706: sq = qq+j*ldq;
707: sp2 = sp;
708: sq2 = sq+1;
709: c = 0.5*(intv[j] + intv[j+1]); /* compute c = (intv(j) + intv(j+1))/2 */
710: h = 0.5*(intv[j+1] - intv[j]); /* compute h = (intv(j+1) - intv(j))/2 and h2 = h/2 */
711: h2 = 0.5*h;
712: i = deg1;
713: while (i--) *sq++ = c*(*sp++); /* compute q(1:deg1,j) = c*p(1:deg1,j) */
714: *sq++ = 0.0; /* set q(deg1+1,j) = 0.0 */
715: *(sq2++) += h*(*sp2++); /* compute q(2,j) = q(2,j) + h*p(1,j) */
716: i = deg1-1;
717: while (i--) { /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
718: tmp = h2*(*sp2++);
719: *(sq2-2) += tmp;
720: *(sq2++) += tmp;
721: }
722: }
723: return(0);
724: }
726: /* ////////////////////////////////////////////////////////////////////////////
727: // Conjugate Residual Method in the Polynomial Space
728: //////////////////////////////////////////////////////////////////////////// */
730: /*
731: B := alpha*A + beta*B
733: A,B are nxk
734: */
735: PETSC_STATIC_INLINE PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)736: {
738: PetscInt i,j;
741: if (beta==(PetscReal)1.0) {
742: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
743: PetscLogFlops(2.0*n*k);
744: } else {
745: for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
746: PetscLogFlops(3.0*n*k);
747: }
748: return(0);
749: }
751: /*
752: FILTLAN function FilteredConjugateResidualPolynomial
754: ** Conjugate Residual Method in the Polynomial Space
756: this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
757: where P(z), the base filter, is the input piecewise polynomial, and
758: Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
759: niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
760: both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
761: and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively
763: input:
764: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
765: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
766: the interval weights define the inner product of two continuous functions and then
767: the derived w-norm ||P(z)-Q(z)||_w
768: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
769: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
770: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
771: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
772: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
773: with T_i(z) the Chebyshev polynomial of the first kind,
774: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
776: output:
777: the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
778: and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
779: this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
780: to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
781: Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)
783: note:
784: 1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
785: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
786: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
787: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
788: */
789: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)790: {
792: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
793: PetscReal rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;
796: nintv = m-1;
797: ld = niter+2; /* leading dimension */
798: PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
799: PetscMemzero(cpol,ld*nintv*sizeof(PetscReal));
800: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
801: sppol = 2;
802: srpol = 2;
803: scpol = 2;
804: for (j=0;j<nintv;j++) {
805: ppol[j*ld] = 1.0;
806: rpol[j*ld] = 1.0;
807: cpol[j*ld] = 1.0;
808: }
809: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
810: rpol is the r-polynomial (corresponding to the residual vector r in CG)
811: cpol is the "corrected" residual polynomial (result of this function) */
812: sappol = 3;
813: sarpol = 3;
814: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
815: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
816: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
817: for (i=0;i<niter;i++) {
818: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
819: alp0 = rho/den;
820: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
821: alp = (rho-rho1)/den;
822: srpol++;
823: scpol++;
824: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
825: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
826: if (i+1 == niter) break;
827: sarpol++;
828: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
829: rho0 = rho;
830: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
831: bet = rho / rho0;
832: sppol++;
833: sappol++;
834: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
835: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
836: }
837: PetscFree4(ppol,rpol,appol,arpol);
838: return(0);
839: }
841: /*
842: FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct
844: this routine employs a conjugate-residual-type algorithm in polynomial space to compute
845: x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
846: P(z) is a given piecewise polynomial, called the base filter,
847: s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
848: and b and x0 are given vectors
850: note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
851: and presented as a matrix of Chebyshev coefficients pp
853: input:
854: A is a sparse matrix
855: x0, b are vectors
856: niter is the number of conjugate-residual iterations
857: intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
858: w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
859: the interval weights define the inner product of two continuous functions and then
860: the derived w-norm ||P(z)-Q(z)||_w
861: pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
862: to be specific, for z in [intv(j), intv(j+1)), P(z) equals
863: P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
864: where S_i(z) is the `translated' Chebyshev polynomial in that interval,
865: S_i((z-c)/h) = T_i(z), c = (intv(j)+intv(j+1))) / 2, h = (intv(j+1)-intv(j)) / 2,
866: with T_i(z) the Chebyshev polynomial of the first kind,
867: T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
868: tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
869: than tol, then stop the conjugate-residual iteration
871: output:
872: the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
873: subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
874: in short, z*s(z) approximates 1-P(z)
876: note:
877: 1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
878: for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
879: 2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
880: in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
881: 3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
882: in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
883: z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
884: */
885: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)886: {
888: PetscInt i,j,srpol,scpol,sarpol,sppol,sappol,ld;
889: PetscReal rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
890: Vec r=work[0],p=work[1],ap=work[2],w=work[3];
891: PetscScalar alpha;
894: ld = niter+3; /* leading dimension */
895: PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
896: /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
897: sppol = 2;
898: srpol = 2;
899: scpol = 2;
900: for (j=0;j<nintv;j++) {
901: ppol[j*ld] = 1.0;
902: rpol[j*ld] = 1.0;
903: cpol[j*ld] = 1.0;
904: }
905: /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
906: rpol is the r-polynomial (corresponding to the residual vector r in CG)
907: cpol is the "corrected" residual polynomial */
908: sappol = 3;
909: sarpol = 3;
910: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
911: for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
912: rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
913: rho = rho00;
915: /* corrected CR in vector space */
916: /* we assume x0 is always 0 */
917: VecSet(x,0.0);
918: VecCopy(b,r); /* initial residual r = b-A*x0 */
919: VecCopy(r,p); /* p = r */
920: MatMult(A,p,ap); /* ap = A*p */
922: for (i=0;i<niter;i++) {
923: /* iteration in the polynomial space */
924: den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
925: alp0 = rho/den;
926: rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
927: alp = (rho-rho1)/den;
928: srpol++;
929: scpol++;
930: Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
931: Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
932: sarpol++;
933: FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
934: rho0 = rho;
935: rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
937: /* update x in the vector space */
938: alpha = alp;
939: VecAXPY(x,alpha,p); /* x += alp*p */
940: if (rho < tol*rho00) break;
942: /* finish the iteration in the polynomial space */
943: bet = rho / rho0;
944: sppol++;
945: sappol++;
946: Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
947: Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
949: /* finish the iteration in the vector space */
950: alpha = -alp0;
951: VecAXPY(r,alpha,ap); /* r -= alp0*ap */
952: alpha = bet;
953: VecAYPX(p,alpha,r); /* p = r + bet*p */
954: MatMult(A,r,w); /* ap = A*r + bet*ap */
955: VecAYPX(ap,alpha,w);
956: }
957: PetscFree5(ppol,rpol,cpol,appol,arpol);
958: return(0);
959: }
961: /*
962: Gateway to FILTLAN for evaluating y=p(A)*x
963: */
964: PetscErrorCode STFilter_FILTLAN_Apply(ST st,Vec x,Vec y)965: {
967: ST_FILTER *ctx = (ST_FILTER*)st->data;
968: PetscInt npoints;
971: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
972: FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(st->T[0],x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
973: VecCopy(y,st->work[0]);
974: MatMult(st->T[0],st->work[0],y);
975: return(0);
976: }
978: /*
979: FILTLAN function PolynomialFilterInterface::setFilter
981: Creates the shifted (and scaled) matrix and the base filter P(z)
982: */
983: PetscErrorCode STFilter_FILTLAN_setFilter(ST st)984: {
986: ST_FILTER *ctx = (ST_FILTER*)st->data;
987: PetscInt i,npoints;
988: PetscReal frame2[4];
989: PetscScalar alpha;
990: const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };
993: MatDestroy(&st->T[0]);
994: if (ctx->frame[0] == ctx->frame[1]) { /* low pass filter, convert it to high pass filter */
995: /* T = frame[3]*eye(n) - A */
996: MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
997: MatScale(st->T[0],-1.0);
998: alpha = ctx->frame[3];
999: MatShift(st->T[0],alpha);
1000: for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1001: FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1002: /* translate the intervals back */
1003: for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1004: } else { /* it can be a mid-pass filter or a high-pass filter */
1005: if (ctx->frame[0] == 0.0) {
1006: PetscObjectReference((PetscObject)st->A[0]);
1007: st->T[0] = st->A[0];
1008: FILTLAN_GetIntervals(ctx->intervals,ctx->frame,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1009: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1010: } else {
1011: /* T = A - frame[0]*eye(n) */
1012: MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
1013: alpha = -ctx->frame[0];
1014: MatShift(st->T[0],alpha);
1015: for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1016: FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1017: /* translate the intervals back */
1018: for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1019: }
1020: }
1021: npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1022: PetscFree(ctx->baseFilter);
1023: PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1024: FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1025: PetscInfo1(st,"Computed value of yLimit = %g\n",ctx->filterInfo->yLimit);
1026: return(0);
1027: }