Actual source code: filtlan.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    This file is an adaptation of several subroutines from FILTLAN, the
 12:    Filtered Lanczos Package, authored by Haw-ren Fang and Yousef Saad.

 14:    More information at:
 15:    http://www-users.cs.umn.edu/~saad/software/filtlan

 17:    References:

 19:        [1] H. Fang and Y. Saad, "A filtered Lanczos procedure for extreme and interior
 20:            eigenvalue problems", SIAM J. Sci. Comput. 34(4):A2220-A2246, 2012.
 21: */

 23: #include <slepc/private/stimpl.h>
 24: #include "./filter.h"

 26: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal*,PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal*,PetscInt);
 27: static PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal*,PetscInt,PetscReal*,PetscInt,PetscReal);
 28: static PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt,PetscReal,PetscReal,PetscReal*,PetscReal*,PetscReal*,PetscReal*);

 30: /* ////////////////////////////////////////////////////////////////////////////
 31:    //    Newton - Hermite Polynomial Interpolation
 32:    //////////////////////////////////////////////////////////////////////////// */

 34: /*
 35:    FILTLAN function NewtonPolynomial

 37:    build P(z) by Newton's divided differences in the form
 38:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1)),
 39:    such that P(x(i)) = y(i) for i=1,...,n, where
 40:        x,y are input vectors of length n, and a is the output vector of length n
 41:    if x(i)==x(j) for some i!=j, then it is assumed that the derivative of P(z) is to be zero at x(i),
 42:        and the Hermite polynomial interpolation is applied
 43:    in general, if there are k x(i)'s with the same value x0, then
 44:        the j-th order derivative of P(z) is zero at z=x0 for j=1,...,k-1
 45: */
 46: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_NewtonPolynomial(PetscInt n,PetscReal *x,PetscReal *y,PetscReal *sa,PetscReal *sf)
 47: {
 49:   PetscReal      d,*sx=x,*sy=y;
 50:   PetscInt       j,k;

 53:   PetscMemcpy(sf,sy,n*sizeof(PetscReal));

 55:   /* apply Newton's finite difference method */
 56:   sa[0] = sf[0];
 57:   for (j=1;j<n;j++) {
 58:     for (k=n-1;k>=j;k--) {
 59:       d = sx[k]-sx[k-j];
 60:       if (d == 0.0) sf[k] = 0.0;  /* assume that the derivative is 0.0 and apply the Hermite interpolation */
 61:       else sf[k] = (sf[k]-sf[k-1]) / d;
 62:     }
 63:     sa[j] = sf[j];
 64:   }
 65:   return(0);
 66: }

 68: /*
 69:    FILTLAN function HermiteBaseFilterInChebyshevBasis

 71:    compute a base filter P(z) which is a continuous, piecewise polynomial P(z) expanded
 72:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials in each interval

 74:    The base filter P(z) equals P_j(z) for z in the j-th interval [intv(j), intv(j+1)), where
 75:    P_j(z) a Hermite interpolating polynomial

 77:    input:
 78:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
 79:    HiLowFlags determines the shape of the base filter P(z)
 80:    Consider the j-th interval [intv(j), intv(j+1)]
 81:    HighLowFlag[j-1]==1,  P(z)==1 for z in [intv(j), intv(j+1)]
 82:                    ==0,  P(z)==0 for z in [intv(j), intv(j+1)]
 83:                    ==-1, [intv(j), intv(j+1)] is a transition interval;
 84:                          P(intv(j)) and P(intv(j+1)) are defined such that P(z) is continuous
 85:    baseDeg is the degree of smoothness of the Hermite (piecewise polynomial) interpolation
 86:    to be precise, the i-th derivative of P(z) is zero, i.e. d^{i}P(z)/dz^i==0, at all interval
 87:    end points z=intv(j) for i=1,...,baseDeg

 89:    output:
 90:    P(z) expanded in a basis of `translated' (scale-and-shift) Chebyshev polynomials
 91:    to be precise, for z in the j-th interval [intv(j),intv(j+1)), P(z) equals
 92:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
 93:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
 94:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
 95:    with T_i(z) the Chebyshev polynomial of the first kind,
 96:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
 97:    the return matrix is the matrix of Chebyshev coefficients pp just described

 99:    note that the degree of P(z) in each interval is (at most) 2*baseDeg+1, with 2*baseDeg+2 coefficients
100:    let n be the length of intv; then there are n-1 intervals
101:    therefore the return matrix pp is of size (2*baseDeg+2)-by-(n-1)
102: */
103: static PetscErrorCode FILTLAN_HermiteBaseFilterInChebyshevBasis(PetscReal *baseFilter,PetscReal *intv,PetscInt npoints,const PetscInt *HighLowFlags,PetscInt baseDeg)
104: {
106:   PetscInt       m,ii,jj;
107:   PetscReal      flag,flag0,flag2,aa,bb,*px,*py,*sx,*sy,*pp,*qq,*sq,*sbf,*work,*currentPoint = intv;
108:   const PetscInt *hilo = HighLowFlags;

111:   m = 2*baseDeg+2;
112:   jj = npoints-1;  /* jj is initialized as the number of intervals */
113:   PetscMalloc5(m,&px,m,&py,m,&pp,m,&qq,m,&work);
114:   sbf = baseFilter;

116:   while (jj--) {  /* the main loop to compute the Chebyshev coefficients */

118:     flag = (PetscReal)(*hilo++);  /* get the flag of the current interval */
119:     if (flag == -1.0) {  /* flag == -1 means that the current interval is a transition polynomial */

121:       flag2 = (PetscReal)(*hilo);  /* get flag2, the flag of the next interval */
122:       flag0 = 1.0-flag2;       /* the flag of the previous interval is 1-flag2 */

124:       /* two pointers for traversing x[] and y[] */
125:       sx = px;
126:       sy = py;

128:       /* find the current interval [aa,bb] */
129:       aa = *currentPoint++;
130:       bb = *currentPoint;

132:       /* now left-hand side */
133:       ii = baseDeg+1;
134:       while (ii--) {
135:         *sy++ = flag0;
136:         *sx++ = aa;
137:       }

139:       /* now right-hand side */
140:       ii = baseDeg+1;
141:       while (ii--) {
142:         *sy++ = flag2;
143:         *sx++ = bb;
144:       }

146:       /* build a Newton polynomial (indeed, the generalized Hermite interpolating polynomial) with x[] and y[] */
147:       FILTLAN_NewtonPolynomial(m,px,py,pp,work);

149:       /* pp contains coefficients of the Newton polynomial P(z) in the current interval [aa,bb], where
150:          P(z) = pp(1) + pp(2)*(z-px(1)) + pp(3)*(z-px(1))*(z-px(2)) + ... + pp(n)*(z-px(1))*...*(z-px(n-1)) */

152:       /* translate the Newton coefficients to the Chebyshev coefficients */
153:       FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(m,aa,bb,pp,px,qq,work);
154:       /* qq contains coefficients of the polynomial in [aa,bb] in the `translated' Chebyshev basis */

156:       /* copy the Chebyshev coefficients to baseFilter
157:          OCTAVE/MATLAB: B(:,j) = qq, where j = (npoints-1)-jj and B is the return matrix */
158:       sq = qq;
159:       ii = 2*baseDeg+2;
160:       while (ii--) *sbf++ = *sq++;

162:     } else {

164:       /* a constant polynomial P(z)=flag, where either flag==0 or flag==1
165:        OCTAVE/MATLAB: B(1,j) = flag, where j = (npoints-1)-jj and B is the return matrix */
166:       *sbf++ = flag;

168:       /* the other coefficients are all zero, since P(z) is a constant
169:        OCTAVE/MATLAB: B(1,j) = 0, where j = (npoints-1)-jj and B is the return matrix */
170:       ii = 2*baseDeg+1;
171:       while (ii--) *sbf++ = 0.0;

173:       /* for the next point */
174:       currentPoint++;
175:     }
176:   }
177:   PetscFree5(px,py,pp,qq,work);
178:   return(0);
179: }

181: /* ////////////////////////////////////////////////////////////////////////////
182:    //    Base Filter
183:    //////////////////////////////////////////////////////////////////////////// */

185: /*
186:    FILTLAN function GetIntervals

188:    this routine determines the intervals (including the transition one(s)) by an iterative process

190:    frame is a vector consisting of 4 ordered elements:
191:        [frame(1),frame(4)] is the interval which (tightly) contains all eigenvalues, and
192:        [frame(2),frame(3)] is the interval in which the eigenvalues are sought
193:    baseDeg is the left-and-right degree of the base filter for each interval
194:    polyDeg is the (maximum possible) degree of s(z), with z*s(z) the polynomial filter
195:    intv is the output; the j-th interval is [intv(j),intv(j+1))
196:    opts is a collection of interval options

198:    the base filter P(z) is a piecewise polynomial from Hermite interpolation with degree baseDeg
199:    at each end point of intervals

201:    the polynomial filter Q(z) is in the form z*s(z), i.e. Q(0)==0, such that ||(1-P(z))-z*s(z)||_w is
202:    minimized with s(z) a polynomial of degree up to polyDeg

204:    the resulting polynomial filter Q(z) satisfies Q(x)>=Q(y) for x in [frame[1],frame[2]], and
205:    y in [frame[0],frame[3]] but not in [frame[1],frame[2]]

207:    the routine fills a PolynomialFilterInfo struct which gives some information of the polynomial filter
208: */
209: static PetscErrorCode FILTLAN_GetIntervals(PetscReal *intervals,PetscReal *frame,PetscInt polyDeg,PetscInt baseDeg,FILTLAN_IOP opts,FILTLAN_PFI filterInfo)
210: {
211:   PetscErrorCode  ierr;
212:   PetscReal       intv[6],x,y,z1,z2,c,c1,c2,fc,fc2,halfPlateau,leftDelta,rightDelta,gridSize;
213:   PetscReal       yLimit,ySummit,yLeftLimit,yRightLimit,bottom,qIndex,*baseFilter,*polyFilter;
214:   PetscReal       yLimitGap=0.0,yLeftSummit=0.0,yLeftBottom=0.0,yRightSummit=0.0,yRightBottom=0.0;
215:   PetscInt        i,ii,npoints,numIter,numLeftSteps=1,numRightSteps=1,numMoreLooked=0;
216:   PetscBool       leftBoundaryMet=PETSC_FALSE,rightBoundaryMet=PETSC_FALSE,stepLeft,stepRight;
217:   const PetscReal a=frame[0],a1=frame[1],b1=frame[2],b=frame[3];
218:   const PetscInt  HighLowFlags[5] = { 1, -1, 0, -1, 1 };  /* if filterType is 1, only first 3 elements will be used */
219:   const PetscInt  numLookMore = 2*(PetscInt)(0.5+(PetscLogReal(2.0)/PetscLogReal(opts->shiftStepExpanRate)));

222:   if (a>a1 || a1>b1 || b1>b) SETERRQ(PETSC_COMM_SELF,1,"Values in the frame vector should be non-decreasing");
223:   if (a1 == b1) SETERRQ(PETSC_COMM_SELF,1,"The range of wanted eigenvalues cannot be of size zero");
224:   filterInfo->filterType = 2;      /* mid-pass filter, for interior eigenvalues */
225:   if (b == b1) {
226:     if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"A polynomial filter should not cover all eigenvalues");
227:     filterInfo->filterType = 1;    /* high-pass filter, for largest eigenvalues */
228:   } else if (a == a1) SETERRQ(PETSC_COMM_SELF,1,"filterType==3 for smallest eigenvalues should be pre-converted to filterType==1 for largest eigenvalues");

230:   /* the following recipe follows Yousef Saad (2005, 2006) with a few minor adaptations / enhancements */
231:   halfPlateau = 0.5*(b1-a1)*opts->initialPlateau;    /* half length of the "plateau" of the (dual) base filter */
232:   leftDelta = (b1-a1)*opts->initialShiftStep;        /* initial left shift */
233:   rightDelta = leftDelta;                            /* initial right shift */
234:   opts->numGridPoints = PetscMax(opts->numGridPoints,(PetscInt)(2.0*(b-a)/halfPlateau));
235:   gridSize = (b-a) / (PetscReal)(opts->numGridPoints);

237:   for (i=0;i<6;i++) intv[i] = 0.0;
238:   if (filterInfo->filterType == 2) {  /* for interior eigenvalues */
239:     npoints = 6;
240:     intv[0] = a;
241:     intv[5] = b;
242:     /* intv[1], intv[2], intv[3], and intv[4] to be determined */
243:   } else { /* filterType == 1 (or 3 with conversion), for extreme eigenvalues */
244:     npoints = 4;
245:     intv[0] = a;
246:     intv[3] = b;
247:     /* intv[1], and intv[2] to be determined */
248:   }
249:   z1 = a1 - leftDelta;
250:   z2 = b1 + rightDelta;
251:   filterInfo->filterOK = 0;  /* not yet found any OK filter */

253:   /* allocate matrices */
254:   PetscMalloc2((polyDeg+2)*(npoints-1),&polyFilter,(2*baseDeg+2)*(npoints-1),&baseFilter);

256:   /* initialize the intervals, mainly for the case opts->maxOuterIter == 0 */
257:   intervals[0] = intv[0];
258:   intervals[3] = intv[3];
259:   intervals[5] = intv[5];
260:   intervals[1] = z1;
261:   if (filterInfo->filterType == 2) {  /* a mid-pass filter for interior eigenvalues */
262:     intervals[4] = z2;
263:     c = (a1+b1) / 2.0;
264:     intervals[2] = c - halfPlateau;
265:     intervals[3] = c + halfPlateau;
266:   } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
267:     intervals[2] = z1 + (b1-z1)*opts->transIntervalRatio;
268:   }

270:   /* the main loop */
271:   for (numIter=1;numIter<=opts->maxOuterIter;numIter++) {
272:     if (z1 <= a) {  /* outer loop updates z1 and z2 */
273:       z1 = a;
274:       leftBoundaryMet = PETSC_TRUE;
275:     }
276:     if (filterInfo->filterType == 2) {  /* a <= z1 < (a1) */
277:       if (z2 >= b) {  /* a mid-pass filter for interior eigenvalues */
278:         z2 = b;
279:         rightBoundaryMet = PETSC_TRUE;
280:       }
281:       /* a <= z1 < c-h < c+h < z2 <= b, where h is halfPlateau */
282:       /* [z1, c-h] and [c+h, z2] are transition interval */
283:       intv[1] = z1;
284:       intv[4] = z2;
285:       c1 = z1 + halfPlateau;
286:       intv[2] = z1;           /* i.e. c1 - halfPlateau */
287:       intv[3] = c1 + halfPlateau;
288:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
289:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
290:       /* fc1 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1); */
291:       c2 = z2 - halfPlateau;
292:       intv[2] = c2 - halfPlateau;
293:       intv[3] = z2;           /* i.e. c2 + halfPlateau */
294:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
295:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
296:       fc2 = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
297:       yLimitGap = PETSC_MAX_REAL;
298:       ii = opts->maxInnerIter;
299:       while (ii-- && !(yLimitGap <= opts->yLimitTol)) {
300:         /* recursive bisection to get c such that p(a1) are p(b1) approximately the same */
301:         c = (c1+c2) / 2.0;
302:         intv[2] = c - halfPlateau;
303:         intv[3] = c + halfPlateau;
304:         FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,6,HighLowFlags,baseDeg);
305:         FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,6,opts->intervalWeights,polyDeg);
306:         fc = FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1) - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
307:         if (fc*fc2 < 0.0) {
308:           c1 = c;
309:           /* fc1 = fc; */
310:         } else {
311:           c2 = c;
312:           fc2 = fc;
313:         }
314:         yLimitGap = PetscAbsReal(fc);
315:       }
316:     } else {  /* filterType == 1 (or 3 with conversion) for extreme eigenvalues */
317:       intv[1] = z1;
318:       intv[2] = z1 + (b1-z1)*opts->transIntervalRatio;
319:       FILTLAN_HermiteBaseFilterInChebyshevBasis(baseFilter,intv,4,HighLowFlags,baseDeg);
320:       FILTLAN_FilteredConjugateResidualPolynomial(polyFilter,baseFilter,2*baseDeg+2,intv,4,opts->intervalWeights,polyDeg);
321:     }
322:     /* polyFilter contains the coefficients of the polynomial filter which approximates phi(x)
323:        expanded in the `translated' Chebyshev basis */
324:     /* psi(x) = 1.0 - phi(x) is the dual base filter approximated by a polynomial in the form x*p(x) */
325:     yLeftLimit  = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,a1);
326:     yRightLimit = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,b1);
327:     yLimit  = (yLeftLimit < yRightLimit) ? yLeftLimit : yRightLimit;
328:     ySummit = (yLeftLimit > yRightLimit) ? yLeftLimit : yRightLimit;
329:     x = a1;
330:     while ((x+=gridSize) < b1) {
331:       y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
332:       if (y < yLimit)  yLimit  = y;
333:       if (y > ySummit) ySummit = y;
334:     }
335:     /* now yLimit is the minimum of x*p(x) for x in [a1, b1] */
336:     stepLeft  = PETSC_FALSE;
337:     stepRight = PETSC_FALSE;
338:     if ((yLimit < yLeftLimit && yLimit < yRightLimit) || yLimit < opts->yBottomLine) {
339:       /* very bad, step to see what will happen */
340:       stepLeft = PETSC_TRUE;
341:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
342:     } else if (filterInfo->filterType == 2) {
343:       if (yLeftLimit < yRightLimit) {
344:         if (yRightLimit-yLeftLimit > opts->yLimitTol) stepLeft = PETSC_TRUE;
345:       } else if (yLeftLimit-yRightLimit > opts->yLimitTol) stepRight = PETSC_TRUE;
346:     }
347:     if (!stepLeft) {
348:       yLeftBottom = yLeftLimit;
349:       x = a1;
350:       while ((x-=gridSize) >= a) {
351:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
352:         if (y < yLeftBottom) yLeftBottom = y;
353:         else if (y > yLeftBottom) break;
354:       }
355:       yLeftSummit = yLeftBottom;
356:       while ((x-=gridSize) >= a) {
357:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
358:         if (y > yLeftSummit) {
359:           yLeftSummit = y;
360:           if (yLeftSummit > yLimit*opts->yRippleLimit) {
361:             stepLeft = PETSC_TRUE;
362:             break;
363:           }
364:         }
365:         if (y < yLeftBottom) yLeftBottom = y;
366:       }
367:     }
368:     if (filterInfo->filterType == 2 && !stepRight) {
369:       yRightBottom = yRightLimit;
370:       x = b1;
371:       while ((x+=gridSize) <= b) {
372:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
373:         if (y < yRightBottom) yRightBottom = y;
374:         else if (y > yRightBottom) break;
375:       }
376:       yRightSummit = yRightBottom;
377:       while ((x+=gridSize) <= b) {
378:         y = 1.0 - FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(polyFilter,polyDeg+2,intv,npoints,x);
379:         if (y > yRightSummit) {
380:           yRightSummit = y;
381:           if (yRightSummit > yLimit*opts->yRippleLimit) {
382:             stepRight = PETSC_TRUE;
383:             break;
384:           }
385:         }
386:         if (y < yRightBottom) yRightBottom = y;
387:       }
388:     }
389:     if (!stepLeft && !stepRight) {
390:       if (filterInfo->filterType == 2) bottom = PetscMin(yLeftBottom,yRightBottom);
391:       else bottom = yLeftBottom;
392:       qIndex = 1.0 - (yLimit-bottom) / (ySummit-bottom);
393:       if (filterInfo->filterOK == 0 || filterInfo->filterQualityIndex < qIndex) {
394:         /* found the first OK filter or a better filter */
395:         for (i=0;i<6;i++) intervals[i] = intv[i];
396:         filterInfo->filterOK           = 1;
397:         filterInfo->filterQualityIndex = qIndex;
398:         filterInfo->numIter            = numIter;
399:         filterInfo->yLimit             = yLimit;
400:         filterInfo->ySummit            = ySummit;
401:         filterInfo->numLeftSteps       = numLeftSteps;
402:         filterInfo->yLeftSummit        = yLeftSummit;
403:         filterInfo->yLeftBottom        = yLeftBottom;
404:         if (filterInfo->filterType == 2) {
405:           filterInfo->yLimitGap        = yLimitGap;
406:           filterInfo->numRightSteps    = numRightSteps;
407:           filterInfo->yRightSummit     = yRightSummit;
408:           filterInfo->yRightBottom     = yRightBottom;
409:         }
410:         numMoreLooked = 0;
411:       } else if (++numMoreLooked == numLookMore) {
412:         /* filter has been optimal */
413:         filterInfo->filterOK = 2;
414:         break;
415:       }
416:       /* try stepping further to see whether it can improve */
417:       stepLeft = PETSC_TRUE;
418:       if (filterInfo->filterType == 2) stepRight = PETSC_TRUE;
419:     }
420:     /* check whether we can really "step" */
421:     if (leftBoundaryMet) {
422:       if (filterInfo->filterType == 1 || rightBoundaryMet) break;  /* cannot step further, so break the loop */
423:       if (stepLeft) {
424:         /* cannot step left, so try stepping right */
425:         stepLeft  = PETSC_FALSE;
426:         stepRight = PETSC_TRUE;
427:       }
428:     }
429:     if (rightBoundaryMet && stepRight) {
430:       /* cannot step right, so try stepping left */
431:       stepRight = PETSC_FALSE;
432:       stepLeft  = PETSC_TRUE;
433:     }
434:     /* now "step" */
435:     if (stepLeft) {
436:       numLeftSteps++;
437:       if (filterInfo->filterType == 2) leftDelta *= opts->shiftStepExpanRate; /* expand the step for faster convergence */
438:       z1 -= leftDelta;
439:     }
440:     if (stepRight) {
441:       numRightSteps++;
442:       rightDelta *= opts->shiftStepExpanRate;  /* expand the step for faster convergence */
443:       z2 += rightDelta;
444:     }
445:     if (filterInfo->filterType == 2) {
446:       /* shrink the "plateau" of the (dual) base filter */
447:       if (stepLeft && stepRight) halfPlateau /= opts->plateauShrinkRate;
448:       else halfPlateau /= PetscSqrtReal(opts->plateauShrinkRate);
449:     }
450:   }
451:   if (!filterInfo->filterOK) SETERRQ(PETSC_COMM_SELF,1,"STFILTER cannot get the filter specified; please adjust your filter parameters (e.g. increasing the polynomial degree)");

453:   filterInfo->totalNumIter = numIter;
454:   PetscFree2(polyFilter,baseFilter);
455:   return(0);
456: }

458: /* ////////////////////////////////////////////////////////////////////////////
459:    //    Chebyshev Polynomials
460:    //////////////////////////////////////////////////////////////////////////// */

462: /*
463:    FILTLAN function ExpandNewtonPolynomialInChebyshevBasis

465:    translate the coefficients of a Newton polynomial to the coefficients
466:    in a basis of the `translated' (scale-and-shift) Chebyshev polynomials

468:    input:
469:    a Newton polynomial defined by vectors a and x:
470:        P(z) = a(1) + a(2)*(z-x(1)) + a(3)*(z-x(1))*(z-x(2)) + ... + a(n)*(z-x(1))*...*(z-x(n-1))
471:    the interval [aa,bb] defines the `translated' Chebyshev polynomials S_i(z) = T_i((z-c)/h),
472:        where c=(aa+bb)/2 and h=(bb-aa)/2, and T_i is the Chebyshev polynomial of the first kind
473:    note that T_i is defined by T_0(z)=1, T_1(z)=z, and T_i(z)=2*z*T_{i-1}(z)+T_{i-2}(z) for i>=2

475:    output:
476:    a vector q containing the Chebyshev coefficients:
477:        P(z) = q(1)*S_0(z) + q(2)*S_1(z) + ... + q(n)*S_{n-1}(z)
478: */
479: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_ExpandNewtonPolynomialInChebyshevBasis(PetscInt n,PetscReal aa,PetscReal bb,PetscReal *a,PetscReal *x,PetscReal *q,PetscReal *q2)
480: {
481:   PetscInt  m,mm;
482:   PetscReal *sa,*sx,*sq,*sq2,c,c2,h,h2;

485:   sa = a+n;    /* pointers for traversing a and x */
486:   sx = x+n-1;
487:   *q = *--sa;  /* set q[0] = a(n) */

489:   c = (aa+bb)/2.0;
490:   h = (bb-aa)/2.0;
491:   h2 = h/2.0;

493:   for (m=1;m<=n-1;m++) {  /* the main loop for translation */

495:     /* compute q2[0:m-1] = (c-x[n-m-1])*q[0:m-1] */
496:     mm = m;
497:     sq = q;
498:     sq2 = q2;
499:     c2 = c-(*--sx);
500:     while (mm--) *(sq2++) = c2*(*sq++);
501:     *sq2 = 0.0;         /* q2[m] = 0.0 */
502:     *(q2+1) += h*(*q);  /* q2[1] = q2[1] + h*q[0] */

504:     /* compute q2[0:m-2] = q2[0:m-2] + h2*q[1:m-1] */
505:     mm = m-1;
506:     sq2 = q2;
507:     sq = q+1;
508:     while (mm--) *(sq2++) += h2*(*sq++);

510:     /* compute q2[2:m] = q2[2:m] + h2*q[1:m-1] */
511:     mm = m-1;
512:     sq2 = q2+2;
513:     sq = q+1;
514:     while (mm--) *(sq2++) += h2*(*sq++);

516:     /* compute q[0:m] = q2[0:m] */
517:     mm = m+1;
518:     sq2 = q2;
519:     sq = q;
520:     while (mm--) *sq++ = *sq2++;
521:     *q += (*--sa);      /* q[0] = q[0] + p[n-m-1] */
522:   }
523:   return(0);
524: }

526: /*
527:    FILTLAN function PolynomialEvaluationInChebyshevBasis

529:    evaluate P(z) at z=z0, where P(z) is a polynomial expanded in a basis of
530:    the `translated' (i.e. scale-and-shift) Chebyshev polynomials

532:    input:
533:    c is a vector of Chebyshev coefficients which defines the polynomial
534:        P(z) = c(1)*S_0(z) + c(2)*S_1(z) + ... + c(n)*S_{n-1}(z),
535:    where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
536:        c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2
537:    note that T_i(z) is the Chebyshev polynomial of the first kind,
538:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

540:    output:
541:    the evaluated value of P(z) at z=z0
542: */
543: PETSC_STATIC_INLINE PetscReal FILTLAN_PolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscInt idx,PetscReal z0,PetscReal aa,PetscReal bb)
544: {
545:   PetscInt  ii,deg1;
546:   PetscReal y,zz,t0,t1,t2,*sc;

549:   deg1 = m;
550:   if (aa==-1.0 && bb==1.0) zz = z0;  /* treat it as a special case to reduce rounding errors */
551:   else zz = (aa==bb)? 0.0 : -1.0+2.0*(z0-aa)/(bb-aa);

553:   /* compute y = P(z0), where we utilize the Chebyshev recursion */
554:   sc = pp+(idx-1)*m;   /* sc points to column idx of pp */
555:   y = *sc++;
556:   t1 = 1.0; t2 = zz;
557:   ii = deg1-1;
558:   while (ii--) {
559:     /* Chebyshev recursion: T_0(zz)=1, T_1(zz)=zz, and T_{i+1}(zz) = 2*zz*T_i(zz) + T_{i-1}(zz) for i>=2
560:        the values of T_{i+1}(zz), T_i(zz), T_{i-1}(zz) are stored in t0, t1, t2, respectively */
561:     t0 = 2*zz*t1 - t2;
562:     /* it also works for the base case / the first iteration, where t0 equals 2*zz*1-zz == zz which is T_1(zz) */
563:     t2 = t1;
564:     t1 = t0;
565:     y += (*sc++)*t0;
566:   }
567:   PetscFunctionReturn(y);
568: }

570: #define basisTranslated PETSC_TRUE
571: /*
572:    FILTLAN function PiecewisePolynomialEvaluationInChebyshevBasis

574:    evaluate P(z) at z=z0, where P(z) is a piecewise polynomial expanded
575:    in a basis of the (optionally translated, i.e. scale-and-shift) Chebyshev polynomials for each interval

577:    input:
578:    intv is a vector which defines the intervals; the j-th interval is [intv(j), intv(j+1))
579:    pp is a matrix of Chebyshev coefficients which defines a piecewise polynomial P(z)
580:    in a basis of the `translated' Chebyshev polynomials in each interval
581:    the polynomial P_j(z) in the j-th interval, i.e. when z is in [intv(j), intv(j+1)), is defined by the j-th column of pp:
582:        if basisTranslated == false, then
583:            P_j(z) = pp(1,j)*T_0(z) + pp(2,j)*T_1(z) + ... + pp(n,j)*T_{n-1}(z),
584:        where T_i(z) is the Chebyshev polynomial of the first kind,
585:            T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
586:        if basisTranslated == true, then
587:            P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
588:        where S_i is the `translated' Chebyshev polynomial S_i((z-c)/h) = T_i(z), with
589:            c = (intv(j)+intv(j+1)) / 2,  h = (intv(j+1)-intv(j)) / 2

591:    output:
592:    the evaluated value of P(z) at z=z0

594:    note that if z0 falls below the first interval, then the polynomial in the first interval will be used to evaluate P(z0)
595:              if z0 flies over  the last  interval, then the polynomial in the last  interval will be used to evaluate P(z0)
596: */
597: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialEvaluationInChebyshevBasis(PetscReal *pp,PetscInt m,PetscReal *intv,PetscInt npoints,PetscReal z0)
598: {
599:   PetscReal *sintv,aa,bb,resul;
600:   PetscInt  idx;

603:   /* determine the interval which contains z0 */
604:   sintv = &intv[1];
605:   idx = 1;
606:   if (npoints>2 && z0 >= *sintv) {
607:     sintv++;
608:     while (++idx < npoints-1) {
609:       if (z0 < *sintv) break;
610:       sintv++;
611:     }
612:   }
613:   /* idx==1 if npoints<=2; otherwise idx satisfies:
614:          intv(idx) <= z0 < intv(idx+1),  if 2 <= idx <= npoints-2
615:          z0 < intv(idx+1),               if idx == 1
616:          intv(idx) <= z0,                if idx == npoints-1
617:      in addition, sintv points to &intv(idx+1) */
618:   if (basisTranslated) {
619:     /* the basis consists of `translated' Chebyshev polynomials */
620:     /* find the interval of concern, [aa,bb] */
621:     aa = *(sintv-1);
622:     bb = *sintv;
623:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,aa,bb);
624:   } else {
625:     /* the basis consists of standard Chebyshev polynomials, with interval [-1.0,1.0] for integration */
626:     resul = FILTLAN_PolynomialEvaluationInChebyshevBasis(pp,m,idx,z0,-1.0,1.0);
627:   }
628:   PetscFunctionReturn(resul);
629: }

631: /*
632:    FILTLAN function PiecewisePolynomialInnerProductInChebyshevBasis

634:    compute the weighted inner product of two piecewise polynomials expanded
635:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

637:    pp and qq are two matrices of Chebyshev coefficients which define the piecewise polynomials P(z) and Q(z), respectively
638:    for z in the j-th interval, P(z) equals
639:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
640:    and Q(z) equals
641:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
642:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
643:        S_i((z-c)/h) = T_i(z),  c = (aa+bb)) / 2,  h = (bb-aa) / 2,
644:    with T_i(z) the Chebyshev polynomial of the first kind
645:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

647:    the (scaled) j-th interval inner product is defined by
648:        <P_j,Q_j> = (Pi/2)*( pp(1,j)*qq(1,j) + sum_{k} pp(k,j)*qq(k,j) ),
649:    which comes from the property
650:        <T_0,T_0>=pi, <T_i,T_i>=pi/2 for i>=1, and <T_i,T_j>=0 for i!=j

652:    the weighted inner product is <P,Q> = sum_{j} intervalWeights(j)*<P_j,Q_j>,
653:    which is the return value

655:    note that for unit weights, pass an empty vector of intervalWeights (i.e. of length 0)
656: */
657: PETSC_STATIC_INLINE PetscReal FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(PetscReal *pp,PetscInt prows,PetscInt pcols,PetscInt ldp,PetscReal *qq,PetscInt qrows,PetscInt qcols,PetscInt ldq,const PetscReal *intervalWeights)
658: {
659:   PetscInt  nintv,deg1,i,k;
660:   PetscReal *sp,*sq,ans=0.0,ans2;

663:   deg1 = PetscMin(prows,qrows);  /* number of effective coefficients, one more than the effective polynomial degree */
664:   if (!deg1) PetscFunctionReturn(0.0);
665:   nintv = PetscMin(pcols,qcols);  /* number of intervals */

667:   /* scaled by intervalWeights(i) in the i-th interval (we assume intervalWeights[] are always provided).
668:      compute ans = sum_{i=1,...,nintv} intervalWeights(i)*[ pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) ] */
669:   for (i=0;i<nintv;i++) {   /* compute ans2 = pp(1,i)*qq(1,i) + sum_{k=1,...,deg} pp(k,i)*qq(k,i) */
670:     sp = pp+i*ldp;
671:     sq = qq+i*ldq;
672:     ans2 = (*sp) * (*sq);  /* the first term pp(1,i)*qq(1,i) is being added twice */
673:     for (k=0;k<deg1;k++) ans2 += (*sp++)*(*sq++);  /* add pp(k,i)*qq(k,i) */
674:     ans += ans2*intervalWeights[i];  /* compute ans += ans2*intervalWeights(i) */
675:   }
676:   PetscFunctionReturn(ans*PETSC_PI/2.0);
677: }

679: /*
680:    FILTLAN function PiecewisePolynomialInChebyshevBasisMultiplyX

682:    compute Q(z) = z*P(z), where P(z) and Q(z) are piecewise polynomials expanded
683:    in a basis of `translated' (i.e. scale-and-shift) Chebyshev polynomials for each interval

685:    P(z) and Q(z) are stored as matrices of Chebyshev coefficients pp and qq, respectively

687:    For z in the j-th interval, P(z) equals
688:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(n,j)*S_{n-1}(z),
689:    and Q(z) equals
690:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(n,j)*S_{n-1}(z),
691:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
692:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
693:    with T_i(z) the Chebyshev polynomial of the first kind
694:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

696:    the returned matrix is qq which represents Q(z) = z*P(z)
697: */
698: PETSC_STATIC_INLINE PetscErrorCode FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(PetscReal *pp,PetscInt deg1,PetscInt ldp,PetscReal *intv,PetscInt nintv,PetscReal *qq,PetscInt ldq)
699: {
700:   PetscInt  i,j;
701:   PetscReal c,h,h2,tmp,*sp,*sq,*sp2,*sq2;

704:   for (j=0;j<nintv;j++) {    /* consider interval between intv(j) and intv(j+1) */
705:     sp = pp+j*ldp;
706:     sq = qq+j*ldq;
707:     sp2 = sp;
708:     sq2 = sq+1;
709:     c = 0.5*(intv[j] + intv[j+1]);   /* compute c = (intv(j) + intv(j+1))/2 */
710:     h = 0.5*(intv[j+1] - intv[j]);   /* compute h = (intv(j+1) - intv(j))/2  and  h2 = h/2 */
711:     h2 = 0.5*h;
712:     i = deg1;
713:     while (i--) *sq++ = c*(*sp++);    /* compute q(1:deg1,j) = c*p(1:deg1,j) */
714:     *sq++ = 0.0;                      /* set q(deg1+1,j) = 0.0 */
715:     *(sq2++) += h*(*sp2++);           /* compute q(2,j) = q(2,j) + h*p(1,j) */
716:     i = deg1-1;
717:     while (i--) {       /* compute q(3:deg1+1,j) += h2*p(2:deg1,j) and then q(1:deg1-1,j) += h2*p(2:deg1,j) */
718:       tmp = h2*(*sp2++);
719:       *(sq2-2) += tmp;
720:       *(sq2++) += tmp;
721:     }
722:   }
723:   return(0);
724: }

726: /* ////////////////////////////////////////////////////////////////////////////
727:    //    Conjugate Residual Method in the Polynomial Space
728:    //////////////////////////////////////////////////////////////////////////// */

730: /*
731:     B := alpha*A + beta*B

733:     A,B are nxk
734: */
735: PETSC_STATIC_INLINE PetscErrorCode Mat_AXPY_BLAS(PetscInt n,PetscInt k,PetscReal alpha,const PetscReal *A,PetscInt lda,PetscReal beta,PetscReal *B,PetscInt ldb)
736: {
738:   PetscInt       i,j;

741:   if (beta==(PetscReal)1.0) {
742:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] += alpha*A[i+j*lda];
743:     PetscLogFlops(2.0*n*k);
744:   } else {
745:     for (j=0;j<k;j++) for (i=0;i<n;i++) B[i+j*ldb] = alpha*A[i+j*lda] + beta*B[i+j*ldb];
746:     PetscLogFlops(3.0*n*k);
747:   }
748:   return(0);
749: }

751: /*
752:    FILTLAN function FilteredConjugateResidualPolynomial

754:    ** Conjugate Residual Method in the Polynomial Space

756:    this routine employs a conjugate-residual-type algorithm in polynomial space to minimize ||P(z)-Q(z)||_w,
757:    where P(z), the base filter, is the input piecewise polynomial, and
758:          Q(z) is the output polynomial satisfying Q(0)==1, i.e. the constant term of Q(z) is 1
759:    niter is the number of conjugate-residual iterations; therefore, the degree of Q(z) is up to niter+1
760:    both P(z) and Q(z) are expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
761:    and presented as matrices of Chebyshev coefficients, denoted by pp and qq, respectively

763:    input:
764:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
765:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
766:        the interval weights define the inner product of two continuous functions and then
767:        the derived w-norm ||P(z)-Q(z)||_w
768:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
769:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
770:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
771:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
772:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
773:    with T_i(z) the Chebyshev polynomial of the first kind,
774:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2

776:    output:
777:    the return matrix, denoted by qq, represents a polynomial Q(z) with degree up to 1+niter
778:    and satisfying Q(0)==1, such that ||P(z))-Q(z)||_w is minimized
779:    this polynomial Q(z) is expanded in the `translated' Chebyshev basis for each interval
780:    to be precise, considering z in [intv(j), intv(j+1)), Q(z) equals
781:        Q_j(z) = qq(1,j)*S_0(z) + qq(2,j)*S_1(z) + ... + qq(niter+2,j)*S_{niter+1}(z)

783:    note:
784:    1. since Q(0)==1, P(0)==1 is expected; if P(0)!=1, one can translate P(z)
785:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
786:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
787:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
788: */
789: static PetscErrorCode FILTLAN_FilteredConjugateResidualPolynomial(PetscReal *cpol,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt m,PetscReal *intervalWeights,PetscInt niter)
790: {
792:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld,nintv;
793:   PetscReal      rho,rho0,rho1,den,bet,alp,alp0,*ppol,*rpol,*appol,*arpol;

796:   nintv = m-1;
797:   ld = niter+2;  /* leading dimension */
798:   PetscCalloc4(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&appol,ld*nintv,&arpol);
799:   PetscMemzero(cpol,ld*nintv*sizeof(PetscReal));
800:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
801:   sppol = 2;
802:   srpol = 2;
803:   scpol = 2;
804:   for (j=0;j<nintv;j++) {
805:     ppol[j*ld] = 1.0;
806:     rpol[j*ld] = 1.0;
807:     cpol[j*ld] = 1.0;
808:   }
809:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
810:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
811:      cpol is the "corrected" residual polynomial (result of this function) */
812:   sappol = 3;
813:   sarpol = 3;
814:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
815:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
816:   rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
817:   for (i=0;i<niter;i++) {
818:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
819:     alp0 = rho/den;
820:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
821:     alp  = (rho-rho1)/den;
822:     srpol++;
823:     scpol++;
824:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
825:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
826:     if (i+1 == niter) break;
827:     sarpol++;
828:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
829:     rho0 = rho;
830:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
831:     bet  = rho / rho0;
832:     sppol++;
833:     sappol++;
834:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
835:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);
836:   }
837:   PetscFree4(ppol,rpol,appol,arpol);
838:   return(0);
839: }

841: /*
842:    FILTLAN function FilteredConjugateResidualMatrixPolynomialVectorProduct

844:    this routine employs a conjugate-residual-type algorithm in polynomial space to compute
845:    x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized, where
846:    P(z) is a given piecewise polynomial, called the base filter,
847:    s(z) is a polynomial of degree up to niter, the number of conjugate-residual iterations,
848:    and b and x0 are given vectors

850:    note that P(z) is expanded in the `translated' (scale-and-shift) Chebyshev basis for each interval,
851:    and presented as a matrix of Chebyshev coefficients pp

853:    input:
854:    A is a sparse matrix
855:    x0, b are vectors
856:    niter is the number of conjugate-residual iterations
857:    intv is a vector which defines the intervals; the j-th interval is [intv(j),intv(j+1))
858:    w is a vector of Chebyshev weights; the weight of j-th interval is w(j)
859:        the interval weights define the inner product of two continuous functions and then
860:        the derived w-norm ||P(z)-Q(z)||_w
861:    pp is a matrix of Chebyshev coefficients which defines the piecewise polynomial P(z)
862:    to be specific, for z in [intv(j), intv(j+1)), P(z) equals
863:        P_j(z) = pp(1,j)*S_0(z) + pp(2,j)*S_1(z) + ... + pp(niter+2,j)*S_{niter+1}(z),
864:    where S_i(z) is the `translated' Chebyshev polynomial in that interval,
865:        S_i((z-c)/h) = T_i(z),  c = (intv(j)+intv(j+1))) / 2,  h = (intv(j+1)-intv(j)) / 2,
866:    with T_i(z) the Chebyshev polynomial of the first kind,
867:        T_0(z) = 1, T_1(z) = z, and T_i(z) = 2*z*T_{i-1}(z) - T_{i-2}(z) for i>=2
868:    tol is the tolerance; if the residual polynomial in z-norm is dropped by a factor lower
869:        than tol, then stop the conjugate-residual iteration

871:    output:
872:    the return vector is x = x0 + s(A)*r0 with r0 = b - A*x0, such that ||(1-P(z))-z*s(z)||_w is minimized,
873:    subject to that s(z) is a polynomial of degree up to niter, where P(z) is the base filter
874:    in short, z*s(z) approximates 1-P(z)

876:    note:
877:    1. since z*s(z) approximates 1-P(z), P(0)==1 is expected; if P(0)!=1, one can translate P(z)
878:       for example, if P(0)==0, one can use 1-P(z) as input instead of P(z)
879:    2. typically, the base filter, defined by pp and intv, is from Hermite interpolation
880:       in intervals [intv(j),intv(j+1)) for j=1,...,nintv, with nintv the number of intervals
881:    3. a common application is to compute R(A)*b, where R(z) approximates 1-P(z)
882:       in this case, one can set x0 = 0 and then the return vector is x = s(A)*b, where
883:       z*s(z) approximates 1-P(z); therefore, A*x is the wanted R(A)*b
884: */
885: static PetscErrorCode FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(Mat A,Vec b,Vec x,PetscReal *baseFilter,PetscInt nbase,PetscReal *intv,PetscInt nintv,PetscReal *intervalWeights,PetscInt niter,Vec *work)
886: {
888:   PetscInt       i,j,srpol,scpol,sarpol,sppol,sappol,ld;
889:   PetscReal      rho,rho0,rho00,rho1,den,bet,alp,alp0,*cpol,*ppol,*rpol,*appol,*arpol,tol=0.0;
890:   Vec            r=work[0],p=work[1],ap=work[2],w=work[3];
891:   PetscScalar    alpha;

894:   ld = niter+3;  /* leading dimension */
895:   PetscCalloc5(ld*nintv,&ppol,ld*nintv,&rpol,ld*nintv,&cpol,ld*nintv,&appol,ld*nintv,&arpol);
896:   /* initialize polynomial ppol to be 1 (i.e. multiplicative identity) in all intervals */
897:   sppol = 2;
898:   srpol = 2;
899:   scpol = 2;
900:   for (j=0;j<nintv;j++) {
901:     ppol[j*ld] = 1.0;
902:     rpol[j*ld] = 1.0;
903:     cpol[j*ld] = 1.0;
904:   }
905:   /* ppol is the initial p-polynomial (corresponding to the A-conjugate vector p in CG)
906:      rpol is the r-polynomial (corresponding to the residual vector r in CG)
907:      cpol is the "corrected" residual polynomial */
908:   sappol = 3;
909:   sarpol = 3;
910:   FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(ppol,sppol,ld,intv,nintv,appol,ld);
911:   for (i=0;i<3;i++) for (j=0;j<nintv;j++) arpol[i+j*ld] = appol[i+j*ld];
912:   rho00 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);
913:   rho = rho00;

915:   /* corrected CR in vector space */
916:   /* we assume x0 is always 0 */
917:   VecSet(x,0.0);
918:   VecCopy(b,r);     /* initial residual r = b-A*x0 */
919:   VecCopy(r,p);     /* p = r */
920:   MatMult(A,p,ap);  /* ap = A*p */

922:   for (i=0;i<niter;i++) {
923:     /* iteration in the polynomial space */
924:     den = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(appol,sappol,nintv,ld,appol,sappol,nintv,ld,intervalWeights);
925:     alp0 = rho/den;
926:     rho1 = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(baseFilter,nbase,nintv,nbase,appol,sappol,nintv,ld,intervalWeights);
927:     alp  = (rho-rho1)/den;
928:     srpol++;
929:     scpol++;
930:     Mat_AXPY_BLAS(srpol,nintv,-alp0,appol,ld,1.0,rpol,ld);
931:     Mat_AXPY_BLAS(scpol,nintv,-alp,appol,ld,1.0,cpol,ld);
932:     sarpol++;
933:     FILTLAN_PiecewisePolynomialInChebyshevBasisMultiplyX(rpol,srpol,ld,intv,nintv,arpol,ld);
934:     rho0 = rho;
935:     rho = FILTLAN_PiecewisePolynomialInnerProductInChebyshevBasis(rpol,srpol,nintv,ld,arpol,sarpol,nintv,ld,intervalWeights);

937:     /* update x in the vector space */
938:     alpha = alp;
939:     VecAXPY(x,alpha,p);   /* x += alp*p */
940:     if (rho < tol*rho00) break;

942:     /* finish the iteration in the polynomial space */
943:     bet = rho / rho0;
944:     sppol++;
945:     sappol++;
946:     Mat_AXPY_BLAS(sppol,nintv,1.0,rpol,ld,bet,ppol,ld);
947:     Mat_AXPY_BLAS(sappol,nintv,1.0,arpol,ld,bet,appol,ld);

949:     /* finish the iteration in the vector space */
950:     alpha = -alp0;
951:     VecAXPY(r,alpha,ap);    /* r -= alp0*ap */
952:     alpha = bet;
953:     VecAYPX(p,alpha,r);     /* p = r + bet*p */
954:     MatMult(A,r,w);         /* ap = A*r + bet*ap */
955:     VecAYPX(ap,alpha,w);
956:   }
957:   PetscFree5(ppol,rpol,cpol,appol,arpol);
958:   return(0);
959: }

961: /*
962:    Gateway to FILTLAN for evaluating y=p(A)*x
963: */
964: PetscErrorCode STFilter_FILTLAN_Apply(ST st,Vec x,Vec y)
965: {
967:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
968:   PetscInt       npoints;

971:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
972:   FILTLAN_FilteredConjugateResidualMatrixPolynomialVectorProduct(st->T[0],x,y,ctx->baseFilter,2*ctx->baseDegree+2,ctx->intervals,npoints-1,ctx->opts->intervalWeights,ctx->polyDegree,st->work);
973:   VecCopy(y,st->work[0]);
974:   MatMult(st->T[0],st->work[0],y);
975:   return(0);
976: }

978: /*
979:    FILTLAN function PolynomialFilterInterface::setFilter

981:    Creates the shifted (and scaled) matrix and the base filter P(z)
982: */
983: PetscErrorCode STFilter_FILTLAN_setFilter(ST st)
984: {
986:   ST_FILTER      *ctx = (ST_FILTER*)st->data;
987:   PetscInt       i,npoints;
988:   PetscReal      frame2[4];
989:   PetscScalar    alpha;
990:   const PetscInt HighLowFlags[5] = { 1, -1, 0, -1, 1 };

993:   MatDestroy(&st->T[0]);
994:   if (ctx->frame[0] == ctx->frame[1]) {  /* low pass filter, convert it to high pass filter */
995:     /* T = frame[3]*eye(n) - A */
996:     MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
997:     MatScale(st->T[0],-1.0);
998:     alpha = ctx->frame[3];
999:     MatShift(st->T[0],alpha);
1000:     for (i=0;i<4;i++) frame2[i] = ctx->frame[3] - ctx->frame[3-i];
1001:     FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1002:     /* translate the intervals back */
1003:     for (i=0;i<4;i++) ctx->intervals2[i] = ctx->frame[3] - ctx->intervals[3-i];
1004:   } else {  /* it can be a mid-pass filter or a high-pass filter */
1005:       if (ctx->frame[0] == 0.0) {
1006:       PetscObjectReference((PetscObject)st->A[0]);
1007:       st->T[0] = st->A[0];
1008:       FILTLAN_GetIntervals(ctx->intervals,ctx->frame,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1009:       for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i];
1010:     } else {
1011:       /* T = A - frame[0]*eye(n) */
1012:       MatDuplicate(st->A[0],MAT_COPY_VALUES,&st->T[0]);
1013:       alpha = -ctx->frame[0];
1014:       MatShift(st->T[0],alpha);
1015:       for (i=0;i<4;i++) frame2[i] = ctx->frame[i] - ctx->frame[0];
1016:       FILTLAN_GetIntervals(ctx->intervals,frame2,ctx->polyDegree,ctx->baseDegree,ctx->opts,ctx->filterInfo);
1017:       /* translate the intervals back */
1018:       for (i=0;i<6;i++) ctx->intervals2[i] = ctx->intervals[i] + ctx->frame[0];
1019:     }
1020:   }
1021:   npoints = (ctx->filterInfo->filterType == 2)? 6: 4;
1022:   PetscFree(ctx->baseFilter);
1023:   PetscMalloc1((2*ctx->baseDegree+2)*(npoints-1),&ctx->baseFilter);
1024:   FILTLAN_HermiteBaseFilterInChebyshevBasis(ctx->baseFilter,ctx->intervals,npoints,HighLowFlags,ctx->baseDegree);
1025:   PetscInfo1(st,"Computed value of yLimit = %g\n",ctx->filterInfo->yLimit);
1026:   return(0);
1027: }