Actual source code: invit.c

slepc-3.11.2 2019-07-30
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */

 11: #include <slepc/private/dsimpl.h>
 12: #include <slepcblaslapack.h>

 14: struct HRtr
 15: {
 16:   PetscScalar *data;
 17:   PetscInt    m;
 18:   PetscInt    idx[2];
 19:   PetscInt    n[2];
 20:   PetscScalar tau[2];
 21:   PetscReal   alpha;
 22:   PetscReal   cs;
 23:   PetscReal   sn;
 24:   PetscInt    type;
 25: };

 27: /*
 28:   Generates a hyperbolic rotation
 29:     if x1*x1 - x2*x2 != 0
 30:       r = sqrt(|x1*x1 - x2*x2|)
 31:       c = x1/r  s = x2/r

 33:       | c -s||x1|   |d*r|
 34:       |-s  c||x2| = | 0 |
 35:       where d = 1 for type==1 and -1 for type==2
 36:   Returns the condition number of the reduction
 37: */
 38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
 39: {
 40:   PetscReal t,n2,xa,xb;
 41:   PetscInt  type_;

 44:   if (x2==0.0) {
 45:     *r = PetscAbsReal(x1);
 46:     *c = (x1>=0)?1.0:-1.0;
 47:     *s = 0.0;
 48:     if (type) *type = 1;
 49:     return(0);
 50:   }
 51:   if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
 52:     /* hyperbolic rotation doesn't exist */
 53:     *c = 0.0;
 54:     *s = 0.0;
 55:     *r = 0.0;
 56:     if (type) *type = 0;
 57:     *cond = PETSC_MAX_REAL;
 58:     return(0);
 59:   }

 61:   if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
 62:     xa = x1; xb = x2; type_ = 1;
 63:   } else {
 64:     xa = x2; xb = x1; type_ = 2;
 65:   }
 66:   t = xb/xa;
 67:   n2 = PetscAbsReal(1 - t*t);
 68:   *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
 69:   *c = x1/(*r);
 70:   *s = x2/(*r);
 71:   if (type_ == 2) *r *= -1;
 72:   if (type) *type = type_;
 73:   if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
 74:   return(0);
 75: }

 77: /*
 78:                                 |c  s|
 79:   Applies an hyperbolic rotator |s  c|
 80:            |c  s|
 81:     [x1 x2]|s  c|
 82: */
 83: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
 84: {
 85:   PetscInt    i;
 86:   PetscReal   t;
 87:   PetscScalar tmp;

 90:   if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
 91:     t = s/c;
 92:     for (i=0;i<n;i++) {
 93:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
 94:       x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
 95:     }
 96:   } else { /* Type II */
 97:     t = c/s;
 98:     for (i=0;i<n;i++) {
 99:       tmp = x1[i*inc1];
100:       x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
101:       x2[i*inc2] = t*x1[i*inc1] + tmp/s;
102:     }
103:   }
104:   return(0);
105: }

107: /*
108:   Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).

110:   Input:
111:     A symmetric (only lower triangular part is referred)
112:     s vector +1 and -1 (signature matrix)
113:   Output:
114:     d,e
115:     s
116:     Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
117: */
118: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
119: {
120: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
122:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
123: #else
125:   PetscInt       i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
126:   PetscInt       nwu=0;
127:   PetscReal      *ss,cond=1.0,cs,sn,r;
128:   PetscScalar    tau,t,*AA;
129:   PetscBLASInt   n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
130:   PetscBool      breakdown = PETSC_TRUE;

133:   if (n<3) {
134:     if (n==1) Q[0]=1;
135:     if (n==2) {
136:       Q[0] = Q[1+ldq] = 1;
137:       Q[1] = Q[ldq] = 0;
138:     }
139:     return(0);
140:   }
141:   PetscBLASIntCast(lda,&lda_);
142:   PetscBLASIntCast(n,&n_);
143:   PetscBLASIntCast(ldq,&ldq_);
144:   ss = rwork;
145:   perm = iwork;
146:   AA = work;
147:   for (i=0;i<n;i++) {
148:     PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
149:   }
150:   nwu += n*n;
151:   k=0;
152:   while (breakdown && k<n) {
153:     breakdown = PETSC_FALSE;
154:     /* Classify (and flip) A and s according to sign */
155:     if (flip) {
156:       for (i=0;i<n;i++) {
157:         perm[i] = n-1-perm_[i];
158:         if (perm[i]==0) i0 = i;
159:         if (perm[i]==k) ik = i;
160:       }
161:     } else {
162:       for (i=0;i<n;i++) {
163:         perm[i] = perm_[i];
164:         if (perm[i]==0) i0 = i;
165:         if (perm[i]==k) ik = i;
166:       }
167:     }
168:     perm[ik] = 0;
169:     perm[i0] = k;
170:     i=1;
171:     while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
172:       if (s[perm[i]]!=s[perm[0]]) {
173:         j=i+1;
174:         while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
175:         tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
176:       }
177:       i++;
178:     }
179:     for (i=0;i<n;i++) {
180:       ss[i] = s[perm[i]];
181:     }
182:     if (flip) {
183:       ii = &j;
184:       jj = &i;
185:     } else {
186:       ii = &i;
187:       jj = &j;
188:     }
189:     for (i=0;i<n;i++)
190:       for (j=0;j<n;j++)
191:         A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
192:     /* Initialize Q */
193:     for (i=0;i<n;i++) {
194:       PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
195:       Q[perm[i]+i*ldq] = 1.0;
196:     }
197:     for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
198:     n0 = ni-1;
199:     n1 = n_-ni;
200:     for (j=0;j<n-2;j++) {
201:       PetscBLASIntCast(n-j-1,&m);
202:       /* Forming and applying reflectors */
203:       if (n0 > 1) {
204:         PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
205:         /* Apply reflector */
206:         if (PetscAbsScalar(tau) != 0.0) {
207:           t=*(A+ni-n0+j*lda);  *(A+ni-n0+j*lda)=1.0;
208:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
209:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
210:           /* Update Q */
211:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
212:           *(A+ni-n0+j*lda) = t;
213:           for (i=1;i<n0;i++) {
214:             *(A+ni-n0+j*lda+i) = 0.0;  *(A+j+(ni-n0+i)*lda) = 0.0;
215:           }
216:           *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
217:         }
218:       }
219:       if (n1 > 1) {
220:         PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
221:         /* Apply reflector */
222:         if (PetscAbsScalar(tau) != 0.0) {
223:           t=*(A+n-n1+j*lda);  *(A+n-n1+j*lda)=1.0;
224:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
225:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
226:           /* Update Q */
227:           PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
228:           *(A+n-n1+j*lda) = t;
229:           for (i=1;i<n1;i++) {
230:             *(A+n-n1+i+j*lda) = 0.0;  *(A+j+(n-n1+i)*lda) = 0.0;
231:           }
232:           *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
233:         }
234:       }
235:       /* Hyperbolic rotation */
236:       if (n0 > 0 && n1 > 0) {
237:         HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
238:         /* Check condition number */
239:         if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
240:           breakdown = PETSC_TRUE;
241:           k++;
242:           if (k==n || flip) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
243:           break;
244:         }
245:         A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
246:         A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
247:         /* Apply to A */
248:         HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
249:         HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);

251:         /* Update Q */
252:         HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
253:         if (type==2) {
254:           ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
255:           n0++;ni++;n1--;
256:         }
257:       }
258:       if (n0>0) n0--;
259:       else n1--;
260:     }
261:   }

263:   /* flip matrices */
264:   if (flip) {
265:     for (i=0;i<n-1;i++) {
266:       d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
267:       e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
268:       s[i] = ss[n-i-1];
269:     }
270:     s[n-1] = ss[0];
271:     d[n-1] = PetscRealPart(A[0]);
272:     for (i=0;i<n;i++) {
273:       ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
274:     }
275:     for (i=0;i<n;i++)
276:       for (j=0;j<n;j++)
277:         Q[i+j*ldq] = work[i+(n-j-1)*n];
278:   } else {
279:     for (i=0;i<n-1;i++) {
280:       d[i] = PetscRealPart(A[i+i*lda]);
281:       e[i] = PetscRealPart(A[i+1+i*lda]);
282:       s[i] = ss[i];
283:     }
284:     s[n-1] = ss[n-1];
285:     d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
286:   }
287:   return(0);
288: #endif
289: }

291: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
292: {
293: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
295:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
296: #else
298:   PetscScalar    *x,*y;
299:   PetscReal      ncond2=1.0;
300:   PetscBLASInt   n0_,n1_,inc=1;

303:   /* Hyperbolic transformation to make zeros in x */
304:   x = tr1->data;
305:   tr1->n[0] = n0;
306:   tr1->n[1] = n1;
307:   tr1->idx[0] = idx0;
308:   tr1->idx[1] = idx1;
309:   PetscBLASIntCast(tr1->n[0],&n0_);
310:   PetscBLASIntCast(tr1->n[1],&n1_);
311:   if (tr1->n[0] > 1) {
312:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
313:   }
314:   if (tr1->n[1]> 1) {
315:     PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
316:   }
317:   if (tr1->idx[0]<tr1->idx[1]) {
318:     HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
319:   } else {
320:     tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
321:     *ncond = 1.0;
322:   }
323:   if (sz==2) {
324:     y = tr2->data;
325:     /* Apply first transformation to second column */
326:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
327:       x[tr1->idx[0]] = 1.0;
328:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
329:     }
330:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
331:       x[tr1->idx[1]] = 1.0;
332:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
333:     }
334:     if (tr1->idx[0]<tr1->idx[1]) {
335:       HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
336:     }
337:     tr2->n[0] = tr1->n[0];
338:     tr2->n[1] = tr1->n[1];
339:     tr2->idx[0] = tr1->idx[0];
340:     tr2->idx[1] = tr1->idx[1];
341:     if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
342:       tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
343:     }
344:     if (tr2->n[0]>0) {
345:       tr2->n[0]--; tr2->idx[0]++;
346:       if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
347:     } else {
348:       tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
349:     }
350:     /* Hyperbolic transformation to make zeros in y */
351:     PetscBLASIntCast(tr2->n[0],&n0_);
352:     PetscBLASIntCast(tr2->n[1],&n1_);
353:     if (tr2->n[0] > 1) {
354:       PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
355:     }
356:     if (tr2->n[1]> 1) {
357:       PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
358:     }
359:     if (tr2->idx[0]<tr2->idx[1]) {
360:       HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
361:     } else {
362:       tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
363:       ncond2 = 1.0;
364:     }
365:     if (ncond2>*ncond) *ncond = ncond2;
366:   }
367:   return(0);
368: #endif
369: }

371: /*
372:   Auxiliary function to try perform one iteration of hr routine,
373:   checking condition number. If it is < tolD, apply the
374:   transformation to H and R, if not, ok=false and it do nothing
375:   tolE, tolerance to exchange complex pairs to improve conditioning
376: */
377: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
378: {
379: #if defined(SLEPC_MISSING_LAPACK_LARF)
381:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARF - Lapack routine is unavailable");
382: #else
384:   struct HRtr    *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
385:   PetscScalar    *x,*y;
386:   PetscReal      ncond,ncond_e;
387:   PetscInt       nwu=0,i,d=1;
388:   PetscBLASInt   n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
389:   PetscReal      tolD = 1e+5;

392:   if (cond) *cond = 1.0;
393:   PetscBLASIntCast(n,&n_);
394:   PetscBLASIntCast(ldr,&ldr_);
395:   PetscBLASIntCast(ldh,&ldh_);
396:   x = work+nwu;
397:   nwu += n;
398:   PetscMemcpy(x,R+j*ldr,n*sizeof(PetscScalar));
399:   *exg = PETSC_FALSE;
400:   *ok = PETSC_TRUE;
401:   tr1_t.data = x;
402:   if (sz==1) {
403:     /* Hyperbolic transformation to make zeros in x */
404:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu);
405:     /* Check condition number to single column*/
406:     if (ncond>tolD) *ok = PETSC_FALSE;
407:     tr1 = &tr1_t;
408:     tr2 = &tr2_t;
409:   } else {
410:     y = work+nwu;
411:     nwu += n;
412:     PetscMemcpy(y,R+(j+1)*ldr,n*sizeof(PetscScalar));
413:     tr2_t.data = y;
414:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu);
415:     /* Computing hyperbolic transformations also for exchanged vectors */
416:     tr1_te.data = work+nwu;
417:     nwu += n;
418:     PetscMemcpy(tr1_te.data,R+(j+1)*ldr,n*sizeof(PetscScalar));
419:     tr2_te.data = work+nwu;
420:     nwu += n;
421:     PetscMemcpy(tr2_te.data,R+j*ldr,n*sizeof(PetscScalar));
422:     MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu);
423:     if (ncond > d*ncond_e) {
424:       *exg = PETSC_TRUE;
425:       tr1 = &tr1_te;
426:       tr2 = &tr2_te;
427:       ncond = ncond_e;
428:     } else {
429:       tr1 = &tr1_t;
430:       tr2 = &tr2_t;
431:     }
432:     if (ncond>tolD) *ok = PETSC_FALSE;
433:   }
434:   if (*ok) {
435:     /* Everything is OK, apply transformations to R and H */
436:     /* First column */
437:     if (cond && *cond<ncond) *cond = ncond;
438:     x = tr1->data;
439:     PetscBLASIntCast(tr1->n[0],&n0_);
440:     PetscBLASIntCast(tr1->n[1],&n1_);
441:     PetscBLASIntCast(n-j-sz,&mr);
442:     if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
443:       x[tr1->idx[0]] = 1.0;
444:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
445:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
446:     }
447:     if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
448:       x[tr1->idx[1]] = 1.0;
449:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
450:       PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
451:     }
452:     if (tr1->idx[0]<tr1->idx[1]) {
453:       HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
454:       if (tr1->type==1) {
455:         HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
456:       } else {
457:         HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
458:         s[tr1->idx[0]] = -s[tr1->idx[0]];
459:         s[tr1->idx[1]] = -s[tr1->idx[1]];
460:       }
461:     }
462:     for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
463:     for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
464:     *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
465:     if (sz==2) {
466:       y = tr2->data;
467:       /* Second column */
468:       PetscBLASIntCast(tr2->n[0],&n0_);
469:       PetscBLASIntCast(tr2->n[1],&n1_);
470:       PetscBLASIntCast(n-j-sz,&mr);
471:       PetscBLASIntCast(n-tr2->idx[0],&mh);
472:       if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
473:         y[tr2->idx[0]] = 1.0;
474:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
475:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
476:       }
477:       if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
478:         y[tr2->idx[1]] = 1.0;
479:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
480:         PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
481:       }
482:       if (tr2->idx[0]<tr2->idx[1]) {
483:         HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
484:         if (tr2->type==1) {
485:           HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
486:         } else {
487:           HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
488:           s[tr2->idx[0]] = -s[tr2->idx[0]];
489:           s[tr2->idx[1]] = -s[tr2->idx[1]];
490:         }
491:       }
492:       for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
493:       *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
494:       for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
495:       *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
496:       *n0 = tr2->n[0];
497:       *n1 = tr2->n[1];
498:       *idx0 = tr2->idx[0];
499:       *idx1 = tr2->idx[1];
500:       if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
501:         (*idx1)++; (*n1)--; (*n0)++;
502:       }
503:     } else {
504:       *n0 = tr1->n[0];
505:       *n1 = tr1->n[1];
506:       *idx0 = tr1->idx[0];
507:       *idx1 = tr1->idx[1];
508:       if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
509:         (*idx1)++; (*n1)--; (*n0)++;
510:       }
511:     }
512:     if (*n0>0) {
513:       (*n0)--; (*idx0)++;
514:       if (*n1==0) *idx1 = *idx0;
515:     } else {
516:       (*n1)--; (*idx1)++; *idx0 = *idx1;
517:     }
518:   }
519:   return(0);
520: #endif
521: }

523: /*
524:   compute V = HR whit H s-orthogonal and R upper triangular
525: */
526: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
527: {
529:   PetscInt       i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
530:   PetscScalar    *col1,*col2;
531:   PetscBool      exg=PETSC_FALSE,ok=PETSC_FALSE;

534:   n = *nv;
535:   col1 = work+nwu;
536:   nwu += n;
537:   col2 = work+nwu;
538:   nwu += n;
539:   /* Sort R and s according to sing(s) */
540:   np = 0;
541:   for (i=0;i<n;i++) if (s[i]>0) np++;
542:   if (s[0]>0) n1 = np;
543:   else n1 = n-np;
544:   n0 = 0;
545:   for (i=0;i<n;i++) {
546:     if (s[i]==s[0]) {
547:       s[n0] = s[0];
548:       perm[n0++] = i;
549:     } else perm[n1++] = i;
550:   }
551:   for (i=n0;i<n;i++) s[i] = -s[0];
552:   n1 -= n0;
553:   idx0 = 0;
554:   idx1 = n0;
555:   if (idx1==n) idx1=idx0;
556:   for (i=0;i<n;i++) {
557:     for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
558:   }
559:   /* Initialize H */
560:   for (i=0;i<n;i++) {
561:     PetscMemzero(V+i*ldv,n*sizeof(PetscScalar));
562:     V[perm[i]+i*ldv] = 1.0;
563:   }
564:   for (i=0;i<n;i++) perm[i] = i;
565:   j = 0;
566:   while (j<n-k) {
567:     if (cmplxEig[j]==0) sz=1;
568:     else sz=2;
569:     TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu);
570:     if (ok) {
571:       if (exg) cmplxEig[j] = -cmplxEig[j];
572:       j = j+sz;
573:     } else { /* to be discarded */
574:       k = k+1;
575:       if (cmplxEig[j]==0) {
576:         if (j<n) {
577:           t1 = perm[j];
578:           for (i=j;i<n-1;i++) perm[i] = perm[i+1];
579:           perm[n-1] = t1;
580:           t1 = cmplxEig[j];
581:           for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
582:           cmplxEig[n-1] = t1;
583:           PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
584:           for (i=j;i<n-1;i++) {
585:             PetscMemcpy(R+i*ldr,R+(i+1)*ldr,n*sizeof(PetscScalar));
586:           }
587:           PetscMemcpy(R+(n-1)*ldr,col1,n*sizeof(PetscScalar));
588:         }
589:       } else {
590:         k = k+1;
591:         if (j<n-1) {
592:           t1 = perm[j];
593:           t2 = perm[j+1];
594:           for (i=j;i<n-2;i++) perm[i] = perm[i+2];
595:           perm[n-2] = t1;
596:           perm[n-1] = t2;
597:           t1 = cmplxEig[j];
598:           t2 = cmplxEig[j+1];
599:           for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
600:           cmplxEig[n-2] = t1;
601:           cmplxEig[n-1] = t2;
602:           PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
603:           PetscMemcpy(col2,R+(j+1)*ldr,n*sizeof(PetscScalar));
604:           for (i=j;i<n-2;i++) {
605:             PetscMemcpy(R+i*ldr,R+(i+2)*ldr,n*sizeof(PetscScalar));
606:           }
607:           PetscMemcpy(R+(n-2)*ldr,col1,n*sizeof(PetscScalar));
608:           PetscMemcpy(R+(n-1)*ldr,col2,n*sizeof(PetscScalar));
609:         }
610:       }
611:     }
612:   }
613:   if (k!=0) {
614:     if (breakdown) *breakdown = PETSC_TRUE;
615:     *nv = n-k;
616:   }
617:   return(0);
618: }

620: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
621: {
623:   PetscInt       lws,nwus=0,nwui=0,lwi;
624:   PetscInt       off,n,nv,ld,i,ldr,l;
625:   PetscScalar    *W,*X,*R,*ts,zeroS=0.0,oneS=1.0;
626:   PetscReal      *s,vi,vr,tr,*d,*e;
627:   PetscBLASInt   ld_,n_,nv_,*perm,*cmplxEig;

630:   l = ds->l;
631:   n = ds->n-l;
632:   PetscBLASIntCast(n,&n_);
633:   ld = ds->ld;
634:   PetscBLASIntCast(ld,&ld_);
635:   off = l*ld+l;
636:   s = ds->rmat[DS_MAT_D];
637:   if (!ds->compact) {
638:     for (i=l;i<ds->n;i++) s[i] = PetscRealPart(*(ds->mat[DS_MAT_B]+i*ld+i));
639:   }
640:   lws = n*n+7*n;
641:   lwi = 2*n;
642:   DSAllocateWork_Private(ds,lws,0,lwi);
643:   R = ds->work+nwus;
644:   nwus += n*n;
645:   ldr = n;
646:   perm = ds->iwork + nwui;
647:   nwui += n;
648:   cmplxEig = ds->iwork+nwui;
649:   X = ds->mat[mat];
650:   for (i=0;i<n;i++) {
651: #if defined(PETSC_USE_COMPLEX)
652:     vi = PetscImaginaryPart(wr[l+i]);
653: #else
654:     vi = PetscRealPart(wi[l+i]);
655: #endif
656:     if (vi!=0) {
657:       cmplxEig[i] = 1;
658:       cmplxEig[i+1] = 2;
659:       i++;
660:     } else cmplxEig[i] = 0;
661:   }
662:   nv = n;

664:   /* Perform HR decomposition */
665:   /* Hyperbolic rotators */
666:   PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus);
667:   /* Sort wr,wi perm */
668:   ts = ds->work+nwus;
669:   PetscMemcpy(ts,wr+l,n*sizeof(PetscScalar));
670:   for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
671: #if !defined(PETSC_USE_COMPLEX)
672:   PetscMemcpy(ts,wi+l,n*sizeof(PetscScalar));
673:   for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
674: #endif
675:   /* Projected Matrix */
676:   PetscMemzero(ds->rmat[DS_MAT_T]+2*ld,ld*sizeof(PetscReal));
677:   d = ds->rmat[DS_MAT_T];
678:   e = d+ld;
679:   for (i=0;i<nv;i++) {
680:     if (cmplxEig[i]==0) { /* Real */
681:       d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
682:       e[l+i] = 0.0;
683:     } else {
684:       vr = PetscRealPart(wr[l+i]);
685: #if defined(PETSC_USE_COMPLEX)
686:       vi = PetscImaginaryPart(wr[l+i]);
687: #else
688:       vi = PetscRealPart(wi[l+i]);
689: #endif
690:       if (cmplxEig[i]==-1) vi = -vi;
691:       tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
692:       d[l+i] = (vr-tr)*s[l+i];
693:       d[l+i+1] = (vr+tr)*s[l+i+1];
694:       e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
695:       e[l+i+1] = 0.0;
696:       i++;
697:     }
698:   }
699:   /* accumulate previous Q */
700:   if (accum) {
701:     PetscBLASIntCast(nv,&nv_);
702:     DSAllocateMat_Private(ds,DS_MAT_W);
703:     W = ds->mat[DS_MAT_W];
704:     DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
705:     PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&oneS,W+off,&ld_,X+off,&ld_,&zeroS,ds->mat[DS_MAT_Q]+off,&ld_));
706:   } else {
707:     PetscMemzero(ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
708:     for (i=0;i<ds->l;i++) *(ds->mat[DS_MAT_Q]+i+i*ld) = 1.0;
709:     for (i=0;i<n;i++) { PetscMemcpy(ds->mat[DS_MAT_Q]+off+i*ld,X+off+i*ld,n*sizeof(PetscScalar)); }
710:   }
711:   ds->t = nv+l;
712:   if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }
713:   return(0);
714: }

716: /*
717:    Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
718: */
719: PetscErrorCode DSIntermediate_GHIEP(DS ds)
720: {
722:   PetscInt       i,ld,off;
723:   PetscInt       nwall,nwallr,nwalli;
724:   PetscScalar    *A,*B,*Q;
725:   PetscReal      *d,*e,*s;

728:   ld = ds->ld;
729:   A = ds->mat[DS_MAT_A];
730:   B = ds->mat[DS_MAT_B];
731:   Q = ds->mat[DS_MAT_Q];
732:   d = ds->rmat[DS_MAT_T];
733:   e = ds->rmat[DS_MAT_T]+ld;
734:   s = ds->rmat[DS_MAT_D];
735:   off = ds->l+ds->l*ld;
736:   PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
737:   nwall = ld*ld+ld;
738:   nwallr = ld;
739:   nwalli = ld;
740:   DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
741:   for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
742:   for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
743:   if (ds->compact) {
744:     if (ds->state < DS_STATE_INTERMEDIATE) {
745:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
746:       TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
747:       ds->k = ds->l;
748:       PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
749:     }
750:   } else {
751:     if (ds->state < DS_STATE_INTERMEDIATE) {
752:       for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
753:       TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
754:       PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
755:       ds->k = ds->l;
756:       DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
757:     } else {
758:       DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
759:     }
760:   }
761:   return(0);
762: }