Actual source code: invit.c
slepc-3.11.2 2019-07-30
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: struct HRtr
15: {
16: PetscScalar *data;
17: PetscInt m;
18: PetscInt idx[2];
19: PetscInt n[2];
20: PetscScalar tau[2];
21: PetscReal alpha;
22: PetscReal cs;
23: PetscReal sn;
24: PetscInt type;
25: };
27: /*
28: Generates a hyperbolic rotation
29: if x1*x1 - x2*x2 != 0
30: r = sqrt(|x1*x1 - x2*x2|)
31: c = x1/r s = x2/r
33: | c -s||x1| |d*r|
34: |-s c||x2| = | 0 |
35: where d = 1 for type==1 and -1 for type==2
36: Returns the condition number of the reduction
37: */
38: static PetscErrorCode HRGen(PetscReal x1,PetscReal x2,PetscInt *type,PetscReal *c,PetscReal *s,PetscReal *r,PetscReal *cond)
39: {
40: PetscReal t,n2,xa,xb;
41: PetscInt type_;
44: if (x2==0.0) {
45: *r = PetscAbsReal(x1);
46: *c = (x1>=0)?1.0:-1.0;
47: *s = 0.0;
48: if (type) *type = 1;
49: return(0);
50: }
51: if (PetscAbsReal(x1) == PetscAbsReal(x2)) {
52: /* hyperbolic rotation doesn't exist */
53: *c = 0.0;
54: *s = 0.0;
55: *r = 0.0;
56: if (type) *type = 0;
57: *cond = PETSC_MAX_REAL;
58: return(0);
59: }
61: if (PetscAbsReal(x1)>PetscAbsReal(x2)) {
62: xa = x1; xb = x2; type_ = 1;
63: } else {
64: xa = x2; xb = x1; type_ = 2;
65: }
66: t = xb/xa;
67: n2 = PetscAbsReal(1 - t*t);
68: *r = PetscSqrtReal(n2)*PetscAbsReal(xa);
69: *c = x1/(*r);
70: *s = x2/(*r);
71: if (type_ == 2) *r *= -1;
72: if (type) *type = type_;
73: if (cond) *cond = (PetscAbsReal(*c) + PetscAbsReal(*s))/PetscAbsReal(PetscAbsReal(*c) - PetscAbsReal(*s));
74: return(0);
75: }
77: /*
78: |c s|
79: Applies an hyperbolic rotator |s c|
80: |c s|
81: [x1 x2]|s c|
82: */
83: static PetscErrorCode HRApply(PetscInt n,PetscScalar *x1,PetscInt inc1,PetscScalar *x2,PetscInt inc2,PetscReal c,PetscReal s)
84: {
85: PetscInt i;
86: PetscReal t;
87: PetscScalar tmp;
90: if (PetscAbsReal(c)>PetscAbsReal(s)) { /* Type I */
91: t = s/c;
92: for (i=0;i<n;i++) {
93: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
94: x2[i*inc2] = t*x1[i*inc1] + x2[i*inc2]/c;
95: }
96: } else { /* Type II */
97: t = c/s;
98: for (i=0;i<n;i++) {
99: tmp = x1[i*inc1];
100: x1[i*inc1] = c*x1[i*inc1] + s*x2[i*inc2];
101: x2[i*inc2] = t*x1[i*inc1] + tmp/s;
102: }
103: }
104: return(0);
105: }
107: /*
108: Reduction to tridiagonal-diagonal form (see F. Tisseur, SIMAX 26(1), 2004).
110: Input:
111: A symmetric (only lower triangular part is referred)
112: s vector +1 and -1 (signature matrix)
113: Output:
114: d,e
115: s
116: Q s-orthogonal matrix with Q^T*A*Q = T (symmetric tridiagonal matrix)
117: */
118: static PetscErrorCode TridiagDiag_HHR(PetscInt n,PetscScalar *A,PetscInt lda,PetscReal *s,PetscScalar* Q,PetscInt ldq,PetscBool flip,PetscReal *d,PetscReal *e,PetscInt *perm_,PetscScalar *work,PetscReal *rwork,PetscBLASInt *iwork)
119: {
120: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
122: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
123: #else
125: PetscInt i,j,k,*ii,*jj,i0=0,ik=0,tmp,type;
126: PetscInt nwu=0;
127: PetscReal *ss,cond=1.0,cs,sn,r;
128: PetscScalar tau,t,*AA;
129: PetscBLASInt n0,n1,ni,inc=1,m,n_,lda_,ldq_,*perm;
130: PetscBool breakdown = PETSC_TRUE;
133: if (n<3) {
134: if (n==1) Q[0]=1;
135: if (n==2) {
136: Q[0] = Q[1+ldq] = 1;
137: Q[1] = Q[ldq] = 0;
138: }
139: return(0);
140: }
141: PetscBLASIntCast(lda,&lda_);
142: PetscBLASIntCast(n,&n_);
143: PetscBLASIntCast(ldq,&ldq_);
144: ss = rwork;
145: perm = iwork;
146: AA = work;
147: for (i=0;i<n;i++) {
148: PetscMemcpy(AA+i*n,A+i*lda,n*sizeof(PetscScalar));
149: }
150: nwu += n*n;
151: k=0;
152: while (breakdown && k<n) {
153: breakdown = PETSC_FALSE;
154: /* Classify (and flip) A and s according to sign */
155: if (flip) {
156: for (i=0;i<n;i++) {
157: perm[i] = n-1-perm_[i];
158: if (perm[i]==0) i0 = i;
159: if (perm[i]==k) ik = i;
160: }
161: } else {
162: for (i=0;i<n;i++) {
163: perm[i] = perm_[i];
164: if (perm[i]==0) i0 = i;
165: if (perm[i]==k) ik = i;
166: }
167: }
168: perm[ik] = 0;
169: perm[i0] = k;
170: i=1;
171: while (i<n-1 && s[perm[i-1]]==s[perm[0]]) {
172: if (s[perm[i]]!=s[perm[0]]) {
173: j=i+1;
174: while (j<n-1 && s[perm[j]]!=s[perm[0]])j++;
175: tmp = perm[i]; perm[i] = perm[j]; perm[j] = tmp;
176: }
177: i++;
178: }
179: for (i=0;i<n;i++) {
180: ss[i] = s[perm[i]];
181: }
182: if (flip) {
183: ii = &j;
184: jj = &i;
185: } else {
186: ii = &i;
187: jj = &j;
188: }
189: for (i=0;i<n;i++)
190: for (j=0;j<n;j++)
191: A[i+j*lda] = AA[perm[*ii]+perm[*jj]*n];
192: /* Initialize Q */
193: for (i=0;i<n;i++) {
194: PetscMemzero(Q+i*ldq,n*sizeof(PetscScalar));
195: Q[perm[i]+i*ldq] = 1.0;
196: }
197: for (ni=1;ni<n && ss[ni]==ss[0]; ni++);
198: n0 = ni-1;
199: n1 = n_-ni;
200: for (j=0;j<n-2;j++) {
201: PetscBLASIntCast(n-j-1,&m);
202: /* Forming and applying reflectors */
203: if (n0 > 1) {
204: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0,A+ni-n0+j*lda,A+ni-n0+j*lda+1,&inc,&tau));
205: /* Apply reflector */
206: if (PetscAbsScalar(tau) != 0.0) {
207: t=*(A+ni-n0+j*lda); *(A+ni-n0+j*lda)=1.0;
208: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n0,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
209: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0,&m,A+ni-n0+j*lda,&inc,&tau,A+j+1+(j+1)*lda,&lda_,work+nwu));
210: /* Update Q */
211: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0,A+ni-n0+j*lda,&inc,&tau,Q+(j+1)*ldq,&ldq_,work+nwu));
212: *(A+ni-n0+j*lda) = t;
213: for (i=1;i<n0;i++) {
214: *(A+ni-n0+j*lda+i) = 0.0; *(A+j+(ni-n0+i)*lda) = 0.0;
215: }
216: *(A+j+(ni-n0)*lda) = *(A+ni-n0+j*lda);
217: }
218: }
219: if (n1 > 1) {
220: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1,A+n-n1+j*lda,A+n-n1+j*lda+1,&inc,&tau));
221: /* Apply reflector */
222: if (PetscAbsScalar(tau) != 0.0) {
223: t=*(A+n-n1+j*lda); *(A+n-n1+j*lda)=1.0;
224: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&m,&n1,A+n-n1+j*lda,&inc,&tau,A+j+1+(n-n1)*lda,&lda_,work+nwu));
225: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1,&m,A+n-n1+j*lda,&inc,&tau,A+n-n1+(j+1)*lda,&lda_,work+nwu));
226: /* Update Q */
227: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1,A+n-n1+j*lda,&inc,&tau,Q+(n-n1)*ldq,&ldq_,work+nwu));
228: *(A+n-n1+j*lda) = t;
229: for (i=1;i<n1;i++) {
230: *(A+n-n1+i+j*lda) = 0.0; *(A+j+(n-n1+i)*lda) = 0.0;
231: }
232: *(A+j+(n-n1)*lda) = *(A+n-n1+j*lda);
233: }
234: }
235: /* Hyperbolic rotation */
236: if (n0 > 0 && n1 > 0) {
237: HRGen(PetscRealPart(A[ni-n0+j*lda]),PetscRealPart(A[n-n1+j*lda]),&type,&cs,&sn,&r,&cond);
238: /* Check condition number */
239: if (cond > 1.0/(10*PETSC_SQRT_MACHINE_EPSILON)) {
240: breakdown = PETSC_TRUE;
241: k++;
242: if (k==n || flip) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Breakdown in construction of hyperbolic transformation");
243: break;
244: }
245: A[ni-n0+j*lda] = r; A[n-n1+j*lda] = 0.0;
246: A[j+(ni-n0)*lda] = r; A[j+(n-n1)*lda] = 0.0;
247: /* Apply to A */
248: HRApply(m,A+j+1+(ni-n0)*lda,1,A+j+1+(n-n1)*lda,1,cs,-sn);
249: HRApply(m,A+ni-n0+(j+1)*lda,lda,A+n-n1+(j+1)*lda,lda,cs,-sn);
251: /* Update Q */
252: HRApply(n,Q+(ni-n0)*ldq,1,Q+(n-n1)*ldq,1,cs,-sn);
253: if (type==2) {
254: ss[ni-n0] = -ss[ni-n0]; ss[n-n1] = -ss[n-n1];
255: n0++;ni++;n1--;
256: }
257: }
258: if (n0>0) n0--;
259: else n1--;
260: }
261: }
263: /* flip matrices */
264: if (flip) {
265: for (i=0;i<n-1;i++) {
266: d[i] = PetscRealPart(A[n-i-1+(n-i-1)*lda]);
267: e[i] = PetscRealPart(A[n-i-1+(n-i-2)*lda]);
268: s[i] = ss[n-i-1];
269: }
270: s[n-1] = ss[0];
271: d[n-1] = PetscRealPart(A[0]);
272: for (i=0;i<n;i++) {
273: ierr=PetscMemcpy(work+i*n,Q+i*ldq,n*sizeof(PetscScalar));
274: }
275: for (i=0;i<n;i++)
276: for (j=0;j<n;j++)
277: Q[i+j*ldq] = work[i+(n-j-1)*n];
278: } else {
279: for (i=0;i<n-1;i++) {
280: d[i] = PetscRealPart(A[i+i*lda]);
281: e[i] = PetscRealPart(A[i+1+i*lda]);
282: s[i] = ss[i];
283: }
284: s[n-1] = ss[n-1];
285: d[n-1] = PetscRealPart(A[n-1 + (n-1)*lda]);
286: }
287: return(0);
288: #endif
289: }
291: static PetscErrorCode MadeHRtr(PetscInt sz,PetscInt n,PetscInt idx0,PetscInt n0,PetscInt idx1,PetscInt n1,struct HRtr *tr1,struct HRtr *tr2,PetscReal *ncond,PetscScalar *work)
292: {
293: #if defined(SLEPC_MISSING_LAPACK_LARFG) || defined(SLEPC_MISSING_LAPACK_LARF)
295: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARFG/LARF - Lapack routines are unavailable");
296: #else
298: PetscScalar *x,*y;
299: PetscReal ncond2=1.0;
300: PetscBLASInt n0_,n1_,inc=1;
303: /* Hyperbolic transformation to make zeros in x */
304: x = tr1->data;
305: tr1->n[0] = n0;
306: tr1->n[1] = n1;
307: tr1->idx[0] = idx0;
308: tr1->idx[1] = idx1;
309: PetscBLASIntCast(tr1->n[0],&n0_);
310: PetscBLASIntCast(tr1->n[1],&n1_);
311: if (tr1->n[0] > 1) {
312: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,x+tr1->idx[0],x+tr1->idx[0]+1,&inc,tr1->tau));
313: }
314: if (tr1->n[1]> 1) {
315: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,x+tr1->idx[1],x+tr1->idx[1]+1,&inc,tr1->tau+1));
316: }
317: if (tr1->idx[0]<tr1->idx[1]) {
318: HRGen(PetscRealPart(x[tr1->idx[0]]),PetscRealPart(x[tr1->idx[1]]),&(tr1->type),&(tr1->cs),&(tr1->sn),&(tr1->alpha),ncond);
319: } else {
320: tr1->alpha = PetscRealPart(x[tr1->idx[0]]);
321: *ncond = 1.0;
322: }
323: if (sz==2) {
324: y = tr2->data;
325: /* Apply first transformation to second column */
326: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
327: x[tr1->idx[0]] = 1.0;
328: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&inc,x+tr1->idx[0],&inc,tr1->tau,y+tr1->idx[0],&n0_,work));
329: }
330: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
331: x[tr1->idx[1]] = 1.0;
332: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&inc,x+tr1->idx[1],&inc,tr1->tau+1,y+tr1->idx[1],&n1_,work));
333: }
334: if (tr1->idx[0]<tr1->idx[1]) {
335: HRApply(1,y+tr1->idx[0],1,y+tr1->idx[1],1,tr1->cs,-tr1->sn);
336: }
337: tr2->n[0] = tr1->n[0];
338: tr2->n[1] = tr1->n[1];
339: tr2->idx[0] = tr1->idx[0];
340: tr2->idx[1] = tr1->idx[1];
341: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
342: tr2->idx[1]++; tr2->n[1]--; tr2->n[0]++;
343: }
344: if (tr2->n[0]>0) {
345: tr2->n[0]--; tr2->idx[0]++;
346: if (tr2->n[1]==0) tr2->idx[1] = tr2->idx[0];
347: } else {
348: tr2->n[1]--; tr2->idx[1]++; tr2->idx[0] = tr2->idx[1];
349: }
350: /* Hyperbolic transformation to make zeros in y */
351: PetscBLASIntCast(tr2->n[0],&n0_);
352: PetscBLASIntCast(tr2->n[1],&n1_);
353: if (tr2->n[0] > 1) {
354: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n0_,y+tr2->idx[0],y+tr2->idx[0]+1,&inc,tr2->tau));
355: }
356: if (tr2->n[1]> 1) {
357: PetscStackCallBLAS("LAPACKlarfg",LAPACKlarfg_(&n1_,y+tr2->idx[1],y+tr2->idx[1]+1,&inc,tr2->tau+1));
358: }
359: if (tr2->idx[0]<tr2->idx[1]) {
360: HRGen(PetscRealPart(y[tr2->idx[0]]),PetscRealPart(y[tr2->idx[1]]),&(tr2->type),&(tr2->cs),&(tr2->sn),&(tr2->alpha),&ncond2);
361: } else {
362: tr2->alpha = PetscRealPart(y[tr2->idx[0]]);
363: ncond2 = 1.0;
364: }
365: if (ncond2>*ncond) *ncond = ncond2;
366: }
367: return(0);
368: #endif
369: }
371: /*
372: Auxiliary function to try perform one iteration of hr routine,
373: checking condition number. If it is < tolD, apply the
374: transformation to H and R, if not, ok=false and it do nothing
375: tolE, tolerance to exchange complex pairs to improve conditioning
376: */
377: static PetscErrorCode TryHRIt(PetscInt n,PetscInt j,PetscInt sz,PetscScalar *H,PetscInt ldh,PetscScalar *R,PetscInt ldr,PetscReal *s,PetscBool *exg,PetscBool *ok,PetscInt *n0,PetscInt *n1,PetscInt *idx0,PetscInt *idx1,PetscReal *cond,PetscScalar *work)
378: {
379: #if defined(SLEPC_MISSING_LAPACK_LARF)
381: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LARF - Lapack routine is unavailable");
382: #else
384: struct HRtr *tr1,*tr2,tr1_t,tr2_t,tr1_te,tr2_te;
385: PetscScalar *x,*y;
386: PetscReal ncond,ncond_e;
387: PetscInt nwu=0,i,d=1;
388: PetscBLASInt n0_,n1_,inc=1,mh,mr,n_,ldr_,ldh_;
389: PetscReal tolD = 1e+5;
392: if (cond) *cond = 1.0;
393: PetscBLASIntCast(n,&n_);
394: PetscBLASIntCast(ldr,&ldr_);
395: PetscBLASIntCast(ldh,&ldh_);
396: x = work+nwu;
397: nwu += n;
398: PetscMemcpy(x,R+j*ldr,n*sizeof(PetscScalar));
399: *exg = PETSC_FALSE;
400: *ok = PETSC_TRUE;
401: tr1_t.data = x;
402: if (sz==1) {
403: /* Hyperbolic transformation to make zeros in x */
404: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,NULL,&ncond,work+nwu);
405: /* Check condition number to single column*/
406: if (ncond>tolD) *ok = PETSC_FALSE;
407: tr1 = &tr1_t;
408: tr2 = &tr2_t;
409: } else {
410: y = work+nwu;
411: nwu += n;
412: PetscMemcpy(y,R+(j+1)*ldr,n*sizeof(PetscScalar));
413: tr2_t.data = y;
414: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_t,&tr2_t,&ncond,work+nwu);
415: /* Computing hyperbolic transformations also for exchanged vectors */
416: tr1_te.data = work+nwu;
417: nwu += n;
418: PetscMemcpy(tr1_te.data,R+(j+1)*ldr,n*sizeof(PetscScalar));
419: tr2_te.data = work+nwu;
420: nwu += n;
421: PetscMemcpy(tr2_te.data,R+j*ldr,n*sizeof(PetscScalar));
422: MadeHRtr(sz,n,*idx0,*n0,*idx1,*n1,&tr1_te,&tr2_te,&ncond_e,work+nwu);
423: if (ncond > d*ncond_e) {
424: *exg = PETSC_TRUE;
425: tr1 = &tr1_te;
426: tr2 = &tr2_te;
427: ncond = ncond_e;
428: } else {
429: tr1 = &tr1_t;
430: tr2 = &tr2_t;
431: }
432: if (ncond>tolD) *ok = PETSC_FALSE;
433: }
434: if (*ok) {
435: /* Everything is OK, apply transformations to R and H */
436: /* First column */
437: if (cond && *cond<ncond) *cond = ncond;
438: x = tr1->data;
439: PetscBLASIntCast(tr1->n[0],&n0_);
440: PetscBLASIntCast(tr1->n[1],&n1_);
441: PetscBLASIntCast(n-j-sz,&mr);
442: if (tr1->n[0] > 1 && PetscAbsScalar(tr1->tau[0])!=0.0) {
443: x[tr1->idx[0]] = 1.0;
444: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,x+tr1->idx[0],&inc,tr1->tau,R+(j+sz)*ldr+tr1->idx[0],&ldr_,work+nwu));
445: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,x+tr1->idx[0],&inc,tr1->tau,H+(tr1->idx[0])*ldh,&ldh_,work+nwu));
446: }
447: if (tr1->n[1] > 1 && PetscAbsScalar(tr1->tau[1])!=0.0) {
448: x[tr1->idx[1]] = 1.0;
449: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,x+tr1->idx[1],&inc,tr1->tau+1,R+(j+sz)*ldr+tr1->idx[1],&ldr_,work+nwu));
450: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,x+tr1->idx[1],&inc,tr1->tau+1,H+(tr1->idx[1])*ldh,&ldh_,work+nwu));
451: }
452: if (tr1->idx[0]<tr1->idx[1]) {
453: HRApply(mr,R+(j+sz)*ldr+tr1->idx[0],ldr,R+(j+sz)*ldr+tr1->idx[1],ldr,tr1->cs,-tr1->sn);
454: if (tr1->type==1) {
455: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,tr1->cs,tr1->sn);
456: } else {
457: HRApply(n,H+(tr1->idx[0])*ldh,1,H+(tr1->idx[1])*ldh,1,-tr1->cs,-tr1->sn);
458: s[tr1->idx[0]] = -s[tr1->idx[0]];
459: s[tr1->idx[1]] = -s[tr1->idx[1]];
460: }
461: }
462: for (i=0;i<tr1->idx[0];i++) *(R+j*ldr+i) = x[i];
463: for (i=tr1->idx[0]+1;i<n;i++) *(R+j*ldr+i) = 0.0;
464: *(R+j*ldr+tr1->idx[0]) = tr1->alpha;
465: if (sz==2) {
466: y = tr2->data;
467: /* Second column */
468: PetscBLASIntCast(tr2->n[0],&n0_);
469: PetscBLASIntCast(tr2->n[1],&n1_);
470: PetscBLASIntCast(n-j-sz,&mr);
471: PetscBLASIntCast(n-tr2->idx[0],&mh);
472: if (tr2->n[0] > 1 && PetscAbsScalar(tr2->tau[0])!=0.0) {
473: y[tr2->idx[0]] = 1.0;
474: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n0_,&mr,y+tr2->idx[0],&inc,tr2->tau,R+(j+2)*ldr+tr2->idx[0],&ldr_,work+nwu));
475: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n0_,y+tr2->idx[0],&inc,tr2->tau,H+(tr2->idx[0])*ldh,&ldh_,work+nwu));
476: }
477: if (tr2->n[1] > 1 && PetscAbsScalar(tr2->tau[1])!=0.0) {
478: y[tr2->idx[1]] = 1.0;
479: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("L",&n1_,&mr,y+tr2->idx[1],&inc,tr2->tau+1,R+(j+2)*ldr+tr2->idx[1],&ldr_,work+nwu));
480: PetscStackCallBLAS("LAPACKlarf",LAPACKlarf_("R",&n_,&n1_,y+tr2->idx[1],&inc,tr2->tau+1,H+(tr2->idx[1])*ldh,&ldh_,work+nwu));
481: }
482: if (tr2->idx[0]<tr2->idx[1]) {
483: HRApply(mr,R+(j+2)*ldr+tr2->idx[0],ldr,R+(j+2)*ldr+tr2->idx[1],ldr,tr2->cs,-tr2->sn);
484: if (tr2->type==1) {
485: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,tr2->cs,tr2->sn);
486: } else {
487: HRApply(n,H+(tr2->idx[0])*ldh,1,H+(tr2->idx[1])*ldh,1,-tr2->cs,-tr2->sn);
488: s[tr2->idx[0]] = -s[tr2->idx[0]];
489: s[tr2->idx[1]] = -s[tr2->idx[1]];
490: }
491: }
492: for (i=0;i<tr2->idx[0]-1;i++) *(R+(j+1)*ldr+i) = y[i];
493: *(R+(j+1)*ldr+tr2->idx[0]-1) = y[tr2->idx[0]-1];
494: for (i=tr2->idx[0]+1;i<n;i++) *(R+(j+1)*ldr+i) = 0.0;
495: *(R+(j+1)*ldr+tr2->idx[0]) = tr2->alpha;
496: *n0 = tr2->n[0];
497: *n1 = tr2->n[1];
498: *idx0 = tr2->idx[0];
499: *idx1 = tr2->idx[1];
500: if (tr2->idx[0]<tr2->idx[1] && tr2->type==2) {
501: (*idx1)++; (*n1)--; (*n0)++;
502: }
503: } else {
504: *n0 = tr1->n[0];
505: *n1 = tr1->n[1];
506: *idx0 = tr1->idx[0];
507: *idx1 = tr1->idx[1];
508: if (tr1->idx[0]<tr1->idx[1] && tr1->type==2) {
509: (*idx1)++; (*n1)--; (*n0)++;
510: }
511: }
512: if (*n0>0) {
513: (*n0)--; (*idx0)++;
514: if (*n1==0) *idx1 = *idx0;
515: } else {
516: (*n1)--; (*idx1)++; *idx0 = *idx1;
517: }
518: }
519: return(0);
520: #endif
521: }
523: /*
524: compute V = HR whit H s-orthogonal and R upper triangular
525: */
526: static PetscErrorCode PseudoOrthog_HR(PetscInt *nv,PetscScalar *V,PetscInt ldv,PetscReal *s,PetscScalar *R,PetscInt ldr,PetscBLASInt *perm,PetscBLASInt *cmplxEig,PetscBool *breakdown,PetscScalar *work)
527: {
529: PetscInt i,j,n,n0,n1,np,idx0,idx1,sz=1,k=0,t1,t2,nwu=0;
530: PetscScalar *col1,*col2;
531: PetscBool exg=PETSC_FALSE,ok=PETSC_FALSE;
534: n = *nv;
535: col1 = work+nwu;
536: nwu += n;
537: col2 = work+nwu;
538: nwu += n;
539: /* Sort R and s according to sing(s) */
540: np = 0;
541: for (i=0;i<n;i++) if (s[i]>0) np++;
542: if (s[0]>0) n1 = np;
543: else n1 = n-np;
544: n0 = 0;
545: for (i=0;i<n;i++) {
546: if (s[i]==s[0]) {
547: s[n0] = s[0];
548: perm[n0++] = i;
549: } else perm[n1++] = i;
550: }
551: for (i=n0;i<n;i++) s[i] = -s[0];
552: n1 -= n0;
553: idx0 = 0;
554: idx1 = n0;
555: if (idx1==n) idx1=idx0;
556: for (i=0;i<n;i++) {
557: for (j=0;j<n;j++) R[j*ldr+i] = V[j*ldv+perm[i]];
558: }
559: /* Initialize H */
560: for (i=0;i<n;i++) {
561: PetscMemzero(V+i*ldv,n*sizeof(PetscScalar));
562: V[perm[i]+i*ldv] = 1.0;
563: }
564: for (i=0;i<n;i++) perm[i] = i;
565: j = 0;
566: while (j<n-k) {
567: if (cmplxEig[j]==0) sz=1;
568: else sz=2;
569: TryHRIt(n,j,sz,V,ldv,R,ldr,s,&exg,&ok,&n0,&n1,&idx0,&idx1,NULL,work+nwu);
570: if (ok) {
571: if (exg) cmplxEig[j] = -cmplxEig[j];
572: j = j+sz;
573: } else { /* to be discarded */
574: k = k+1;
575: if (cmplxEig[j]==0) {
576: if (j<n) {
577: t1 = perm[j];
578: for (i=j;i<n-1;i++) perm[i] = perm[i+1];
579: perm[n-1] = t1;
580: t1 = cmplxEig[j];
581: for (i=j;i<n-1;i++) cmplxEig[i] = cmplxEig[i+1];
582: cmplxEig[n-1] = t1;
583: PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
584: for (i=j;i<n-1;i++) {
585: PetscMemcpy(R+i*ldr,R+(i+1)*ldr,n*sizeof(PetscScalar));
586: }
587: PetscMemcpy(R+(n-1)*ldr,col1,n*sizeof(PetscScalar));
588: }
589: } else {
590: k = k+1;
591: if (j<n-1) {
592: t1 = perm[j];
593: t2 = perm[j+1];
594: for (i=j;i<n-2;i++) perm[i] = perm[i+2];
595: perm[n-2] = t1;
596: perm[n-1] = t2;
597: t1 = cmplxEig[j];
598: t2 = cmplxEig[j+1];
599: for (i=j;i<n-2;i++) cmplxEig[i] = cmplxEig[i+2];
600: cmplxEig[n-2] = t1;
601: cmplxEig[n-1] = t2;
602: PetscMemcpy(col1,R+j*ldr,n*sizeof(PetscScalar));
603: PetscMemcpy(col2,R+(j+1)*ldr,n*sizeof(PetscScalar));
604: for (i=j;i<n-2;i++) {
605: PetscMemcpy(R+i*ldr,R+(i+2)*ldr,n*sizeof(PetscScalar));
606: }
607: PetscMemcpy(R+(n-2)*ldr,col1,n*sizeof(PetscScalar));
608: PetscMemcpy(R+(n-1)*ldr,col2,n*sizeof(PetscScalar));
609: }
610: }
611: }
612: }
613: if (k!=0) {
614: if (breakdown) *breakdown = PETSC_TRUE;
615: *nv = n-k;
616: }
617: return(0);
618: }
620: PetscErrorCode DSGHIEPOrthogEigenv(DS ds,DSMatType mat,PetscScalar *wr,PetscScalar *wi,PetscBool accum)
621: {
623: PetscInt lws,nwus=0,nwui=0,lwi;
624: PetscInt off,n,nv,ld,i,ldr,l;
625: PetscScalar *W,*X,*R,*ts,zeroS=0.0,oneS=1.0;
626: PetscReal *s,vi,vr,tr,*d,*e;
627: PetscBLASInt ld_,n_,nv_,*perm,*cmplxEig;
630: l = ds->l;
631: n = ds->n-l;
632: PetscBLASIntCast(n,&n_);
633: ld = ds->ld;
634: PetscBLASIntCast(ld,&ld_);
635: off = l*ld+l;
636: s = ds->rmat[DS_MAT_D];
637: if (!ds->compact) {
638: for (i=l;i<ds->n;i++) s[i] = PetscRealPart(*(ds->mat[DS_MAT_B]+i*ld+i));
639: }
640: lws = n*n+7*n;
641: lwi = 2*n;
642: DSAllocateWork_Private(ds,lws,0,lwi);
643: R = ds->work+nwus;
644: nwus += n*n;
645: ldr = n;
646: perm = ds->iwork + nwui;
647: nwui += n;
648: cmplxEig = ds->iwork+nwui;
649: X = ds->mat[mat];
650: for (i=0;i<n;i++) {
651: #if defined(PETSC_USE_COMPLEX)
652: vi = PetscImaginaryPart(wr[l+i]);
653: #else
654: vi = PetscRealPart(wi[l+i]);
655: #endif
656: if (vi!=0) {
657: cmplxEig[i] = 1;
658: cmplxEig[i+1] = 2;
659: i++;
660: } else cmplxEig[i] = 0;
661: }
662: nv = n;
664: /* Perform HR decomposition */
665: /* Hyperbolic rotators */
666: PseudoOrthog_HR(&nv,X+off,ld,s+l,R,ldr,perm,cmplxEig,NULL,ds->work+nwus);
667: /* Sort wr,wi perm */
668: ts = ds->work+nwus;
669: PetscMemcpy(ts,wr+l,n*sizeof(PetscScalar));
670: for (i=0;i<n;i++) wr[i+l] = ts[perm[i]];
671: #if !defined(PETSC_USE_COMPLEX)
672: PetscMemcpy(ts,wi+l,n*sizeof(PetscScalar));
673: for (i=0;i<n;i++) wi[i+l] = ts[perm[i]];
674: #endif
675: /* Projected Matrix */
676: PetscMemzero(ds->rmat[DS_MAT_T]+2*ld,ld*sizeof(PetscReal));
677: d = ds->rmat[DS_MAT_T];
678: e = d+ld;
679: for (i=0;i<nv;i++) {
680: if (cmplxEig[i]==0) { /* Real */
681: d[l+i] = PetscRealPart(wr[l+i]*s[l+i]);
682: e[l+i] = 0.0;
683: } else {
684: vr = PetscRealPart(wr[l+i]);
685: #if defined(PETSC_USE_COMPLEX)
686: vi = PetscImaginaryPart(wr[l+i]);
687: #else
688: vi = PetscRealPart(wi[l+i]);
689: #endif
690: if (cmplxEig[i]==-1) vi = -vi;
691: tr = PetscRealPart((R[i+(i+1)*ldr]/R[i+i*ldr]))*vi;
692: d[l+i] = (vr-tr)*s[l+i];
693: d[l+i+1] = (vr+tr)*s[l+i+1];
694: e[l+i] = PetscRealPart(s[l+i]*(R[(i+1)+(i+1)*ldr]/R[i+i*ldr])*vi);
695: e[l+i+1] = 0.0;
696: i++;
697: }
698: }
699: /* accumulate previous Q */
700: if (accum) {
701: PetscBLASIntCast(nv,&nv_);
702: DSAllocateMat_Private(ds,DS_MAT_W);
703: W = ds->mat[DS_MAT_W];
704: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
705: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n_,&nv_,&n_,&oneS,W+off,&ld_,X+off,&ld_,&zeroS,ds->mat[DS_MAT_Q]+off,&ld_));
706: } else {
707: PetscMemzero(ds->mat[DS_MAT_Q],ld*ld*sizeof(PetscScalar));
708: for (i=0;i<ds->l;i++) *(ds->mat[DS_MAT_Q]+i+i*ld) = 1.0;
709: for (i=0;i<n;i++) { PetscMemcpy(ds->mat[DS_MAT_Q]+off+i*ld,X+off+i*ld,n*sizeof(PetscScalar)); }
710: }
711: ds->t = nv+l;
712: if (!ds->compact) { DSSwitchFormat_GHIEP(ds,PETSC_FALSE); }
713: return(0);
714: }
716: /*
717: Reduce to tridiagonal-diagonal pair by means of TridiagDiag_HHR.
718: */
719: PetscErrorCode DSIntermediate_GHIEP(DS ds)
720: {
722: PetscInt i,ld,off;
723: PetscInt nwall,nwallr,nwalli;
724: PetscScalar *A,*B,*Q;
725: PetscReal *d,*e,*s;
728: ld = ds->ld;
729: A = ds->mat[DS_MAT_A];
730: B = ds->mat[DS_MAT_B];
731: Q = ds->mat[DS_MAT_Q];
732: d = ds->rmat[DS_MAT_T];
733: e = ds->rmat[DS_MAT_T]+ld;
734: s = ds->rmat[DS_MAT_D];
735: off = ds->l+ds->l*ld;
736: PetscMemzero(Q,ld*ld*sizeof(PetscScalar));
737: nwall = ld*ld+ld;
738: nwallr = ld;
739: nwalli = ld;
740: DSAllocateWork_Private(ds,nwall,nwallr,nwalli);
741: for (i=0;i<ds->n;i++) Q[i+i*ld]=1.0;
742: for (i=0;i<ds->n-ds->l;i++) *(ds->perm+i)=i;
743: if (ds->compact) {
744: if (ds->state < DS_STATE_INTERMEDIATE) {
745: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
746: TridiagDiag_HHR(ds->k-ds->l+1,A+off,ld,s+ds->l,Q+off,ld,PETSC_TRUE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
747: ds->k = ds->l;
748: PetscMemzero(d+2*ld+ds->l,(ds->n-ds->l)*sizeof(PetscReal));
749: }
750: } else {
751: if (ds->state < DS_STATE_INTERMEDIATE) {
752: for (i=0;i<ds->n;i++) s[i] = PetscRealPart(B[i+i*ld]);
753: TridiagDiag_HHR(ds->n-ds->l,A+off,ld,s+ds->l,Q+off,ld,PETSC_FALSE,d+ds->l,e+ds->l,ds->perm,ds->work,ds->rwork,ds->iwork);
754: PetscMemzero(d+2*ld,(ds->n)*sizeof(PetscReal));
755: ds->k = ds->l;
756: DSSwitchFormat_GHIEP(ds,PETSC_FALSE);
757: } else {
758: DSSwitchFormat_GHIEP(ds,PETSC_TRUE);
759: }
760: }
761: return(0);
762: }