Actual source code: bvlapack.c
slepc-3.11.2 2019-07-30
/*
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
   SLEPc - Scalable Library for Eigenvalue Problem Computations
   Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

   This file is part of SLEPc.
   SLEPc is distributed under a 2-clause BSD license (see LICENSE).
   - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
*/
/*
   BV private kernels that use LAPACK
*/

#include <slepc/private/bvimpl.h>
#include <slepcblaslapack.h>

/*
   Compute ||A|| for an mxn matrix
*/
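/*
   When mpi is true, A holds only the locally owned rows and the result must be
   combined across the BV's communicator. A sketch of the reductions used below:
     - Frobenius/2-norm: ||A||_F^2 = sum_p ||A_p||_F^2, so the local squared
       norms are added with MPI_SUM and the square root is taken afterwards.
     - 1-norm (max column sum): each column spans all processes, so the local
       column sums must first be added with MPI_SUM and only then maximized;
       reducing the local 1-norms directly would be wrong.
     - inf-norm (max row sum): each row lives on a single process, so the local
       infinity norms can simply be combined with MPI_MAX.
*/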
PetscErrorCode BVNorm_LAPACK_Private(BV bv,PetscInt m_,PetscInt n_,const PetscScalar *A,NormType type,PetscReal *nrm,PetscBool mpi)
{
  PetscBLASInt m,n,i,j;
  PetscMPIInt  len;
  PetscReal    lnrm,*rwork=NULL,*rwork2=NULL;

  PetscFPTrapPush(PETSC_FP_TRAP_OFF);
  PetscBLASIntCast(m_,&m);
  PetscBLASIntCast(n_,&n);
  if (type==NORM_FROBENIUS || type==NORM_2) {
    lnrm = LAPACKlange_("F",&m,&n,(PetscScalar*)A,&m,rwork);
    if (mpi) {
      lnrm = lnrm*lnrm;
      MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
      *nrm = PetscSqrtReal(*nrm);
    } else *nrm = lnrm;
    PetscLogFlops(2.0*m*n);
  } else if (type==NORM_1) {
    if (mpi) {
      BVAllocateWork_Private(bv,2*n_);
      rwork  = (PetscReal*)bv->work;
      rwork2 = rwork+n_;
      PetscMemzero(rwork,n_*sizeof(PetscReal));
      PetscMemzero(rwork2,n_*sizeof(PetscReal));
      for (j=0;j<n_;j++) {
        for (i=0;i<m_;i++) {
          rwork[j] += PetscAbsScalar(A[i+j*m_]);
        }
      }
      PetscMPIIntCast(n_,&len);
      MPI_Allreduce(rwork,rwork2,len,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
      *nrm = 0.0;
      for (j=0;j<n_;j++) if (rwork2[j] > *nrm) *nrm = rwork2[j];
    } else {
      *nrm = LAPACKlange_("O",&m,&n,(PetscScalar*)A,&m,rwork);
    }
    PetscLogFlops(1.0*m*n);
  } else if (type==NORM_INFINITY) {
    BVAllocateWork_Private(bv,m_);
    rwork = (PetscReal*)bv->work;
    lnrm = LAPACKlange_("I",&m,&n,(PetscScalar*)A,&m,rwork);
    if (mpi) {
      MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)bv));
    } else *nrm = lnrm;
    PetscLogFlops(1.0*m*n);
  }
  PetscFPTrapPop();
  return(0);
}

/*
   Compute the upper Cholesky factor in R and its inverse in S.
   If S == R then the inverse overwrites the Cholesky factor.
*/
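/*
   Only the active block (rows and columns l..k-1) of R is referenced. R is
   expected to hold a Gram matrix G = V'*V there; once G = R'*R is factored,
   the columns of V*inv(R) are orthonormal, because
     (V*inv(R))'*(V*inv(R)) = inv(R)'*(R'*R)*inv(R) = I,
   which is why the inverse is returned along with the factor.
*/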
PetscErrorCode BVMatCholInv_LAPACK_Private(BV bv,Mat R,Mat S)
{
#if defined(PETSC_MISSING_LAPACK_POTRF) || defined(SLEPC_MISSING_LAPACK_TRTRI)
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF/TRTRI - Lapack routine is unavailable");
#else
  PetscInt     i,k,l,n,m,ld,lds;
  PetscScalar  *pR,*pS;
  PetscBLASInt info,n_,m_,ld_,lds_;

  l = bv->l;
  k = bv->k;
  MatGetSize(R,&m,NULL);
  n = k-l;
  PetscBLASIntCast(m,&m_);
  PetscBLASIntCast(n,&n_);
  ld  = m;
  ld_ = m_;
  MatDenseGetArray(R,&pR);

  if (S==R) {
    BVAllocateWork_Private(bv,m*k);
    pS = bv->work;
    lds  = ld;
    lds_ = ld_;
  } else {
    MatDenseGetArray(S,&pS);
    MatGetSize(S,&lds,NULL);
    PetscBLASIntCast(lds,&lds_);
  }

  /* save a copy of matrix in S */
  for (i=l;i<k;i++) {
    PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
  }

  /* compute upper Cholesky factor in R */
  PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
  PetscLogFlops((1.0*n*n*n)/3.0);
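
  /* A nonzero info from POTRF means the matrix is numerically indefinite, which
     for a Gram matrix typically indicates (nearly) linearly dependent columns;
     the recovery below restores the saved copy and retries once with the
     diagonal shifted by a small multiple of the machine precision */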
  if (info) { /* LAPACKpotrf failed, retry on diagonally perturbed matrix */
    for (i=l;i<k;i++) {
      PetscMemcpy(pR+i*ld+l,pS+i*lds+l,n*sizeof(PetscScalar));
      pR[i+i*ld] += 50.0*PETSC_MACHINE_EPSILON;
    }
    PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
    SlepcCheckLapackInfo("potrf",info);
    PetscLogFlops((1.0*n*n*n)/3.0);
  }

  /* compute S = inv(R) */
  if (S==R) {
    PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
  } else {
    PetscMemzero(pS+l*lds,(k-l)*k*sizeof(PetscScalar));
    for (i=l;i<k;i++) {
      PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
    }
    PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
  }
  SlepcCheckLapackInfo("trtri",info);
  PetscLogFlops(0.33*n*n*n);

  /* Zero out entries below the diagonal */
  for (i=l;i<k-1;i++) {
    PetscMemzero(pR+i*ld+i+1,(k-i-1)*sizeof(PetscScalar));
    if (S!=R) { PetscMemzero(pS+i*lds+i+1,(k-i-1)*sizeof(PetscScalar)); }
  }
  MatDenseRestoreArray(R,&pR);
  if (S!=R) { MatDenseRestoreArray(S,&pS); }
  return(0);
#endif
}

/*
   Compute the inverse of an upper triangular matrix R, store it in S.
   If S == R then the inverse overwrites R.
*/
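/*
   As in the kernels above, only the n x n active block starting at position
   (l,l) is inverted; ld and lds are the leading dimensions (the full matrix
   size), so entry (i,j) of R is addressed as pR[i+j*ld].
*/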
PetscErrorCode BVMatTriInv_LAPACK_Private(BV bv,Mat R,Mat S)
{
#if defined(SLEPC_MISSING_LAPACK_TRTRI)
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRTRI - Lapack routine is unavailable");
#else
  PetscInt     i,k,l,n,m,ld,lds;
  PetscScalar  *pR,*pS;
  PetscBLASInt info,n_,m_,ld_,lds_;

  l = bv->l;
  k = bv->k;
  MatGetSize(R,&m,NULL);
  n = k-l;
  PetscBLASIntCast(m,&m_);
  PetscBLASIntCast(n,&n_);
  ld  = m;
  ld_ = m_;
  MatDenseGetArray(R,&pR);

  if (S==R) {
    BVAllocateWork_Private(bv,m*k);
    pS = bv->work;
    lds  = ld;
    lds_ = ld_;
  } else {
    MatDenseGetArray(S,&pS);
    MatGetSize(S,&lds,NULL);
    PetscBLASIntCast(lds,&lds_);
  }

  /* compute S = inv(R) */
  if (S==R) {
    PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
  } else {
    PetscMemzero(pS+l*lds,(k-l)*k*sizeof(PetscScalar));
    for (i=l;i<k;i++) {
      PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
    }
    PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
  }
  SlepcCheckLapackInfo("trtri",info);
  PetscLogFlops(0.33*n*n*n);

  MatDenseRestoreArray(R,&pR);
  if (S!=R) { MatDenseRestoreArray(S,&pS); }
  return(0);
#endif
}

/*
   Compute the matrix to be used for post-multiplying the basis in the SVQB
   block orthogonalization method.
   On input R = V'*V, on output S = D*U*Lambda^{-1/2} where (U,Lambda) is
   the eigendecomposition of D*R*D with D = diag(R)^{-1/2}.
   If S == R then the result overwrites R.
*/
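/*
   Why this makes V*S orthonormal: with G = V'*V and D = diag(G)^{-1/2}, the
   scaled matrix D*G*D has unit diagonal and eigendecomposition U*Lambda*U'.
   Then, for S = D*U*Lambda^{-1/2},
     (V*S)'*(V*S) = Lambda^{-1/2}*U'*(D*G*D)*U*Lambda^{-1/2}
                  = Lambda^{-1/2}*U'*U*Lambda*U'*U*Lambda^{-1/2} = I.
*/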
PetscErrorCode BVMatSVQB_LAPACK_Private(BV bv,Mat R,Mat S)
{
#if defined(SLEPC_MISSING_LAPACK_SYEV)
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEV - Lapack routine is unavailable");
#else
  PetscInt     i,j,k,l,n,m,ld,lds;
  PetscScalar  *pR,*pS,*D,*work,a;
  PetscReal    *eig,dummy;
  PetscBLASInt info,lwork,n_,m_,ld_,lds_;
#if defined(PETSC_USE_COMPLEX)
  PetscReal    *rwork,rdummy;
#endif

  l = bv->l;
  k = bv->k;
  MatGetSize(R,&m,NULL);
  n = k-l;
  PetscBLASIntCast(m,&m_);
  PetscBLASIntCast(n,&n_);
  ld  = m;
  ld_ = m_;
  MatDenseGetArray(R,&pR);

  if (S==R) {
    pS = pR;
    lds  = ld;
    lds_ = ld_;
  } else {
    MatDenseGetArray(S,&pS);
    MatGetSize(S,&lds,NULL);
    PetscBLASIntCast(lds,&lds_);
  }

  /* workspace query and memory allocation */
  lwork = -1;
#if defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&rdummy,&info));
  PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
  PetscMalloc4(n,&eig,n,&D,lwork,&work,PetscMax(1,3*n-2),&rwork);
#else
  PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&info));
  PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
  PetscMalloc3(n,&eig,n,&D,lwork,&work);
#endif

  /* copy and scale matrix */
  for (i=l;i<k;i++) D[i-l] = 1.0/PetscSqrtReal(PetscRealPart(pR[i+i*ld]));
  for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] = pR[i+j*ld]*D[i-l];
  for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] *= D[j-l];

  /* compute eigendecomposition */
#if defined(PETSC_USE_COMPLEX)
  PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,rwork,&info));
#else
  PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,&info));
#endif
  SlepcCheckLapackInfo("syev",info);

  if (S!=R) { /* R = U' */
    for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] = pS[j+i*lds];
  }

  /* compute S = D*U*Lambda^{-1/2} */
  for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] *= D[i-l];
  for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] /= PetscSqrtReal(eig[j-l]);

  if (S!=R) { /* compute R = inv(S) = Lambda^{1/2}*U'/D */
    for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] *= PetscSqrtReal(eig[i-l]);
    for (j=l;j<k;j++) for (i=l;i<k;i++) pR[i+j*ld] /= D[j-l];
  }

#if defined(PETSC_USE_COMPLEX)
  PetscFree4(eig,D,work,rwork);
#else
  PetscFree3(eig,D,work);
#endif
  PetscLogFlops(9.0*n*n*n);

  MatDenseRestoreArray(R,&pR);
  if (S!=R) { MatDenseRestoreArray(S,&pS); }
  return(0);
#endif
}

/*
   QR factorization of an mxn matrix via parallel TSQR
*/
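/*
   The TSQR scheme used here: each process first computes a QR factorization of
   its local (tall-skinny) block. The resulting n x n triangular factors are
   then merged pairwise over ceil(log2(size)) levels: at each level, partner
   processes exchange triangles, stack them into a 2n x n matrix, and factor it
   again with GEQRF. The small Q factors produced at every level are kept in QQ
   and multiplied back into the local block at the end, so that on exit Q holds
   the local rows of the global orthogonal factor and (optionally) R the global
   triangular factor.
*/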
PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
{
#if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
#else
  PetscInt     level,plevel,nlevels,powtwo,lda,worklen;
  PetscBLASInt m,n,i,j,k,l,s,nb,sz,lwork,info;
  PetscScalar  *tau,*work,*A=NULL,*QQ=NULL,*Qhalf,*C=NULL,one=1.0,zero=0.0;
  PetscMPIInt  rank,size,count,stride;
  MPI_Datatype tmat;

  PetscFPTrapPush(PETSC_FP_TRAP_OFF);
  PetscBLASIntCast(m_,&m);
  PetscBLASIntCast(n_,&n);
  k   = PetscMin(m,n);
  nb  = 16;
  lda = 2*n;
  MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
  MPI_Comm_rank(PetscObjectComm((PetscObject)bv),&rank);
  nlevels = (PetscInt)PetscCeilReal(PetscLog2Real((PetscReal)size));
  powtwo  = PetscPowInt(2,(PetscInt)PetscFloorReal(PetscLog2Real((PetscReal)size)));
  worklen = n+n*nb;
  if (nlevels) worklen += n*lda+n*lda*nlevels+n*lda;
  BVAllocateWork_Private(bv,worklen);
  tau  = bv->work;
  work = bv->work+n;
  PetscBLASIntCast(n*nb,&lwork);
  if (nlevels) {
    A  = bv->work+n+n*nb;
    QQ = bv->work+n+n*nb+n*lda;
    C  = bv->work+n+n*nb+n*lda+n*lda*nlevels;
  }

  /* Compute QR */
  PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,Q,&m,tau,work,&lwork,&info));
  SlepcCheckLapackInfo("geqrf",info);

  /* Extract R */
  if (R || nlevels) {
    for (j=0;j<n;j++) {
      for (i=0;i<=PetscMin(j,m-1);i++) {
        if (nlevels) A[i+j*lda] = Q[i+j*m];
        else R[i+j*ldr] = Q[i+j*m];
      }
      for (i=PetscMin(j,m-1)+1;i<n;i++) {
        if (nlevels) A[i+j*lda] = 0.0;
        else R[i+j*ldr] = 0.0;
      }
    }
  }

  /* Compute orthogonal matrix in Q */
  PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&m,&k,&k,Q,&m,tau,work,&lwork,&info));
  SlepcCheckLapackInfo("orgqr",info);

  if (nlevels) {
    PetscMPIIntCast(n,&count);
    PetscMPIIntCast(lda,&stride);
    PetscBLASIntCast(lda,&l);
    MPI_Type_vector(count,count,stride,MPIU_SCALAR,&tmat);
    MPI_Type_commit(&tmat);
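
    /* At level p, ranks are grouped in blocks of 2^p and each rank pairs with
       the partner s obtained by flipping bit p-1 of its rank within the block;
       ranks beyond the largest power of two (when size is not a power of 2)
       are folded in separately below */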
    for (level=nlevels;level>=1;level--) {

      plevel = PetscPowInt(2,level);
      s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;

      /* Stack triangular matrices */
      if (rank<s && s<size) {  /* send top part, receive bottom part */
        MPI_Sendrecv(A,1,tmat,s,111,A+n,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
      } else if (s<size) {  /* copy top to bottom, receive top part */
        MPI_Sendrecv(A,1,tmat,rank,111,A+n,1,tmat,rank,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
        MPI_Sendrecv(A+n,1,tmat,s,111,A,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
      }
      if (level<nlevels && size!=powtwo) {  /* for cases when size is not a power of 2 */
        if (rank<size-powtwo) {  /* send bottom part */
          MPI_Send(A+n,1,tmat,rank+powtwo,111,PetscObjectComm((PetscObject)bv));
        } else if (rank>=powtwo) {  /* receive bottom part */
          MPI_Recv(A+n,1,tmat,rank-powtwo,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
        }
      }
      /* Compute QR and build orthogonal matrix */
      if (level<nlevels || (level==nlevels && s<size)) {
        PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&l,&n,A,&l,tau,work,&lwork,&info));
        SlepcCheckLapackInfo("geqrf",info);
        PetscMemcpy(QQ+(level-1)*n*lda,A,n*lda*sizeof(PetscScalar));
        PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&l,&n,&n,QQ+(level-1)*n*lda,&l,tau,work,&lwork,&info));
        SlepcCheckLapackInfo("orgqr",info);
        for (j=0;j<n;j++) {
          for (i=j+1;i<n;i++) A[i+j*lda] = 0.0;
        }
      } else if (level==nlevels) {  /* only one triangular matrix, set Q=I */
        PetscMemzero(QQ+(level-1)*n*lda,n*lda*sizeof(PetscScalar));
        for (j=0;j<n;j++) QQ[j+j*lda+(level-1)*n*lda] = 1.0;
      }
    }

    /* Extract R */
    if (R) {
      for (j=0;j<n;j++) {
        for (i=0;i<=j;i++) R[i+j*ldr] = A[i+j*lda];
        for (i=j+1;i<n;i++) R[i+j*ldr] = 0.0;
      }
    }
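
    /* The global Q is recovered by chaining the per-level factors from the
       deepest level upwards: each pass multiplies the next level's 2n x n
       factor by the relevant half (top if rank<s, bottom otherwise) of the
       accumulated product, and the final product is applied to the local rows
       of Q in blocks of l=2n rows plus a remainder block */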
    /* Accumulate orthogonal matrices */
    for (level=1;level<=nlevels;level++) {
      plevel = PetscPowInt(2,level);
      s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;
      Qhalf = (rank<s)? QQ+(level-1)*n*lda: QQ+(level-1)*n*lda+n;
      if (level<nlevels) {
        PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,QQ+level*n*lda,&l,Qhalf,&l,&zero,C,&l));
        PetscMemcpy(QQ+level*n*lda,C,n*lda*sizeof(PetscScalar));
      } else {
        for (i=0;i<m/l;i++) {
          PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,Q+i*l,&m,Qhalf,&l,&zero,C,&l));
          for (j=0;j<n;j++) { PetscMemcpy(Q+i*l+j*m,C+j*l,l*sizeof(PetscScalar)); }
        }
        sz = m%l;
        if (sz) {
          PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sz,&n,&n,&one,Q+(m/l)*l,&m,Qhalf,&l,&zero,C,&l));
          for (j=0;j<n;j++) { PetscMemcpy(Q+(m/l)*l+j*m,C+j*l,sz*sizeof(PetscScalar)); }
        }
      }
    }

    MPI_Type_free(&tmat);
  }

  PetscLogFlops(3.0*m*n*n);
  PetscFPTrapPop();
  return(0);
#endif
}

/*
   Reduction operation to compute [~,Rout] = qr([Rin1;Rin2]) in the TSQR
   algorithm; all matrices are upper triangular, stored in packed format
*/
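/*
   The packed format stores the upper triangle row by row: entry (i,j), i<=j,
   sits at offset (2*n-i-1)*i/2+j. The MPI datatype carries one such triangle,
   i.e. tsize = n*(n+1)/2 scalars, so n is recovered by solving that quadratic:
   n = (-1+sqrt(1+8*tsize))/2. Each incoming entry R2(i,j) is annihilated with
   a Givens rotation against the diagonal R1(j,j), and the rotation is then
   applied to the remainder of both rows.
*/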
SLEPC_EXTERN void SlepcGivensPacked(void *in,void *inout,PetscMPIInt *len,MPI_Datatype *datatype)
{
  PetscBLASInt n,i,j,k,one=1;
  PetscMPIInt  tsize;
  PetscScalar  v,s,*R2=(PetscScalar*)in,*R1=(PetscScalar*)inout;
  PetscReal    c;

  MPI_Type_size(*datatype,&tsize);  /* we assume len=1 */
  tsize /= sizeof(PetscScalar);
  n = (-1+(PetscBLASInt)PetscSqrtReal(1+8*tsize))/2;
  for (j=0;j<n;j++) {
    for (i=0;i<=j;i++) {
      LAPACKlartg_(R1+(2*n-j-1)*j/2+j,R2+(2*n-i-1)*i/2+j,&c,&s,&v);
      R1[(2*n-j-1)*j/2+j] = v;
      k = n-j-1;
      if (k) BLASrot_(&k,R1+(2*n-j-1)*j/2+j+1,&one,R2+(2*n-i-1)*i/2+j+1,&one,&c,&s);
    }
  }
  return;
}

/*
   Compute the R factor of the QR factorization of an mxn matrix via parallel TSQR
*/
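/*
   Same local GEQRF as above, but instead of exchanging triangles explicitly,
   the packed R factors are combined with a single MPI_Allreduce using the
   user-defined reduction MPIU_TSQR, which is backed by SlepcGivensPacked
   above; no orthogonal factor is formed.
*/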
PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
{
#if defined(PETSC_MISSING_LAPACK_GEQRF)
  SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable");
#else
  PetscInt     worklen;
  PetscBLASInt m,n,i,j,s,nb,lwork,info;
  PetscScalar  *tau,*work,*A=NULL,*R1=NULL,*R2=NULL;
  PetscMPIInt  size,count;
  MPI_Datatype tmat;

  PetscFPTrapPush(PETSC_FP_TRAP_OFF);
  PetscBLASIntCast(m_,&m);
  PetscBLASIntCast(n_,&n);
  nb = 16;
  s  = n+n*(n-1)/2;  /* length of packed triangular matrix */
  MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
  worklen = n+n*nb+2*s+m*n;
  BVAllocateWork_Private(bv,worklen);
  tau  = bv->work;
  work = bv->work+n;
  R1   = bv->work+n+n*nb;
  R2   = bv->work+n+n*nb+s;
  A    = bv->work+n+n*nb+2*s;
  PetscBLASIntCast(n*nb,&lwork);
  PetscMemcpy(A,Q,m*n*sizeof(PetscScalar));

  /* Compute QR */
  PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,A,&m,tau,work,&lwork,&info));
  SlepcCheckLapackInfo("geqrf",info);

  if (size==1) {
    /* Extract R */
    for (j=0;j<n;j++) {
      for (i=0;i<=PetscMin(j,m-1);i++) R[i+j*ldr] = A[i+j*m];
      for (i=PetscMin(j,m-1)+1;i<n;i++) R[i+j*ldr] = 0.0;
    }
  } else {
    /* Use MPI reduction operation to obtain global R */
    PetscMPIIntCast(s,&count);
    MPI_Type_contiguous(count,MPIU_SCALAR,&tmat);
    MPI_Type_commit(&tmat);
    for (i=0;i<n;i++) {
      for (j=i;j<n;j++) R1[(2*n-i-1)*i/2+j] = (i<m)? A[i+j*m]: 0.0;
    }
    MPI_Allreduce(R1,R2,1,tmat,MPIU_TSQR,PetscObjectComm((PetscObject)bv));
    for (i=0;i<n;i++) {
      for (j=0;j<i;j++) R[i+j*ldr] = 0.0;
      for (j=i;j<n;j++) R[i+j*ldr] = R2[(2*n-i-1)*i/2+j];
    }
    MPI_Type_free(&tmat);
  }

  PetscLogFlops(3.0*m*n*n);
  PetscFPTrapPop();
  return(0);
#endif
}