Actual source code: bvlapack.c

slepc-3.11.2 2019-07-30
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-2019, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    BV private kernels that use LAPACK
 12: */

 14: #include <slepc/private/bvimpl.h>
 15: #include <slepcblaslapack.h>

 17: /*
 18:     Compute ||A|| for an mxn matrix
 19: */
 20: PetscErrorCode BVNorm_LAPACK_Private(BV bv,PetscInt m_,PetscInt n_,const PetscScalar *A,NormType type,PetscReal *nrm,PetscBool mpi)
 21: {
 23:   PetscBLASInt   m,n,i,j;
 24:   PetscMPIInt    len;
 25:   PetscReal      lnrm,*rwork=NULL,*rwork2=NULL;

 28:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
 29:   PetscBLASIntCast(m_,&m);
 30:   PetscBLASIntCast(n_,&n);
 31:   if (type==NORM_FROBENIUS || type==NORM_2) {
 32:     lnrm = LAPACKlange_("F",&m,&n,(PetscScalar*)A,&m,rwork);
 33:     if (mpi) {
 34:       lnrm = lnrm*lnrm;
 35:       MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
 36:       *nrm = PetscSqrtReal(*nrm);
 37:     } else *nrm = lnrm;
 38:     PetscLogFlops(2.0*m*n);
 39:   } else if (type==NORM_1) {
 40:     if (mpi) {
 41:       BVAllocateWork_Private(bv,2*n_);
 42:       rwork = (PetscReal*)bv->work;
 43:       rwork2 = rwork+n_;
 44:       PetscMemzero(rwork,n_*sizeof(PetscReal));
 45:       PetscMemzero(rwork2,n_*sizeof(PetscReal));
 46:       for (j=0;j<n_;j++) {
 47:         for (i=0;i<m_;i++) {
 48:           rwork[j] += PetscAbsScalar(A[i+j*m_]);
 49:         }
 50:       }
 51:       PetscMPIIntCast(n_,&len);
 52:       MPI_Allreduce(rwork,rwork2,len,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)bv));
 53:       *nrm = 0.0;
 54:       for (j=0;j<n_;j++) if (rwork2[j] > *nrm) *nrm = rwork2[j];
 55:     } else {
 56:       *nrm = LAPACKlange_("O",&m,&n,(PetscScalar*)A,&m,rwork);
 57:     }
 58:     PetscLogFlops(1.0*m*n);
 59:   } else if (type==NORM_INFINITY) {
 60:     BVAllocateWork_Private(bv,m_);
 61:     rwork = (PetscReal*)bv->work;
 62:     lnrm = LAPACKlange_("I",&m,&n,(PetscScalar*)A,&m,rwork);
 63:     if (mpi) {
 64:       MPI_Allreduce(&lnrm,nrm,1,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)bv));
 65:     } else *nrm = lnrm;
 66:     PetscLogFlops(1.0*m*n);
 67:   }
 68:   PetscFPTrapPop();
 69:   return(0);
 70: }
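
In the parallel NORM_1 branch above, LAPACKlange_ cannot be used directly because the 1-norm is the maximum column sum and each process only holds some of the rows: the column sums must be accumulated across processes before the maximum is taken, which is what the rwork/rwork2 loop and MPI_Allreduce do. A minimal standalone sketch of that pattern in plain C and MPI (no SLEPc types; the function name dist_norm1 is illustrative, not part of bvlapack.c):

#include <mpi.h>
#include <math.h>
#include <stdlib.h>

/* Global 1-norm (maximum column sum) of a matrix distributed by rows:
   each process owns an mloc x n block, stored column-major in A. */
static double dist_norm1(MPI_Comm comm,int mloc,int n,const double *A)
{
  double *colsum = (double*)calloc((size_t)n,sizeof(double));
  double *gsum   = (double*)malloc((size_t)n*sizeof(double));
  double nrm = 0.0;
  int    i,j;
  for (j=0;j<n;j++)
    for (i=0;i<mloc;i++) colsum[j] += fabs(A[i+j*mloc]);  /* local column sums */
  MPI_Allreduce(colsum,gsum,n,MPI_DOUBLE,MPI_SUM,comm);    /* add contributions */
  for (j=0;j<n;j++) if (gsum[j] > nrm) nrm = gsum[j];      /* max over columns */
  free(colsum); free(gsum);
  return nrm;
}

The Frobenius case in the listing follows the same idea with a single scalar: local squared norms are summed with MPIU_SUM and the square root is taken at the end.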

 72: /*
 73:    Compute the upper Cholesky factor in R and its inverse in S.
 74:    If S == R then the inverse overwrites the Cholesky factor.
 75:  */
 76: PetscErrorCode BVMatCholInv_LAPACK_Private(BV bv,Mat R,Mat S)
 77: {
 78: #if defined(PETSC_MISSING_LAPACK_POTRF) || defined(SLEPC_MISSING_LAPACK_TRTRI)
 80:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"POTRF/TRTRI - Lapack routine is unavailable");
 81: #else
 83:   PetscInt       i,k,l,n,m,ld,lds;
 84:   PetscScalar    *pR,*pS;
 85:   PetscBLASInt   info,n_,m_,ld_,lds_;

 88:   l = bv->l;
 89:   k = bv->k;
 90:   MatGetSize(R,&m,NULL);
 91:   n = k-l;
 92:   PetscBLASIntCast(m,&m_);
 93:   PetscBLASIntCast(n,&n_);
 94:   ld  = m;
 95:   ld_ = m_;
 96:   MatDenseGetArray(R,&pR);

 98:   if (S==R) {
 99:     BVAllocateWork_Private(bv,m*k);
100:     pS = bv->work;
101:     lds = ld;
102:     lds_ = ld_;
103:   } else {
104:     MatDenseGetArray(S,&pS);
105:     MatGetSize(S,&lds,NULL);
106:     PetscBLASIntCast(lds,&lds_);
107:   }

109:   /* save a copy of matrix in S */
110:   for (i=l;i<k;i++) {
111:     PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
112:   }

114:   /* compute upper Cholesky factor in R */
115:   PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
116:   PetscLogFlops((1.0*n*n*n)/3.0);

118:   if (info) {  /* LAPACKpotrf failed, retry on diagonally perturbed matrix */
119:     for (i=l;i<k;i++) {
120:       PetscMemcpy(pR+i*ld+l,pS+i*lds+l,n*sizeof(PetscScalar));
121:       pR[i+i*ld] += 50.0*PETSC_MACHINE_EPSILON;
122:     }
123:     PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("U",&n_,pR+l*ld+l,&ld_,&info));
124:     SlepcCheckLapackInfo("potrf",info);
125:     PetscLogFlops((1.0*n*n*n)/3.0);
126:   }

128:   /* compute S = inv(R) */
129:   if (S==R) {
130:     PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
131:   } else {
132:     PetscMemzero(pS+l*lds,(k-l)*k*sizeof(PetscScalar));
133:     for (i=l;i<k;i++) {
134:       PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
135:     }
136:     PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
137:   }
138:   SlepcCheckLapackInfo("trtri",info);
139:   PetscLogFlops(0.33*n*n*n);

141:   /* Zero out entries below the diagonal */
142:   for (i=l;i<k-1;i++) {
143:     PetscMemzero(pR+i*ld+i+1,(k-i-1)*sizeof(PetscScalar));
144:     if (S!=R) { PetscMemzero(pS+i*lds+i+1,(k-i-1)*sizeof(PetscScalar)); }
145:   }
146:   MatDenseRestoreArray(R,&pR);
147:   if (S!=R) { MatDenseRestoreArray(S,&pS); }
148:   return(0);
149: #endif
150: }

152: /*
153:    Compute the inverse of an upper triangular matrix R, store it in S.
154:    If S == R then the inverse overwrites R.
155:  */
156: PetscErrorCode BVMatTriInv_LAPACK_Private(BV bv,Mat R,Mat S)
157: {
158: #if defined(SLEPC_MISSING_LAPACK_TRTRI)
160:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"TRTRI - Lapack routine is unavailable");
161: #else
163:   PetscInt       i,k,l,n,m,ld,lds;
164:   PetscScalar    *pR,*pS;
165:   PetscBLASInt   info,n_,m_,ld_,lds_;

168:   l = bv->l;
169:   k = bv->k;
170:   MatGetSize(R,&m,NULL);
171:   n = k-l;
172:   PetscBLASIntCast(m,&m_);
173:   PetscBLASIntCast(n,&n_);
174:   ld  = m;
175:   ld_ = m_;
176:   MatDenseGetArray(R,&pR);

178:   if (S==R) {
179:     BVAllocateWork_Private(bv,m*k);
180:     pS = bv->work;
181:     lds = ld;
182:     lds_ = ld_;
183:   } else {
184:     MatDenseGetArray(S,&pS);
185:     MatGetSize(S,&lds,NULL);
186:     PetscBLASIntCast(lds,&lds_);
187:   }

189:   /* compute S = inv(R) */
190:   if (S==R) {
191:     PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pR+l*ld+l,&ld_,&info));
192:   } else {
193:     PetscMemzero(pS+l*lds,(k-l)*k*sizeof(PetscScalar));
194:     for (i=l;i<k;i++) {
195:       PetscMemcpy(pS+i*lds+l,pR+i*ld+l,n*sizeof(PetscScalar));
196:     }
197:     PetscStackCallBLAS("LAPACKtrtri",LAPACKtrtri_("U","N",&n_,pS+l*lds+l,&lds_,&info));
198:   }
199:   SlepcCheckLapackInfo("trtri",info);
200:   PetscLogFlops(0.33*n*n*n);

202:   MatDenseRestoreArray(R,&pR);
203:   if (S!=R) { MatDenseRestoreArray(S,&pS); }
204:   return(0);
205: #endif
206: }
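
Both inversion routines apply xTRTRI only to the trailing active block of size n = k-l, addressed by the pointer offset l*ld+l while keeping the full leading dimension ld. A small standalone LAPACKE sketch of that submatrix addressing (illustrative only):

#include <lapacke.h>
#include <stdio.h>

int main(void)
{
  /* 3x3 upper triangular matrix, column-major, leading dimension ld = 3 */
  double R[9] = { 1.0, 0.0, 0.0,
                  4.0, 2.0, 0.0,
                  5.0, 6.0, 3.0 };
  lapack_int ld = 3, l = 1, n = 2, info;

  /* Invert only the trailing 2x2 block starting at row/column l: the pointer
     offset l*ld+l selects its top-left entry, ld stays the leading dimension */
  info = LAPACKE_dtrtri(LAPACK_COL_MAJOR,'U','N',n,R+l*ld+l,ld);
  if (info) return 1;

  printf("inv block = [ %g %g ; 0 %g ]\n",R[4],R[7],R[8]);
  return 0;
}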

208: /*
209:    Compute the matrix to be used for post-multiplying the basis in the SVQB
210:    block orthogonalization method.
211:    On input R = V'*V, on output S = D*U*Lambda^{-1/2} where (U,Lambda) is
212:    the eigendecomposition of D*R*D with D=diag(R)^{-1/2}.
213:    If S == R then the result overwrites R.
214:  */
215: PetscErrorCode BVMatSVQB_LAPACK_Private(BV bv,Mat R,Mat S)
216: {
217: #if defined(SLEPC_MISSING_LAPACK_SYEV)
219:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SYEV - Lapack routine is unavailable");
220: #else
222:   PetscInt       i,j,k,l,n,m,ld,lds;
223:   PetscScalar    *pR,*pS,*D,*work,a;
224:   PetscReal      *eig,dummy;
225:   PetscBLASInt   info,lwork,n_,m_,ld_,lds_;
226: #if defined(PETSC_USE_COMPLEX)
227:   PetscReal      *rwork,rdummy;
228: #endif

231:   l = bv->l;
232:   k = bv->k;
233:   MatGetSize(R,&m,NULL);
234:   n = k-l;
235:   PetscBLASIntCast(m,&m_);
236:   PetscBLASIntCast(n,&n_);
237:   ld  = m;
238:   ld_ = m_;
239:   MatDenseGetArray(R,&pR);

241:   if (S==R) {
242:     pS = pR;
243:     lds = ld;
244:     lds_ = ld_;
245:   } else {
246:     MatDenseGetArray(S,&pS);
247:     MatGetSize(S,&lds,NULL);
248:     PetscBLASIntCast(lds,&lds_);
249:   }

251:   /* workspace query and memory allocation */
252:   lwork = -1;
253: #if defined(PETSC_USE_COMPLEX)
254:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&rdummy,&info));
255:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
256:   PetscMalloc4(n,&eig,n,&D,lwork,&work,PetscMax(1,3*n-2),&rwork);
257: #else
258:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS,&lds_,&dummy,&a,&lwork,&info));
259:   PetscBLASIntCast((PetscInt)PetscRealPart(a),&lwork);
260:   PetscMalloc3(n,&eig,n,&D,lwork,&work);
261: #endif

263:   /* copy and scale matrix */
264:   for (i=l;i<k;i++) D[i-l] = 1.0/PetscSqrtReal(PetscRealPart(pR[i+i*ld]));
265:   for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] = pR[i+j*ld]*D[i-l];
266:   for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] *= D[j-l];

268:   /* compute eigendecomposition */
269: #if defined(PETSC_USE_COMPLEX)
270:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,rwork,&info));
271: #else
272:   PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","L",&n_,pS+l*lds+l,&lds_,eig,work,&lwork,&info));
273: #endif
274:   SlepcCheckLapackInfo("syev",info);

276:   if (S!=R) {   /* R = U' */
277:     for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] = pS[j+i*lds];
278:   }

280:   /* compute S = D*U*Lambda^{-1/2} */
281:   for (i=l;i<k;i++) for (j=l;j<k;j++) pS[i+j*lds] *= D[i-l];
282:   for (j=l;j<k;j++) for (i=l;i<k;i++) pS[i+j*lds] /= PetscSqrtReal(eig[j-l]);

284:   if (S!=R) {   /* compute R = inv(S) = Lambda^{1/2}*U'/D */
285:     for (i=l;i<k;i++) for (j=l;j<k;j++) pR[i+j*ld] *= PetscSqrtReal(eig[i-l]);
286:     for (j=l;j<k;j++) for (i=l;i<k;i++) pR[i+j*ld] /= D[j-l];
287:   }

289: #if defined(PETSC_USE_COMPLEX)
290:   PetscFree4(eig,D,work,rwork);
291: #else
292:   PetscFree3(eig,D,work);
293: #endif
294:   PetscLogFlops(9.0*n*n*n);

296:   MatDenseRestoreArray(R,&pR);
297:   if (S!=R) { MatDenseRestoreArray(S,&pS); }
298:   return(0);
299: #endif
300: }
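
The SVQB transform computed above is easiest to check on a tiny dense example: given the Gram matrix R = V'*V, set D = diag(R)^{-1/2}, eigendecompose D*R*D = U*Lambda*U', and form S = D*U*Lambda^{-1/2}; then S'*R*S = I, so V*S has orthonormal columns. A standalone real-arithmetic sketch with LAPACKE (no SLEPc types, no locked leading block, and none of the complex-scalar handling above; names are illustrative):

#include <lapacke.h>
#include <math.h>
#include <stdio.h>

int main(void)
{
  /* Gram matrix R = V'*V of two non-orthonormal vectors, column-major */
  double R[4] = { 4.0, 1.0,
                  1.0, 9.0 };
  double D[2], M[4], eig[2], S[4];
  lapack_int n = 2, info;
  int i,j;

  for (i=0;i<n;i++) D[i] = 1.0/sqrt(R[i+i*n]);              /* D = diag(R)^{-1/2} */
  for (j=0;j<n;j++) for (i=0;i<n;i++)
    M[i+j*n] = D[i]*R[i+j*n]*D[j];                          /* M = D*R*D */

  info = LAPACKE_dsyev(LAPACK_COL_MAJOR,'V','L',n,M,n,eig); /* M <- U, eig <- Lambda */
  if (info) return 1;

  for (j=0;j<n;j++) for (i=0;i<n;i++)
    S[i+j*n] = D[i]*M[i+j*n]/sqrt(eig[j]);                  /* S = D*U*Lambda^{-1/2} */

  /* check: S'*R*S should be the identity */
  for (i=0;i<n;i++) {
    for (j=0;j<n;j++) {
      double sum = 0.0;
      int p,q;
      for (p=0;p<n;p++) for (q=0;q<n;q++) sum += S[p+i*n]*R[p+q*n]*S[q+j*n];
      printf("%8.4f ",sum);
    }
    printf("\n");
  }
  return 0;
}

The printed matrix should be the 2x2 identity up to rounding.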

302: /*
303:     QR factorization of an mxn matrix via parallel TSQR
304: */
305: PetscErrorCode BVOrthogonalize_LAPACK_TSQR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
306: {
307: #if defined(PETSC_MISSING_LAPACK_GEQRF) || defined(PETSC_MISSING_LAPACK_ORGQR)
309:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF/ORGQR - Lapack routines are unavailable");
310: #else
312:   PetscInt       level,plevel,nlevels,powtwo,lda,worklen;
313:   PetscBLASInt   m,n,i,j,k,l,s,nb,sz,lwork,info;
314:   PetscScalar    *tau,*work,*A=NULL,*QQ=NULL,*Qhalf,*C=NULL,one=1.0,zero=0.0;
315:   PetscMPIInt    rank,size,count,stride;
316:   MPI_Datatype   tmat;

319:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
320:   PetscBLASIntCast(m_,&m);
321:   PetscBLASIntCast(n_,&n);
322:   k  = PetscMin(m,n);
323:   nb = 16;
324:   lda = 2*n;
325:   MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
326:   MPI_Comm_rank(PetscObjectComm((PetscObject)bv),&rank);
327:   nlevels = (PetscInt)PetscCeilReal(PetscLog2Real((PetscReal)size));
328:   powtwo  = PetscPowInt(2,(PetscInt)PetscFloorReal(PetscLog2Real((PetscReal)size)));
329:   worklen = n+n*nb;
330:   if (nlevels) worklen += n*lda+n*lda*nlevels+n*lda;
331:   BVAllocateWork_Private(bv,worklen);
332:   tau  = bv->work;
333:   work = bv->work+n;
334:   PetscBLASIntCast(n*nb,&lwork);
335:   if (nlevels) {
336:     A  = bv->work+n+n*nb;
337:     QQ = bv->work+n+n*nb+n*lda;
338:     C  = bv->work+n+n*nb+n*lda+n*lda*nlevels;
339:   }

341:   /* Compute QR */
342:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,Q,&m,tau,work,&lwork,&info));
343:   SlepcCheckLapackInfo("geqrf",info);

345:   /* Extract R */
346:   if (R || nlevels) {
347:     for (j=0;j<n;j++) {
348:       for (i=0;i<=PetscMin(j,m-1);i++) {
349:         if (nlevels) A[i+j*lda] = Q[i+j*m];
350:         else R[i+j*ldr] = Q[i+j*m];
351:       }
352:       for (i=PetscMin(j,m-1)+1;i<n;i++) {
353:         if (nlevels) A[i+j*lda] = 0.0;
354:         else R[i+j*ldr] = 0.0;
355:       }
356:     }
357:   }

359:   /* Compute orthogonal matrix in Q */
360:   PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&m,&k,&k,Q,&m,tau,work,&lwork,&info));
361:   SlepcCheckLapackInfo("orgqr",info);

363:   if (nlevels) {

365:     PetscMPIIntCast(n,&count);
366:     PetscMPIIntCast(lda,&stride);
367:     PetscBLASIntCast(lda,&l);
368:     MPI_Type_vector(count,count,stride,MPIU_SCALAR,&tmat);
369:     MPI_Type_commit(&tmat);

371:     for (level=nlevels;level>=1;level--) {

373:       plevel = PetscPowInt(2,level);
374:       s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;

376:       /* Stack triangular matrices */
377:       if (rank<s && s<size) {  /* send top part, receive bottom part */
378:         MPI_Sendrecv(A,1,tmat,s,111,A+n,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
379:       } else if (s<size) {  /* copy top to bottom, receive top part */
380:         MPI_Sendrecv(A,1,tmat,rank,111,A+n,1,tmat,rank,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
381:         MPI_Sendrecv(A+n,1,tmat,s,111,A,1,tmat,s,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
382:       }
383:       if (level<nlevels && size!=powtwo) {  /* for cases when size is not a power of 2 */
384:         if (rank<size-powtwo) {  /* send bottom part */
385:           MPI_Send(A+n,1,tmat,rank+powtwo,111,PetscObjectComm((PetscObject)bv));
386:         } else if (rank>=powtwo) {  /* receive bottom part */
387:           MPI_Recv(A+n,1,tmat,rank-powtwo,111,PetscObjectComm((PetscObject)bv),MPI_STATUS_IGNORE);
388:         }
389:       }
390:       /* Compute QR and build orthogonal matrix */
391:       if (level<nlevels || (level==nlevels && s<size)) {
392:         PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&l,&n,A,&l,tau,work,&lwork,&info));
393:         SlepcCheckLapackInfo("geqrf",info);
394:         PetscMemcpy(QQ+(level-1)*n*lda,A,n*lda*sizeof(PetscScalar));
395:         PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&l,&n,&n,QQ+(level-1)*n*lda,&l,tau,work,&lwork,&info));
396:         SlepcCheckLapackInfo("orgqr",info);
397:         for (j=0;j<n;j++) {
398:           for (i=j+1;i<n;i++) A[i+j*lda] = 0.0;
399:         }
400:       } else if (level==nlevels) {  /* only one triangular matrix, set Q=I */
401:         PetscMemzero(QQ+(level-1)*n*lda,n*lda*sizeof(PetscScalar));
402:         for (j=0;j<n;j++) QQ[j+j*lda+(level-1)*n*lda] = 1.0;
403:       }
404:     }

406:     /* Extract R */
407:     if (R) {
408:       for (j=0;j<n;j++) {
409:         for (i=0;i<=j;i++) R[i+j*ldr] = A[i+j*lda];
410:         for (i=j+1;i<n;i++) R[i+j*ldr] = 0.0;
411:       }
412:     }

414:     /* Accumulate orthogonal matrices */
415:     for (level=1;level<=nlevels;level++) {
416:       plevel = PetscPowInt(2,level);
417:       s = plevel*PetscFloorReal(rank/(PetscReal)plevel)+(rank+PetscPowInt(2,level-1))%plevel;
418:       Qhalf = (rank<s)? QQ+(level-1)*n*lda: QQ+(level-1)*n*lda+n;
419:       if (level<nlevels) {
420:         PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,QQ+level*n*lda,&l,Qhalf,&l,&zero,C,&l));
421:         PetscMemcpy(QQ+level*n*lda,C,n*lda*sizeof(PetscScalar));
422:       } else {
423:         for (i=0;i<m/l;i++) {
424:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&l,&n,&n,&one,Q+i*l,&m,Qhalf,&l,&zero,C,&l));
425:           for (j=0;j<n;j++) { PetscMemcpy(Q+i*l+j*m,C+j*l,l*sizeof(PetscScalar)); }
426:         }
427:         sz = m%l;
428:         if (sz) {
429:           PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&sz,&n,&n,&one,Q+(m/l)*l,&m,Qhalf,&l,&zero,C,&l));
430:           for (j=0;j<n;j++) { PetscMemcpy(Q+(m/l)*l+j*m,C+j*l,sz*sizeof(PetscScalar)); }
431:         }
432:       }
433:     }

435:     MPI_Type_free(&tmat);
436:   }

438:   PetscLogFlops(3.0*m*n*n);
439:   PetscFPTrapPop();
440:   return(0);
441: #endif
442: }
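
The communication pattern above implements TSQR: every process first factors its local block, then at each level two n x n triangular factors are stacked into a 2n x n matrix and re-factored with GEQRF, and the small orthogonal factors built with ORGQR are later multiplied back into the tall Q. A serial sketch of one such pairwise reduction step with LAPACKE (the helper name tsqr_pair and its interface are illustrative, not SLEPc API):

#include <lapacke.h>

/* One TSQR reduction step: given two n x n upper triangular factors R1,R2
   (column-major, leading dimension n), overwrite R1 with the R factor of
   qr([R1;R2]) and return the 2n x n orthogonal factor in Q (ld 2n).
   tau must have room for n scalars. */
static int tsqr_pair(lapack_int n,double *R1,double *R2,double *Q,double *tau)
{
  lapack_int m = 2*n, i, j, info;

  /* stack the two triangles into a 2n x n matrix */
  for (j=0;j<n;j++) {
    for (i=0;i<n;i++) Q[i+j*m]   = R1[i+j*n];
    for (i=0;i<n;i++) Q[n+i+j*m] = R2[i+j*n];
  }
  info = LAPACKE_dgeqrf(LAPACK_COL_MAJOR,m,n,Q,m,tau);      /* QR of the stack */
  if (info) return (int)info;
  for (j=0;j<n;j++)                                         /* extract new R1 */
    for (i=0;i<n;i++) R1[i+j*n] = (i<=j)? Q[i+j*m] : 0.0;
  info = LAPACKE_dorgqr(LAPACK_COL_MAJOR,m,n,n,Q,m,tau);    /* explicit 2n x n Q */
  return (int)info;
}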

444: /*
445:     Reduction operation to compute [~,Rout]=qr([Rin1;Rin2]) in the TSQR algorithm;
 446:     all matrices are upper triangular, stored in packed format
447: */
448: SLEPC_EXTERN void SlepcGivensPacked(void *in,void *inout,PetscMPIInt *len,MPI_Datatype *datatype)
449: {
451:   PetscBLASInt   n,i,j,k,one=1;
452:   PetscMPIInt    tsize;
453:   PetscScalar    v,s,*R2=(PetscScalar*)in,*R1=(PetscScalar*)inout;
454:   PetscReal      c;

 457:   MPI_Type_size(*datatype,&tsize);  /* we assume len=1 */
458:   tsize /= sizeof(PetscScalar);
459:   n = (-1+(PetscBLASInt)PetscSqrtReal(1+8*tsize))/2;
460:   for (j=0;j<n;j++) {
461:     for (i=0;i<=j;i++) {
462:       LAPACKlartg_(R1+(2*n-j-1)*j/2+j,R2+(2*n-i-1)*i/2+j,&c,&s,&v);
463:       R1[(2*n-j-1)*j/2+j] = v;
464:       k = n-j-1;
465:       if (k) BLASrot_(&k,R1+(2*n-j-1)*j/2+j+1,&one,R2+(2*n-i-1)*i/2+j+1,&one,&c,&s);
466:     }
467:   }
468:   PetscFunctionReturnVoid();
469: }
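
SlepcGivensPacked works on upper triangular matrices stored row-wise in packed form: row i keeps only its entries j = i,...,n-1, entry (i,j) lives at offset (2n-i-1)*i/2 + j, and the whole triangle occupies s = n(n+1)/2 scalars, which is why n can be recovered from the datatype size as above. A tiny standalone check of that indexing (the helper name packed_index is illustrative):

#include <stdio.h>

/* offset of entry (i,j), 0 <= i <= j < n, in the row-wise packed
   upper triangle used by SlepcGivensPacked */
static int packed_index(int n,int i,int j)
{
  return (2*n-i-1)*i/2 + j;
}

int main(void)
{
  int n = 4, i, j, pos = 0;
  /* the offsets enumerate the triangle row by row: 0..3, 4..6, 7..8, 9 */
  for (i=0;i<n;i++)
    for (j=i;j<n;j++,pos++)
      printf("(%d,%d) -> %d %s\n",i,j,packed_index(n,i,j),
             packed_index(n,i,j)==pos ? "" : "MISMATCH");
  return 0;
}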

471: /*
472:     Computes the R factor of the QR factorization of an mxn matrix via parallel TSQR
473: */
474: PetscErrorCode BVOrthogonalize_LAPACK_TSQR_OnlyR(BV bv,PetscInt m_,PetscInt n_,PetscScalar *Q,PetscScalar *R,PetscInt ldr)
475: {
476: #if defined(PETSC_MISSING_LAPACK_GEQRF)
478:   SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"GEQRF - Lapack routine is unavailable");
479: #else
481:   PetscInt       worklen;
482:   PetscBLASInt   m,n,i,j,s,nb,lwork,info;
483:   PetscScalar    *tau,*work,*A=NULL,*R1=NULL,*R2=NULL;
484:   PetscMPIInt    size,count;
485:   MPI_Datatype   tmat;

488:   PetscFPTrapPush(PETSC_FP_TRAP_OFF);
489:   PetscBLASIntCast(m_,&m);
490:   PetscBLASIntCast(n_,&n);
491:   nb = 16;
492:   s  = n+n*(n-1)/2;  /* length of packed triangular matrix */
493:   MPI_Comm_size(PetscObjectComm((PetscObject)bv),&size);
494:   worklen = n+n*nb+2*s+m*n;
495:   BVAllocateWork_Private(bv,worklen);
496:   tau  = bv->work;
497:   work = bv->work+n;
498:   R1   = bv->work+n+n*nb;
499:   R2   = bv->work+n+n*nb+s;
500:   A    = bv->work+n+n*nb+2*s;
501:   PetscBLASIntCast(n*nb,&lwork);
502:   PetscMemcpy(A,Q,m*n*sizeof(PetscScalar));

504:   /* Compute QR */
505:   PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&m,&n,A,&m,tau,work,&lwork,&info));
506:   SlepcCheckLapackInfo("geqrf",info);

508:   if (size==1) {
509:     /* Extract R */
510:     for (j=0;j<n;j++) {
511:       for (i=0;i<=PetscMin(j,m-1);i++) R[i+j*ldr] = A[i+j*m];
512:       for (i=PetscMin(j,m-1)+1;i<n;i++) R[i+j*ldr] = 0.0;
513:     }
514:   } else {
515:     /* Use MPI reduction operation to obtain global R */
516:     PetscMPIIntCast(s,&count);
517:     MPI_Type_contiguous(count,MPIU_SCALAR,&tmat);
518:     MPI_Type_commit(&tmat);
519:     for (i=0;i<n;i++) {
520:       for (j=i;j<n;j++) R1[(2*n-i-1)*i/2+j] = (i<m)?A[i+j*m]:0.0;
521:     }
522:     MPI_Allreduce(R1,R2,1,tmat,MPIU_TSQR,PetscObjectComm((PetscObject)bv));
523:     for (i=0;i<n;i++) {
524:       for (j=0;j<i;j++) R[i+j*ldr] = 0.0;
525:       for (j=i;j<n;j++) R[i+j*ldr] = R2[(2*n-i-1)*i/2+j];
526:     }
527:     MPI_Type_free(&tmat);
528:   }

530:   PetscLogFlops(3.0*m*n*n);
531:   PetscFPTrapPop();
532:   return(0);
533: #endif
534: }
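
The MPIU_TSQR operation passed to MPI_Allreduce here is a user-defined MPI reduction; judging from the combiner above, it is presumably registered elsewhere in SLEPc from SlepcGivensPacked via MPI_Op_create, so that each reduction step merges two packed R factors with Givens rotations. A minimal standalone sketch of creating and using such a user-defined, non-commutative reduction (the combiner below just adds entries as a stand-in for the Givens-based merge; all names are illustrative):

#include <mpi.h>
#include <stdio.h>

/* toy combiner with the MPI_User_function signature; SLEPc's MPIU_TSQR would
   use SlepcGivensPacked here instead, merging two packed triangular factors */
static void packed_merge(void *in,void *inout,int *len,MPI_Datatype *dtype)
{
  double *x = (double*)in, *y = (double*)inout;
  int     i, tsize;
  MPI_Type_size(*dtype,&tsize);
  for (i=0;i<(int)(tsize/sizeof(double))*(*len);i++) y[i] += x[i];
}

int main(int argc,char **argv)
{
  int          n = 3, s = n*(n+1)/2;     /* length of one packed triangle */
  double       R1[6] = {1,1,1,1,1,1}, R2[6];
  MPI_Op       op;
  MPI_Datatype tmat;

  MPI_Init(&argc,&argv);
  MPI_Type_contiguous(s,MPI_DOUBLE,&tmat);       /* one packed triangle */
  MPI_Type_commit(&tmat);
  MPI_Op_create(packed_merge,0,&op);             /* 0: non-commutative combiner */
  MPI_Allreduce(R1,R2,1,tmat,op,MPI_COMM_WORLD); /* same shape as the call above */
  printf("R2[0] after reduction: %g\n",R2[0]);
  MPI_Op_free(&op);
  MPI_Type_free(&tmat);
  MPI_Finalize();
  return 0;
}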