Actual source code: matscalapack.c

  1: #include <petsc/private/petscscalapack.h>

  3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
  4: "       AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
  5: "                 J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
  6: "                 G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
  7: "       TITLE = {Sca{LAPACK} Users' Guide},\n"
  8: "       PUBLISHER = {SIAM},\n"
  9: "       ADDRESS = {Philadelphia, PA},\n"
 10: "       YEAR = 1997\n"
 11: "}\n";
 12: static PetscBool ScaLAPACKCite = PETSC_FALSE;

 14: #define DEFAULT_BLOCKSIZE 64

 16: /*
 17:     The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
 18:   is attached to a communicator, in this case the attribute is a Mat_ScaLAPACK_Grid
 19: */
 20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;

 22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
 23: {
 24:   PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
 25:   MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
 26:   return 0;
 27: }

 29: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
 30: {
 31:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
 32:   PetscBool         iascii;
 33:   PetscViewerFormat format;
 34:   Mat               Adense;

 36:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
 37:   if (iascii) {
 38:     PetscViewerGetFormat(viewer,&format);
 39:     if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
 40:       PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
 41:       PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
 42:       PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
 43:       PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
 44:       return 0;
 45:     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
 46:       return 0;
 47:     }
 48:   }
 49:   /* convert to dense format and call MatView() */
 50:   MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
 51:   MatView(Adense,viewer);
 52:   MatDestroy(&Adense);
 53:   return 0;
 54: }

 56: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
 57: {
 58:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
 59:   PetscLogDouble isend[2],irecv[2];

 61:   info->block_size = 1.0;

 63:   isend[0] = a->lld*a->locc;     /* locally allocated */
 64:   isend[1] = a->locr*a->locc;    /* used submatrix */
 65:   if (flag == MAT_LOCAL || flag == MAT_GLOBAL_MAX) {
 66:     info->nz_allocated   = isend[0];
 67:     info->nz_used        = isend[1];
 68:   } else if (flag == MAT_GLOBAL_MAX) {
 69:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
 70:     info->nz_allocated   = irecv[0];
 71:     info->nz_used        = irecv[1];
 72:   } else if (flag == MAT_GLOBAL_SUM) {
 73:     MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
 74:     info->nz_allocated   = irecv[0];
 75:     info->nz_used        = irecv[1];
 76:   }

 78:   info->nz_unneeded       = 0;
 79:   info->assemblies        = A->num_ass;
 80:   info->mallocs           = 0;
 81:   info->memory            = ((PetscObject)A)->mem;
 82:   info->fill_ratio_given  = 0;
 83:   info->fill_ratio_needed = 0;
 84:   info->factor_mallocs    = 0;
 85:   return 0;
 86: }

 88: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
 89: {
 90:   switch (op) {
 91:     case MAT_NEW_NONZERO_LOCATIONS:
 92:     case MAT_NEW_NONZERO_LOCATION_ERR:
 93:     case MAT_NEW_NONZERO_ALLOCATION_ERR:
 94:     case MAT_SYMMETRIC:
 95:     case MAT_SORTED_FULL:
 96:     case MAT_HERMITIAN:
 97:       break;
 98:     default:
 99:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
100:   }
101:   return 0;
102: }

104: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
105: {
106:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
107:   PetscInt       i,j;
108:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

110:   for (i=0;i<nr;i++) {
111:     if (rows[i] < 0) continue;
112:     PetscBLASIntCast(rows[i]+1,&gridx);
113:     for (j=0;j<nc;j++) {
114:       if (cols[j] < 0) continue;
115:       PetscBLASIntCast(cols[j]+1,&gcidx);
116:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
117:       if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
118:         switch (imode) {
119:           case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
120:           case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
121:           default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
122:         }
123:       } else {
125:         A->assembled = PETSC_FALSE;
126:         MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
127:       }
128:     }
129:   }
130:   return 0;
131: }

133: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
134: {
135:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
136:   PetscScalar    *x2d,*y2d,alpha=1.0;
137:   const PetscInt *ranges;
138:   PetscBLASInt   xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;

140:   if (transpose) {

142:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
143:     PetscLayoutGetRanges(A->rmap,&ranges);
144:     PetscBLASIntCast(ranges[1],&mb);  /* x block size */
145:     xlld = PetscMax(1,A->rmap->n);
146:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
148:     PetscLayoutGetRanges(A->cmap,&ranges);
149:     PetscBLASIntCast(ranges[1],&nb);  /* y block size */
150:     ylld = 1;
151:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));

154:     /* allocate 2d vectors */
155:     lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
156:     lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
157:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
158:     xlld = PetscMax(1,lszx);

160:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
161:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
163:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));

166:     /* redistribute x as a column of a 2d matrix */
167:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

169:     /* redistribute y as a row of a 2d matrix */
170:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));

172:     /* call PBLAS subroutine */
173:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

175:     /* redistribute y from a row of a 2d matrix */
176:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));

178:   } else {   /* non-transpose */

180:     /* create ScaLAPACK descriptors for vectors (1d block distribution) */
181:     PetscLayoutGetRanges(A->cmap,&ranges);
182:     PetscBLASIntCast(ranges[1],&nb);  /* x block size */
183:     xlld = 1;
184:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
186:     PetscLayoutGetRanges(A->rmap,&ranges);
187:     PetscBLASIntCast(ranges[1],&mb);  /* y block size */
188:     ylld = PetscMax(1,A->rmap->n);
189:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));

192:     /* allocate 2d vectors */
193:     lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
194:     lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
195:     PetscMalloc2(lszx,&x2d,lszy,&y2d);
196:     ylld = PetscMax(1,lszy);

198:     /* create ScaLAPACK descriptors for vectors (2d block distribution) */
199:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
201:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));

204:     /* redistribute x as a row of a 2d matrix */
205:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));

207:     /* redistribute y as a column of a 2d matrix */
208:     if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));

210:     /* call PBLAS subroutine */
211:     PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));

213:     /* redistribute y from a column of a 2d matrix */
214:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));

216:   }
217:   PetscFree2(x2d,y2d);
218:   return 0;
219: }

221: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
222: {
223:   const PetscScalar *xarray;
224:   PetscScalar       *yarray;

226:   VecGetArrayRead(x,&xarray);
227:   VecGetArray(y,&yarray);
228:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
229:   VecRestoreArrayRead(x,&xarray);
230:   VecRestoreArray(y,&yarray);
231:   return 0;
232: }

234: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
235: {
236:   const PetscScalar *xarray;
237:   PetscScalar       *yarray;

239:   VecGetArrayRead(x,&xarray);
240:   VecGetArray(y,&yarray);
241:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
242:   VecRestoreArrayRead(x,&xarray);
243:   VecRestoreArray(y,&yarray);
244:   return 0;
245: }

247: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
248: {
249:   const PetscScalar *xarray;
250:   PetscScalar       *zarray;

252:   if (y != z) VecCopy(y,z);
253:   VecGetArrayRead(x,&xarray);
254:   VecGetArray(z,&zarray);
255:   MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
256:   VecRestoreArrayRead(x,&xarray);
257:   VecRestoreArray(z,&zarray);
258:   return 0;
259: }

261: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
262: {
263:   const PetscScalar *xarray;
264:   PetscScalar       *zarray;

266:   if (y != z) VecCopy(y,z);
267:   VecGetArrayRead(x,&xarray);
268:   VecGetArray(z,&zarray);
269:   MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
270:   VecRestoreArrayRead(x,&xarray);
271:   VecRestoreArray(z,&zarray);
272:   return 0;
273: }

275: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
276: {
277:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
278:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
279:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
280:   PetscScalar   sone=1.0,zero=0.0;
281:   PetscBLASInt  one=1;

283:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284:   C->assembled = PETSC_TRUE;
285:   return 0;
286: }

288: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289: {
290:   MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
291:   MatSetType(C,MATSCALAPACK);
292:   MatSetUp(C);
293:   C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
294:   return 0;
295: }

297: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
298: {
299:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
300:   Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
301:   Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
302:   PetscScalar   sone=1.0,zero=0.0;
303:   PetscBLASInt  one=1;

305:   PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
306:   C->assembled = PETSC_TRUE;
307:   return 0;
308: }

310: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
311: {
312:   MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
313:   MatSetType(C,MATSCALAPACK);
314:   MatSetUp(C);
315:   return 0;
316: }

318: /* --------------------------------------- */
319: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
320: {
321:   C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
322:   C->ops->productsymbolic = MatProductSymbolic_AB;
323:   return 0;
324: }

326: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
327: {
328:   C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
329:   C->ops->productsymbolic          = MatProductSymbolic_ABt;
330:   return 0;
331: }

333: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
334: {
335:   Mat_Product    *product = C->product;

337:   switch (product->type) {
338:     case MATPRODUCT_AB:
339:       MatProductSetFromOptions_ScaLAPACK_AB(C);
340:       break;
341:     case MATPRODUCT_ABt:
342:       MatProductSetFromOptions_ScaLAPACK_ABt(C);
343:       break;
344:     default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
345:   }
346:   return 0;
347: }
348: /* --------------------------------------- */

350: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
351: {
352:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
353:   PetscScalar       *darray,*d2d,v;
354:   const PetscInt    *ranges;
355:   PetscBLASInt      j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

357:   VecGetArray(D,&darray);

359:   if (A->rmap->N<=A->cmap->N) {   /* row version */

361:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
362:     PetscLayoutGetRanges(A->rmap,&ranges);
363:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
364:     dlld = PetscMax(1,A->rmap->n);
365:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

368:     /* allocate 2d vector */
369:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
370:     PetscCalloc1(lszd,&d2d);
371:     dlld = PetscMax(1,lszd);

373:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
374:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

377:     /* collect diagonal */
378:     for (j=1;j<=a->M;j++) {
379:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
380:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
381:     }

383:     /* redistribute d from a column of a 2d matrix */
384:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
385:     PetscFree(d2d);

387:   } else {   /* column version */

389:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
390:     PetscLayoutGetRanges(A->cmap,&ranges);
391:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
392:     dlld = 1;
393:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

396:     /* allocate 2d vector */
397:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
398:     PetscCalloc1(lszd,&d2d);

400:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
401:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

404:     /* collect diagonal */
405:     for (j=1;j<=a->N;j++) {
406:       PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
407:       PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
408:     }

410:     /* redistribute d from a row of a 2d matrix */
411:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
412:     PetscFree(d2d);
413:   }

415:   VecRestoreArray(D,&darray);
416:   VecAssemblyBegin(D);
417:   VecAssemblyEnd(D);
418:   return 0;
419: }

421: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
422: {
423:   Mat_ScaLAPACK     *a = (Mat_ScaLAPACK*)A->data;
424:   const PetscScalar *d;
425:   const PetscInt    *ranges;
426:   PetscScalar       *d2d;
427:   PetscBLASInt      i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;

429:   if (R) {
430:     VecGetArrayRead(R,(const PetscScalar **)&d);
431:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
432:     PetscLayoutGetRanges(A->cmap,&ranges);
433:     PetscBLASIntCast(ranges[1],&nb);  /* D block size */
434:     dlld = 1;
435:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));

438:     /* allocate 2d vector */
439:     lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
440:     PetscCalloc1(lszd,&d2d);

442:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
443:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));

446:     /* redistribute d to a row of a 2d matrix */
447:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));

449:     /* broadcast along process columns */
450:     if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
451:     else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);

453:     /* local scaling */
454:     for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];

456:     PetscFree(d2d);
457:     VecRestoreArrayRead(R,(const PetscScalar **)&d);
458:   }
459:   if (L) {
460:     VecGetArrayRead(L,(const PetscScalar **)&d);
461:     /* create ScaLAPACK descriptor for vector (1d block distribution) */
462:     PetscLayoutGetRanges(A->rmap,&ranges);
463:     PetscBLASIntCast(ranges[1],&mb);  /* D block size */
464:     dlld = PetscMax(1,A->rmap->n);
465:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));

468:     /* allocate 2d vector */
469:     lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
470:     PetscCalloc1(lszd,&d2d);
471:     dlld = PetscMax(1,lszd);

473:     /* create ScaLAPACK descriptor for vector (2d block distribution) */
474:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));

477:     /* redistribute d to a column of a 2d matrix */
478:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));

480:     /* broadcast along process rows */
481:     if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
482:     else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);

484:     /* local scaling */
485:     for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];

487:     PetscFree(d2d);
488:     VecRestoreArrayRead(L,(const PetscScalar **)&d);
489:   }
490:   return 0;
491: }

493: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
494: {
495:   *missing = PETSC_FALSE;
496:   return 0;
497: }

499: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
500: {
501:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
502:   PetscBLASInt  n,one=1;

504:   n = x->lld*x->locc;
505:   PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
506:   return 0;
507: }

509: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
510: {
511:   Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
512:   PetscBLASInt  i,n;
513:   PetscScalar   v;

515:   n = PetscMin(x->M,x->N);
516:   for (i=1;i<=n;i++) {
517:     PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
518:     v += alpha;
519:     PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
520:   }
521:   return 0;
522: }

524: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
525: {
526:   Mat_ScaLAPACK  *x = (Mat_ScaLAPACK*)X->data;
527:   Mat_ScaLAPACK  *y = (Mat_ScaLAPACK*)Y->data;
528:   PetscBLASInt   one=1;
529:   PetscScalar    beta=1.0;

531:   MatScaLAPACKCheckDistribution(Y,1,X,3);
532:   PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
533:   PetscObjectStateIncrease((PetscObject)Y);
534:   return 0;
535: }

537: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
538: {
539:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
540:   Mat_ScaLAPACK  *b = (Mat_ScaLAPACK*)B->data;

542:   PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
543:   PetscObjectStateIncrease((PetscObject)B);
544:   return 0;
545: }

547: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
548: {
549:   Mat            Bs;
550:   MPI_Comm       comm;
551:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data,*b;

553:   PetscObjectGetComm((PetscObject)A,&comm);
554:   MatCreate(comm,&Bs);
555:   MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
556:   MatSetType(Bs,MATSCALAPACK);
557:   b = (Mat_ScaLAPACK*)Bs->data;
558:   b->M    = a->M;
559:   b->N    = a->N;
560:   b->mb   = a->mb;
561:   b->nb   = a->nb;
562:   b->rsrc = a->rsrc;
563:   b->csrc = a->csrc;
564:   MatSetUp(Bs);
565:   *B = Bs;
566:   if (op == MAT_COPY_VALUES) {
567:     PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
568:   }
569:   Bs->assembled = PETSC_TRUE;
570:   return 0;
571: }

573: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
574: {
575:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
576:   Mat            Bs = *B;
577:   PetscBLASInt   one=1;
578:   PetscScalar    sone=1.0,zero=0.0;
579: #if defined(PETSC_USE_COMPLEX)
580:   PetscInt       i,j;
581: #endif

583:   if (reuse == MAT_INITIAL_MATRIX) {
584:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
585:     *B = Bs;
586:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
587:   b = (Mat_ScaLAPACK*)Bs->data;
588:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
589: #if defined(PETSC_USE_COMPLEX)
590:   /* undo conjugation */
591:   for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
592: #endif
593:   Bs->assembled = PETSC_TRUE;
594:   return 0;
595: }

597: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
598: {
599:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
600:   PetscInt      i,j;

602:   for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
603:   return 0;
604: }

606: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
607: {
608:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data, *b;
609:   Mat            Bs = *B;
610:   PetscBLASInt   one=1;
611:   PetscScalar    sone=1.0,zero=0.0;

613:   if (reuse == MAT_INITIAL_MATRIX) {
614:     MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
615:     *B = Bs;
616:   } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
617:   b = (Mat_ScaLAPACK*)Bs->data;
618:   PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
619:   Bs->assembled = PETSC_TRUE;
620:   return 0;
621: }

623: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
624: {
625:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
626:   PetscScalar    *x,*x2d;
627:   const PetscInt *ranges;
628:   PetscBLASInt   xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;

630:   VecCopy(B,X);
631:   VecGetArray(X,&x);

633:   /* create ScaLAPACK descriptor for a vector (1d block distribution) */
634:   PetscLayoutGetRanges(A->rmap,&ranges);
635:   PetscBLASIntCast(ranges[1],&mb);  /* x block size */
636:   xlld = PetscMax(1,A->rmap->n);
637:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));

640:   /* allocate 2d vector */
641:   lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
642:   PetscMalloc1(lszx,&x2d);
643:   xlld = PetscMax(1,lszx);

645:   /* create ScaLAPACK descriptor for a vector (2d block distribution) */
646:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));

649:   /* redistribute x as a column of a 2d matrix */
650:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));

652:   /* call ScaLAPACK subroutine */
653:   switch (A->factortype) {
654:     case MAT_FACTOR_LU:
655:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
657:       break;
658:     case MAT_FACTOR_CHOLESKY:
659:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
661:       break;
662:     default:
663:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
664:   }

666:   /* redistribute x from a column of a 2d matrix */
667:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));

669:   PetscFree(x2d);
670:   VecRestoreArray(X,&x);
671:   return 0;
672: }

674: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
675: {
676:   MatSolve_ScaLAPACK(A,B,X);
677:   VecAXPY(X,1,Y);
678:   return 0;
679: }

681: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
682: {
683:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
684:   PetscBool      flg1,flg2;
685:   PetscBLASInt   one=1,info;

687:   PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
688:   PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
690:   MatScaLAPACKCheckDistribution(B,1,X,2);
691:   b = (Mat_ScaLAPACK*)B->data;
692:   x = (Mat_ScaLAPACK*)X->data;
693:   PetscArraycpy(x->loc,b->loc,b->lld*b->locc);

695:   switch (A->factortype) {
696:     case MAT_FACTOR_LU:
697:       PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
699:       break;
700:     case MAT_FACTOR_CHOLESKY:
701:       PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
703:       break;
704:     default:
705:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
706:   }
707:   return 0;
708: }

710: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
711: {
712:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
713:   PetscBLASInt   one=1,info;

715:   if (!a->pivots) {
716:     PetscMalloc1(a->locr+a->mb,&a->pivots);
717:     PetscLogObjectMemory((PetscObject)A,a->locr*sizeof(PetscBLASInt));
718:   }
719:   PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
721:   A->factortype = MAT_FACTOR_LU;
722:   A->assembled  = PETSC_TRUE;

724:   PetscFree(A->solvertype);
725:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
726:   return 0;
727: }

729: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
730: {
731:   MatCopy(A,F,SAME_NONZERO_PATTERN);
732:   MatLUFactor_ScaLAPACK(F,0,0,info);
733:   return 0;
734: }

736: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
737: {
738:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
739:   return 0;
740: }

742: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
743: {
744:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
745:   PetscBLASInt   one=1,info;

747:   PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
749:   A->factortype = MAT_FACTOR_CHOLESKY;
750:   A->assembled  = PETSC_TRUE;

752:   PetscFree(A->solvertype);
753:   PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
754:   return 0;
755: }

757: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
758: {
759:   MatCopy(A,F,SAME_NONZERO_PATTERN);
760:   MatCholeskyFactor_ScaLAPACK(F,0,info);
761:   return 0;
762: }

764: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
765: {
766:   /* F is created and allocated by MatGetFactor_scalapack_petsc(), skip this routine. */
767:   return 0;
768: }

770: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
771: {
772:   *type = MATSOLVERSCALAPACK;
773:   return 0;
774: }

776: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
777: {
778:   Mat            B;
779:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

781:   /* Create the factorization matrix */
782:   MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
783:   B->trivialsymbolic = PETSC_TRUE;
784:   B->factortype = ftype;
785:   PetscFree(B->solvertype);
786:   PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);

788:   PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
789:   *F = B;
790:   return 0;
791: }

793: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
794: {
795:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
796:   MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
797:   return 0;
798: }

800: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
801: {
802:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
803:   PetscBLASInt   one=1,lwork=0;
804:   const char     *ntype;
805:   PetscScalar    *work=NULL,dummy;

807:   switch (type) {
808:     case NORM_1:
809:       ntype = "1";
810:       lwork = PetscMax(a->locr,a->locc);
811:       break;
812:     case NORM_FROBENIUS:
813:       ntype = "F";
814:       work  = &dummy;
815:       break;
816:     case NORM_INFINITY:
817:       ntype = "I";
818:       lwork = PetscMax(a->locr,a->locc);
819:       break;
820:     default:
821:       SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
822:   }
823:   if (lwork) PetscMalloc1(lwork,&work);
824:   *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
825:   if (lwork) PetscFree(work);
826:   return 0;
827: }

829: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
830: {
831:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

833:   PetscArrayzero(a->loc,a->lld*a->locc);
834:   return 0;
835: }

837: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
838: {
839:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
840:   PetscInt       i,n,nb,isrc,nproc,iproc,*idx;

842:   if (rows) {
843:     n     = a->locr;
844:     nb    = a->mb;
845:     isrc  = a->rsrc;
846:     nproc = a->grid->nprow;
847:     iproc = a->grid->myrow;
848:     PetscMalloc1(n,&idx);
849:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
850:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
851:   }
852:   if (cols) {
853:     n     = a->locc;
854:     nb    = a->nb;
855:     isrc  = a->csrc;
856:     nproc = a->grid->npcol;
857:     iproc = a->grid->mycol;
858:     PetscMalloc1(n,&idx);
859:     for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
860:     ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
861:   }
862:   return 0;
863: }

865: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
866: {
867:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
868:   Mat            Bmpi;
869:   MPI_Comm       comm;
870:   PetscInt       i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
871:   const PetscInt *ranges,*branges,*cwork;
872:   const PetscScalar *vwork;
873:   PetscBLASInt   bdesc[9],bmb,zero=0,one=1,lld,info;
874:   PetscScalar    *barray;
875:   PetscBool      differ=PETSC_FALSE;
876:   PetscMPIInt    size;

878:   PetscObjectGetComm((PetscObject)A,&comm);
879:   PetscLayoutGetRanges(A->rmap,&ranges);

881:   if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
882:     MPI_Comm_size(comm,&size);
883:     PetscLayoutGetRanges((*B)->rmap,&branges);
884:     for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
885:   }

887:   if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
888:     MatCreate(comm,&Bmpi);
889:     m = PETSC_DECIDE;
890:     PetscSplitOwnershipEqual(comm,&m,&M);
891:     n = PETSC_DECIDE;
892:     PetscSplitOwnershipEqual(comm,&n,&N);
893:     MatSetSizes(Bmpi,m,n,M,N);
894:     MatSetType(Bmpi,MATDENSE);
895:     MatSetUp(Bmpi);

897:     /* create ScaLAPACK descriptor for B (1d block distribution) */
898:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
899:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
900:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

903:     /* redistribute matrix */
904:     MatDenseGetArray(Bmpi,&barray);
905:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
906:     MatDenseRestoreArray(Bmpi,&barray);
907:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
908:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);

910:     /* transfer rows of auxiliary matrix to the final matrix B */
911:     MatGetOwnershipRange(Bmpi,&rstart,&rend);
912:     for (i=rstart;i<rend;i++) {
913:       MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
914:       MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
915:       MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
916:     }
917:     MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
918:     MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
919:     MatDestroy(&Bmpi);

921:   } else {  /* normal cases */

923:     if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
924:     else {
925:       MatCreate(comm,&Bmpi);
926:       m = PETSC_DECIDE;
927:       PetscSplitOwnershipEqual(comm,&m,&M);
928:       n = PETSC_DECIDE;
929:       PetscSplitOwnershipEqual(comm,&n,&N);
930:       MatSetSizes(Bmpi,m,n,M,N);
931:       MatSetType(Bmpi,MATDENSE);
932:       MatSetUp(Bmpi);
933:     }

935:     /* create ScaLAPACK descriptor for B (1d block distribution) */
936:     PetscBLASIntCast(ranges[1],&bmb);  /* row block size */
937:     lld = PetscMax(A->rmap->n,1);  /* local leading dimension */
938:     PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));

941:     /* redistribute matrix */
942:     MatDenseGetArray(Bmpi,&barray);
943:     PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
944:     MatDenseRestoreArray(Bmpi,&barray);

946:     MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
947:     MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
948:     if (reuse == MAT_INPLACE_MATRIX) {
949:       MatHeaderReplace(A,&Bmpi);
950:     } else *B = Bmpi;
951:   }
952:   return 0;
953: }

955: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
956: {
957:   Mat_ScaLAPACK  *b;
958:   Mat            Bmpi;
959:   MPI_Comm       comm;
960:   PetscInt       M=A->rmap->N,N=A->cmap->N,m,n;
961:   const PetscInt *ranges;
962:   PetscBLASInt   adesc[9],amb,zero=0,one=1,lld,info;
963:   PetscScalar    *aarray;
964:   PetscInt       lda;

966:   PetscObjectGetComm((PetscObject)A,&comm);

968:   if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
969:   else {
970:     MatCreate(comm,&Bmpi);
971:     m = PETSC_DECIDE;
972:     PetscSplitOwnershipEqual(comm,&m,&M);
973:     n = PETSC_DECIDE;
974:     PetscSplitOwnershipEqual(comm,&n,&N);
975:     MatSetSizes(Bmpi,m,n,M,N);
976:     MatSetType(Bmpi,MATSCALAPACK);
977:     MatSetUp(Bmpi);
978:   }
979:   b = (Mat_ScaLAPACK*)Bmpi->data;

981:   /* create ScaLAPACK descriptor for A (1d block distribution) */
982:   PetscLayoutGetRanges(A->rmap,&ranges);
983:   PetscBLASIntCast(ranges[1],&amb);  /* row block size */
984:   MatDenseGetLDA(A,&lda);
985:   lld = PetscMax(lda,1);  /* local leading dimension */
986:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));

989:   /* redistribute matrix */
990:   MatDenseGetArray(A,&aarray);
991:   PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
992:   MatDenseRestoreArray(A,&aarray);

994:   MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
995:   MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
996:   if (reuse == MAT_INPLACE_MATRIX) {
997:     MatHeaderReplace(A,&Bmpi);
998:   } else *B = Bmpi;
999:   return 0;
1000: }

1002: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1003: {
1004:   Mat               mat_scal;
1005:   PetscInt          M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1006:   const PetscInt    *cols;
1007:   const PetscScalar *vals;

1009:   if (reuse == MAT_REUSE_MATRIX) {
1010:     mat_scal = *newmat;
1011:     MatZeroEntries(mat_scal);
1012:   } else {
1013:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1014:     m = PETSC_DECIDE;
1015:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1016:     n = PETSC_DECIDE;
1017:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1018:     MatSetSizes(mat_scal,m,n,M,N);
1019:     MatSetType(mat_scal,MATSCALAPACK);
1020:     MatSetUp(mat_scal);
1021:   }
1022:   for (row=rstart;row<rend;row++) {
1023:     MatGetRow(A,row,&ncols,&cols,&vals);
1024:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1025:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1026:   }
1027:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1028:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1030:   if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1031:   else *newmat = mat_scal;
1032:   return 0;
1033: }

1035: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1036: {
1037:   Mat               mat_scal;
1038:   PetscInt          M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1039:   const PetscInt    *cols;
1040:   const PetscScalar *vals;
1041:   PetscScalar       v;

1043:   if (reuse == MAT_REUSE_MATRIX) {
1044:     mat_scal = *newmat;
1045:     MatZeroEntries(mat_scal);
1046:   } else {
1047:     MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1048:     m = PETSC_DECIDE;
1049:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1050:     n = PETSC_DECIDE;
1051:     PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1052:     MatSetSizes(mat_scal,m,n,M,N);
1053:     MatSetType(mat_scal,MATSCALAPACK);
1054:     MatSetUp(mat_scal);
1055:   }
1056:   MatGetRowUpperTriangular(A);
1057:   for (row=rstart;row<rend;row++) {
1058:     MatGetRow(A,row,&ncols,&cols,&vals);
1059:     MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1060:     for (j=0;j<ncols;j++) { /* lower triangular part */
1061:       if (cols[j] == row) continue;
1062:       v    = A->hermitian ? PetscConj(vals[j]) : vals[j];
1063:       MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1064:     }
1065:     MatRestoreRow(A,row,&ncols,&cols,&vals);
1066:   }
1067:   MatRestoreRowUpperTriangular(A);
1068:   MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1069:   MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);

1071:   if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1072:   else *newmat = mat_scal;
1073:   return 0;
1074: }

1076: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1077: {
1078:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1079:   PetscInt       sz=0;

1081:   PetscLayoutSetUp(A->rmap);
1082:   PetscLayoutSetUp(A->cmap);
1083:   if (!a->lld) a->lld = a->locr;

1085:   PetscFree(a->loc);
1086:   PetscIntMultError(a->lld,a->locc,&sz);
1087:   PetscCalloc1(sz,&a->loc);
1088:   PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));

1090:   A->preallocated = PETSC_TRUE;
1091:   return 0;
1092: }

1094: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1095: {
1096:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)A->data;
1097:   Mat_ScaLAPACK_Grid *grid;
1098:   PetscBool          flg;
1099:   MPI_Comm           icomm;

1101:   MatStashDestroy_Private(&A->stash);
1102:   PetscFree(a->loc);
1103:   PetscFree(a->pivots);
1104:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1105:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1106:   if (--grid->grid_refct == 0) {
1107:     Cblacs_gridexit(grid->ictxt);
1108:     Cblacs_gridexit(grid->ictxrow);
1109:     Cblacs_gridexit(grid->ictxcol);
1110:     PetscFree(grid);
1111:     MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1112:   }
1113:   PetscCommDestroy(&icomm);
1114:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1115:   PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1116:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1117:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1118:   PetscFree(A->data);
1119:   return 0;
1120: }

1122: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1123: {
1124:   const PetscInt *ranges;
1125:   PetscMPIInt    size;
1126:   PetscInt       i,n;

1128:   MPI_Comm_size(map->comm,&size);
1129:   if (size>2) {
1130:     PetscLayoutGetRanges(map,&ranges);
1131:     n = ranges[1]-ranges[0];
1132:     for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1134:   }
1135:   return 0;
1136: }

1138: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1139: {
1140:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1141:   PetscBLASInt   info=0;

1143:   PetscLayoutSetUp(A->rmap);
1144:   PetscLayoutSetUp(A->cmap);

1146:   /* check that the layout is as enforced by MatCreateScaLAPACK */
1147:   MatScaLAPACKCheckLayout(A->rmap);
1148:   MatScaLAPACKCheckLayout(A->cmap);

1150:   /* compute local sizes */
1151:   PetscBLASIntCast(A->rmap->N,&a->M);
1152:   PetscBLASIntCast(A->cmap->N,&a->N);
1153:   a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1154:   a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1155:   a->lld  = PetscMax(1,a->locr);

1157:   /* allocate local array */
1158:   MatScaLAPACKSetPreallocation(A);

1160:   /* set up ScaLAPACK descriptor */
1161:   PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1163:   return 0;
1164: }

1166: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1167: {
1168:   PetscInt       nstash,reallocs;

1170:   if (A->nooffprocentries) return 0;
1171:   MatStashScatterBegin_Private(A,&A->stash,NULL);
1172:   MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1173:   PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
1174:   return 0;
1175: }

1177: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1178: {
1179:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;
1180:   PetscMPIInt    n;
1181:   PetscInt       i,flg,*row,*col;
1182:   PetscScalar    *val;
1183:   PetscBLASInt   gridx,gcidx,lridx,lcidx,rsrc,csrc;

1185:   if (A->nooffprocentries) return 0;
1186:   while (1) {
1187:     MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1188:     if (!flg) break;
1189:     for (i=0;i<n;i++) {
1190:       PetscBLASIntCast(row[i]+1,&gridx);
1191:       PetscBLASIntCast(col[i]+1,&gcidx);
1192:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1194:       switch (A->insertmode) {
1195:         case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1196:         case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1197:         default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1198:       }
1199:     }
1200:   }
1201:   MatStashScatterEnd_Private(&A->stash);
1202:   return 0;
1203: }

1205: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1206: {
1207:   Mat            Adense,As;
1208:   MPI_Comm       comm;

1210:   PetscObjectGetComm((PetscObject)newMat,&comm);
1211:   MatCreate(comm,&Adense);
1212:   MatSetType(Adense,MATDENSE);
1213:   MatLoad(Adense,viewer);
1214:   MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1215:   MatDestroy(&Adense);
1216:   MatHeaderReplace(newMat,&As);
1217:   return 0;
1218: }

1220: /* -------------------------------------------------------------------*/
1221: static struct _MatOps MatOps_Values = {
1222:        MatSetValues_ScaLAPACK,
1223:        0,
1224:        0,
1225:        MatMult_ScaLAPACK,
1226: /* 4*/ MatMultAdd_ScaLAPACK,
1227:        MatMultTranspose_ScaLAPACK,
1228:        MatMultTransposeAdd_ScaLAPACK,
1229:        MatSolve_ScaLAPACK,
1230:        MatSolveAdd_ScaLAPACK,
1231:        0,
1232: /*10*/ 0,
1233:        MatLUFactor_ScaLAPACK,
1234:        MatCholeskyFactor_ScaLAPACK,
1235:        0,
1236:        MatTranspose_ScaLAPACK,
1237: /*15*/ MatGetInfo_ScaLAPACK,
1238:        0,
1239:        MatGetDiagonal_ScaLAPACK,
1240:        MatDiagonalScale_ScaLAPACK,
1241:        MatNorm_ScaLAPACK,
1242: /*20*/ MatAssemblyBegin_ScaLAPACK,
1243:        MatAssemblyEnd_ScaLAPACK,
1244:        MatSetOption_ScaLAPACK,
1245:        MatZeroEntries_ScaLAPACK,
1246: /*24*/ 0,
1247:        MatLUFactorSymbolic_ScaLAPACK,
1248:        MatLUFactorNumeric_ScaLAPACK,
1249:        MatCholeskyFactorSymbolic_ScaLAPACK,
1250:        MatCholeskyFactorNumeric_ScaLAPACK,
1251: /*29*/ MatSetUp_ScaLAPACK,
1252:        0,
1253:        0,
1254:        0,
1255:        0,
1256: /*34*/ MatDuplicate_ScaLAPACK,
1257:        0,
1258:        0,
1259:        0,
1260:        0,
1261: /*39*/ MatAXPY_ScaLAPACK,
1262:        0,
1263:        0,
1264:        0,
1265:        MatCopy_ScaLAPACK,
1266: /*44*/ 0,
1267:        MatScale_ScaLAPACK,
1268:        MatShift_ScaLAPACK,
1269:        0,
1270:        0,
1271: /*49*/ 0,
1272:        0,
1273:        0,
1274:        0,
1275:        0,
1276: /*54*/ 0,
1277:        0,
1278:        0,
1279:        0,
1280:        0,
1281: /*59*/ 0,
1282:        MatDestroy_ScaLAPACK,
1283:        MatView_ScaLAPACK,
1284:        0,
1285:        0,
1286: /*64*/ 0,
1287:        0,
1288:        0,
1289:        0,
1290:        0,
1291: /*69*/ 0,
1292:        0,
1293:        MatConvert_ScaLAPACK_Dense,
1294:        0,
1295:        0,
1296: /*74*/ 0,
1297:        0,
1298:        0,
1299:        0,
1300:        0,
1301: /*79*/ 0,
1302:        0,
1303:        0,
1304:        0,
1305:        MatLoad_ScaLAPACK,
1306: /*84*/ 0,
1307:        0,
1308:        0,
1309:        0,
1310:        0,
1311: /*89*/ 0,
1312:        0,
1313:        MatMatMultNumeric_ScaLAPACK,
1314:        0,
1315:        0,
1316: /*94*/ 0,
1317:        0,
1318:        0,
1319:        MatMatTransposeMultNumeric_ScaLAPACK,
1320:        0,
1321: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1322:        0,
1323:        0,
1324:        MatConjugate_ScaLAPACK,
1325:        0,
1326: /*104*/0,
1327:        0,
1328:        0,
1329:        0,
1330:        0,
1331: /*109*/MatMatSolve_ScaLAPACK,
1332:        0,
1333:        0,
1334:        0,
1335:        MatMissingDiagonal_ScaLAPACK,
1336: /*114*/0,
1337:        0,
1338:        0,
1339:        0,
1340:        0,
1341: /*119*/0,
1342:        MatHermitianTranspose_ScaLAPACK,
1343:        0,
1344:        0,
1345:        0,
1346: /*124*/0,
1347:        0,
1348:        0,
1349:        0,
1350:        0,
1351: /*129*/0,
1352:        0,
1353:        0,
1354:        0,
1355:        0,
1356: /*134*/0,
1357:        0,
1358:        0,
1359:        0,
1360:        0,
1361:        0,
1362: /*140*/0,
1363:        0,
1364:        0,
1365:        0,
1366:        0,
1367: /*145*/0,
1368:        0,
1369:        0
1370: };

1372: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1373: {
1374:   PetscInt           *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1375:   PetscInt           size=stash->size,nsends;
1376:   PetscInt           count,*sindices,**rindices,i,j,l;
1377:   PetscScalar        **rvalues,*svalues;
1378:   MPI_Comm           comm = stash->comm;
1379:   MPI_Request        *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1380:   PetscMPIInt        *sizes,*nlengths,nreceives;
1381:   PetscInt           *sp_idx,*sp_idy;
1382:   PetscScalar        *sp_val;
1383:   PetscMatStashSpace space,space_next;
1384:   PetscBLASInt       gridx,gcidx,lridx,lcidx,rsrc,csrc;
1385:   Mat_ScaLAPACK      *a = (Mat_ScaLAPACK*)mat->data;

1387:   {                             /* make sure all processors are either in INSERTMODE or ADDMODE */
1388:     InsertMode addv;
1389:     MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1391:     mat->insertmode = addv; /* in case this processor had no cache */
1392:   }

1394:   bs2 = stash->bs*stash->bs;

1396:   /*  first count number of contributors to each processor */
1397:   PetscCalloc1(size,&nlengths);
1398:   PetscMalloc1(stash->n+1,&owner);

1400:   i     = j    = 0;
1401:   space = stash->space_head;
1402:   while (space) {
1403:     space_next = space->next;
1404:     for (l=0; l<space->local_used; l++) {
1405:       PetscBLASIntCast(space->idx[l]+1,&gridx);
1406:       PetscBLASIntCast(space->idy[l]+1,&gcidx);
1407:       PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1408:       j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1409:       nlengths[j]++; owner[i] = j;
1410:       i++;
1411:     }
1412:     space = space_next;
1413:   }

1415:   /* Now check what procs get messages - and compute nsends. */
1416:   PetscCalloc1(size,&sizes);
1417:   for (i=0, nsends=0; i<size; i++) {
1418:     if (nlengths[i]) {
1419:       sizes[i] = 1; nsends++;
1420:     }
1421:   }

1423:   {PetscMPIInt *onodes,*olengths;
1424:    /* Determine the number of messages to expect, their lengths, from from-ids */
1425:    PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1426:    PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1427:    /* since clubbing row,col - lengths are multiplied by 2 */
1428:    for (i=0; i<nreceives; i++) olengths[i] *=2;
1429:    PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1430:    /* values are size 'bs2' lengths (and remove earlier factor 2 */
1431:    for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1432:    PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1433:    PetscFree(onodes);
1434:    PetscFree(olengths);}

1436:   /* do sends:
1437:       1) starts[i] gives the starting index in svalues for stuff going to
1438:          the ith processor
1439:   */
1440:   PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1441:   PetscMalloc1(2*nsends,&send_waits);
1442:   PetscMalloc2(size,&startv,size,&starti);
1443:   /* use 2 sends the first with all_a, the next with all_i and all_j */
1444:   startv[0] = 0; starti[0] = 0;
1445:   for (i=1; i<size; i++) {
1446:     startv[i] = startv[i-1] + nlengths[i-1];
1447:     starti[i] = starti[i-1] + 2*nlengths[i-1];
1448:   }

1450:   i     = 0;
1451:   space = stash->space_head;
1452:   while (space) {
1453:     space_next = space->next;
1454:     sp_idx     = space->idx;
1455:     sp_idy     = space->idy;
1456:     sp_val     = space->val;
1457:     for (l=0; l<space->local_used; l++) {
1458:       j = owner[i];
1459:       if (bs2 == 1) {
1460:         svalues[startv[j]] = sp_val[l];
1461:       } else {
1462:         PetscInt    k;
1463:         PetscScalar *buf1,*buf2;
1464:         buf1 = svalues+bs2*startv[j];
1465:         buf2 = space->val + bs2*l;
1466:         for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1467:       }
1468:       sindices[starti[j]]             = sp_idx[l];
1469:       sindices[starti[j]+nlengths[j]] = sp_idy[l];
1470:       startv[j]++;
1471:       starti[j]++;
1472:       i++;
1473:     }
1474:     space = space_next;
1475:   }
1476:   startv[0] = 0;
1477:   for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];

1479:   for (i=0,count=0; i<size; i++) {
1480:     if (sizes[i]) {
1481:       MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1482:       MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1483:     }
1484:   }
1485: #if defined(PETSC_USE_INFO)
1486:   PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends);
1487:   for (i=0; i<size; i++) {
1488:     if (sizes[i]) {
1489:       PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
1490:     }
1491:   }
1492: #endif
1493:   PetscFree(nlengths);
1494:   PetscFree(owner);
1495:   PetscFree2(startv,starti);
1496:   PetscFree(sizes);

1498:   /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1499:   PetscMalloc1(2*nreceives,&recv_waits);

1501:   for (i=0; i<nreceives; i++) {
1502:     recv_waits[2*i]   = recv_waits1[i];
1503:     recv_waits[2*i+1] = recv_waits2[i];
1504:   }
1505:   stash->recv_waits = recv_waits;

1507:   PetscFree(recv_waits1);
1508:   PetscFree(recv_waits2);

1510:   stash->svalues         = svalues;
1511:   stash->sindices        = sindices;
1512:   stash->rvalues         = rvalues;
1513:   stash->rindices        = rindices;
1514:   stash->send_waits      = send_waits;
1515:   stash->nsends          = nsends;
1516:   stash->nrecvs          = nreceives;
1517:   stash->reproduce_count = 0;
1518:   return 0;
1519: }

1521: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1522: {
1523:   Mat_ScaLAPACK  *a = (Mat_ScaLAPACK*)A->data;

1528:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1529:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1530:   return 0;
1531: }

1533: /*@
1534:    MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distibution of
1535:    the ScaLAPACK matrix

1537:    Logically Collective on A

1539:    Input Parameters:
1540: +  A  - a MATSCALAPACK matrix
1541: .  mb - the row block size
1542: -  nb - the column block size

1544:    Level: intermediate

1546: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1547: @*/
1548: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1549: {
1553:   PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1554:   return 0;
1555: }

1557: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1558: {
1559:   Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;

1561:   if (mb) *mb = a->mb;
1562:   if (nb) *nb = a->nb;
1563:   return 0;
1564: }

1566: /*@
1567:    MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distibution of
1568:    the ScaLAPACK matrix

1570:    Not collective

1572:    Input Parameter:
1573: .  A  - a MATSCALAPACK matrix

1575:    Output Parameters:
1576: +  mb - the row block size
1577: -  nb - the column block size

1579:    Level: intermediate

1581: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1582: @*/
1583: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1584: {
1586:   PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1587:   return 0;
1588: }

1590: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1591: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);

1593: /*MC
1594:    MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package

1596:    Use ./configure --download-scalapack to install PETSc to use ScaLAPACK

1598:    Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver

1600:    Options Database Keys:
1601: +  -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1602: .  -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1603: -  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1605:    Level: beginner

1607: .seealso: MATDENSE, MATELEMENTAL
1608: M*/

1610: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1611: {
1612:   Mat_ScaLAPACK      *a;
1613:   PetscErrorCode     ierr;
1614:   PetscBool          flg,flg1;
1615:   Mat_ScaLAPACK_Grid *grid;
1616:   MPI_Comm           icomm;
1617:   PetscBLASInt       nprow,npcol,myrow,mycol;
1618:   PetscInt           optv1,k=2,array[2]={0,0};
1619:   PetscMPIInt        size;

1621:   PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1622:   A->insertmode = NOT_SET_VALUES;

1624:   MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1625:   A->stash.ScatterBegin   = MatStashScatterBegin_ScaLAPACK;
1626:   A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1627:   A->stash.ScatterEnd     = MatStashScatterEnd_Ref;
1628:   A->stash.ScatterDestroy = NULL;

1630:   PetscNewLog(A,&a);
1631:   A->data = (void*)a;

1633:   /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1634:   if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1635:     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1636:     PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1637:     PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1638:   }
1639:   PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1640:   MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1641:   if (!flg) {
1642:     PetscNewLog(A,&grid);

1644:     MPI_Comm_size(icomm,&size);
1645:     grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);

1647:     PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1648:     PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1649:     if (flg1) {
1651:       grid->nprow = optv1;
1652:     }
1653:     PetscOptionsEnd();

1655:     if (size % grid->nprow) grid->nprow = 1;  /* cannot use a squarish grid, use a 1d grid */
1656:     grid->npcol = size/grid->nprow;
1657:     PetscBLASIntCast(grid->nprow,&nprow);
1658:     PetscBLASIntCast(grid->npcol,&npcol);
1659:     grid->ictxt = Csys2blacs_handle(icomm);
1660:     Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1661:     Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1662:     grid->grid_refct = 1;
1663:     grid->nprow      = nprow;
1664:     grid->npcol      = npcol;
1665:     grid->myrow      = myrow;
1666:     grid->mycol      = mycol;
1667:     /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1668:     grid->ictxrow = Csys2blacs_handle(icomm);
1669:     Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1670:     grid->ictxcol = Csys2blacs_handle(icomm);
1671:     Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1672:     MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);

1674:   } else grid->grid_refct++;
1675:   PetscCommDestroy(&icomm);
1676:   a->grid = grid;
1677:   a->mb   = DEFAULT_BLOCKSIZE;
1678:   a->nb   = DEFAULT_BLOCKSIZE;

1680:   PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1681:   PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1682:   if (flg) {
1683:     a->mb = array[0];
1684:     a->nb = (k>1)? array[1]: a->mb;
1685:   }
1686:   PetscOptionsEnd();

1688:   PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1689:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1690:   PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1691:   PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1692:   return 0;
1693: }

1695: /*@C
1696:    MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1697:    (2D block cyclic distribution).

1699:    Collective

1701:    Input Parameters:
1702: +  comm - MPI communicator
1703: .  mb   - row block size (or PETSC_DECIDE to have it set)
1704: .  nb   - column block size (or PETSC_DECIDE to have it set)
1705: .  M    - number of global rows
1706: .  N    - number of global columns
1707: .  rsrc - coordinate of process that owns the first row of the distributed matrix
1708: -  csrc - coordinate of process that owns the first column of the distributed matrix

1710:    Output Parameter:
1711: .  A - the matrix

1713:    Options Database Keys:
1714: .  -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)

1716:    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1717:    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1718:    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]

1720:    Notes:
1721:    If PETSC_DECIDE is used for the block sizes, then an appropriate value
1722:    is chosen.

1724:    Storage Information:
1725:    Storate is completely managed by ScaLAPACK, so this requires PETSc to be
1726:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1727:    significance and are thus ignored. The block sizes refer to the values
1728:    used for the distributed matrix, not the same meaning as in BAIJ.

1730:    Level: intermediate

1732: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1733: @*/
1734: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1735: {
1736:   Mat_ScaLAPACK  *a;
1737:   PetscInt       m,n;

1739:   MatCreate(comm,A);
1740:   MatSetType(*A,MATSCALAPACK);
1742:   /* rows and columns are NOT distributed according to PetscSplitOwnership */
1743:   m = PETSC_DECIDE;
1744:   PetscSplitOwnershipEqual(comm,&m,&M);
1745:   n = PETSC_DECIDE;
1746:   PetscSplitOwnershipEqual(comm,&n,&N);
1747:   MatSetSizes(*A,m,n,M,N);
1748:   a = (Mat_ScaLAPACK*)(*A)->data;
1749:   PetscBLASIntCast(M,&a->M);
1750:   PetscBLASIntCast(N,&a->N);
1751:   PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1752:   PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1753:   PetscBLASIntCast(rsrc,&a->rsrc);
1754:   PetscBLASIntCast(csrc,&a->csrc);
1755:   MatSetUp(*A);
1756:   return 0;
1757: }