Actual source code: matscalapack.c
1: #include <petsc/private/petscscalapack.h>
3: const char ScaLAPACKCitation[] = "@BOOK{scalapack-user-guide,\n"
4: " AUTHOR = {L. S. Blackford and J. Choi and A. Cleary and E. D'Azevedo and\n"
5: " J. Demmel and I. Dhillon and J. Dongarra and S. Hammarling and\n"
6: " G. Henry and A. Petitet and K. Stanley and D. Walker and R. C. Whaley},\n"
7: " TITLE = {Sca{LAPACK} Users' Guide},\n"
8: " PUBLISHER = {SIAM},\n"
9: " ADDRESS = {Philadelphia, PA},\n"
10: " YEAR = 1997\n"
11: "}\n";
12: static PetscBool ScaLAPACKCite = PETSC_FALSE;
14: #define DEFAULT_BLOCKSIZE 64
16: /*
17: The variable Petsc_ScaLAPACK_keyval is used to indicate an MPI attribute that
18: is attached to a communicator; in this case the attribute is a Mat_ScaLAPACK_Grid
19: */
20: static PetscMPIInt Petsc_ScaLAPACK_keyval = MPI_KEYVAL_INVALID;
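/*
   A minimal sketch of the attribute-caching pattern (the names below are the
   ones used later in this file; error checking elided):

     MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
     MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
     if (!flg) {
       ... create and initialize the Mat_ScaLAPACK_Grid ...
       MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
     }

   so that all MATSCALAPACK matrices living on the same communicator share a
   single BLACS process grid (see MatCreate_ScaLAPACK below).
*/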
22: static PetscErrorCode Petsc_ScaLAPACK_keyval_free(void)
23: {
24: PetscInfo(NULL,"Freeing Petsc_ScaLAPACK_keyval\n");
25: MPI_Comm_free_keyval(&Petsc_ScaLAPACK_keyval);
26: return 0;
27: }
29: static PetscErrorCode MatView_ScaLAPACK(Mat A,PetscViewer viewer)
30: {
31: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
32: PetscBool iascii;
33: PetscViewerFormat format;
34: Mat Adense;
36: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
37: if (iascii) {
38: PetscViewerGetFormat(viewer,&format);
39: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
40: PetscViewerASCIIPrintf(viewer,"block sizes: %d,%d\n",(int)a->mb,(int)a->nb);
41: PetscViewerASCIIPrintf(viewer,"grid height=%d, grid width=%d\n",(int)a->grid->nprow,(int)a->grid->npcol);
42: PetscViewerASCIIPrintf(viewer,"coordinates of process owning first row and column: (%d,%d)\n",(int)a->rsrc,(int)a->csrc);
43: PetscViewerASCIIPrintf(viewer,"dimension of largest local matrix: %d x %d\n",(int)a->locr,(int)a->locc);
44: return 0;
45: } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
46: return 0;
47: }
48: }
49: /* convert to dense format and call MatView() */
50: MatConvert(A,MATDENSE,MAT_INITIAL_MATRIX,&Adense);
51: MatView(Adense,viewer);
52: MatDestroy(&Adense);
53: return 0;
54: }
56: static PetscErrorCode MatGetInfo_ScaLAPACK(Mat A,MatInfoType flag,MatInfo *info)
57: {
58: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
59: PetscLogDouble isend[2],irecv[2];
61: info->block_size = 1.0;
63: isend[0] = a->lld*a->locc; /* locally allocated */
64: isend[1] = a->locr*a->locc; /* used submatrix */
65: if (flag == MAT_LOCAL) {
66: info->nz_allocated = isend[0];
67: info->nz_used = isend[1];
68: } else if (flag == MAT_GLOBAL_MAX) {
69: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_MAX,PetscObjectComm((PetscObject)A));
70: info->nz_allocated = irecv[0];
71: info->nz_used = irecv[1];
72: } else if (flag == MAT_GLOBAL_SUM) {
73: MPIU_Allreduce(isend,irecv,2,MPIU_PETSCLOGDOUBLE,MPIU_SUM,PetscObjectComm((PetscObject)A));
74: info->nz_allocated = irecv[0];
75: info->nz_used = irecv[1];
76: }
78: info->nz_unneeded = 0;
79: info->assemblies = A->num_ass;
80: info->mallocs = 0;
81: info->memory = ((PetscObject)A)->mem;
82: info->fill_ratio_given = 0;
83: info->fill_ratio_needed = 0;
84: info->factor_mallocs = 0;
85: return 0;
86: }
88: PetscErrorCode MatSetOption_ScaLAPACK(Mat A,MatOption op,PetscBool flg)
89: {
90: switch (op) {
91: case MAT_NEW_NONZERO_LOCATIONS:
92: case MAT_NEW_NONZERO_LOCATION_ERR:
93: case MAT_NEW_NONZERO_ALLOCATION_ERR:
94: case MAT_SYMMETRIC:
95: case MAT_SORTED_FULL:
96: case MAT_HERMITIAN:
97: break;
98: default:
99: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unsupported option %s",MatOptions[op]);
100: }
101: return 0;
102: }
104: static PetscErrorCode MatSetValues_ScaLAPACK(Mat A,PetscInt nr,const PetscInt *rows,PetscInt nc,const PetscInt *cols,const PetscScalar *vals,InsertMode imode)
105: {
106: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
107: PetscInt i,j;
108: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
110: for (i=0;i<nr;i++) {
111: if (rows[i] < 0) continue;
112: PetscBLASIntCast(rows[i]+1,&gridx);
113: for (j=0;j<nc;j++) {
114: if (cols[j] < 0) continue;
115: PetscBLASIntCast(cols[j]+1,&gcidx);
116: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
117: if (rsrc==a->grid->myrow && csrc==a->grid->mycol) {
118: switch (imode) {
119: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = vals[i*nc+j]; break;
120: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += vals[i*nc+j]; break;
121: default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)imode);
122: }
123: } else {
125: A->assembled = PETSC_FALSE;
126: MatStashValuesRow_Private(&A->stash,rows[i],1,cols+j,vals+i*nc+j,(PetscBool)(imode==ADD_VALUES));
127: }
128: }
129: }
130: return 0;
131: }
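/*
   The routine below computes y = beta*y + op(A)*x with op(A) = A or A^T.
   PETSc vectors are distributed in 1d blocks while the matrix uses a 2d
   block-cyclic layout, so x and y are first redistributed with p?gemr2d
   (through the auxiliary 1-by-P and P-by-1 BLACS contexts ictxrow/ictxcol),
   p?gemv is applied on the 2d layout, and y is redistributed back.
*/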
133: static PetscErrorCode MatMultXXXYYY_ScaLAPACK(Mat A,PetscBool transpose,PetscScalar beta,const PetscScalar *x,PetscScalar *y)
134: {
135: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
136: PetscScalar *x2d,*y2d,alpha=1.0;
137: const PetscInt *ranges;
138: PetscBLASInt xdesc[9],ydesc[9],x2desc[9],y2desc[9],mb,nb,lszx,lszy,zero=0,one=1,xlld,ylld,info;
140: if (transpose) {
142: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
143: PetscLayoutGetRanges(A->rmap,&ranges);
144: PetscBLASIntCast(ranges[1],&mb); /* x block size */
145: xlld = PetscMax(1,A->rmap->n);
146: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
148: PetscLayoutGetRanges(A->cmap,&ranges);
149: PetscBLASIntCast(ranges[1],&nb); /* y block size */
150: ylld = 1;
151: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&ylld,&info));
154: /* allocate 2d vectors */
155: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
156: lszy = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
157: PetscMalloc2(lszx,&x2d,lszy,&y2d);
158: xlld = PetscMax(1,lszx);
160: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
161: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
163: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&ylld,&info));
166: /* redistribute x as a column of a 2d matrix */
167: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
169: /* redistribute y as a row of a 2d matrix */
170: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxrow));
172: /* call PBLAS subroutine */
173: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("T",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
175: /* redistribute y from a row of a 2d matrix */
176: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxrow));
178: } else { /* non-transpose */
180: /* create ScaLAPACK descriptors for vectors (1d block distribution) */
181: PetscLayoutGetRanges(A->cmap,&ranges);
182: PetscBLASIntCast(ranges[1],&nb); /* x block size */
183: xlld = 1;
184: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&xlld,&info));
186: PetscLayoutGetRanges(A->rmap,&ranges);
187: PetscBLASIntCast(ranges[1],&mb); /* y block size */
188: ylld = PetscMax(1,A->rmap->n);
189: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ydesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&ylld,&info));
192: /* allocate 2d vectors */
193: lszy = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
194: lszx = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
195: PetscMalloc2(lszx,&x2d,lszy,&y2d);
196: ylld = PetscMax(1,lszy);
198: /* create ScaLAPACK descriptors for vectors (2d block distribution) */
199: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&xlld,&info));
201: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(y2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&ylld,&info));
204: /* redistribute x as a row of a 2d matrix */
205: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxrow));
207: /* redistribute y as a column of a 2d matrix */
208: if (beta!=0.0) PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y,&one,&one,ydesc,y2d,&one,&one,y2desc,&a->grid->ictxcol));
210: /* call PBLAS subroutine */
211: PetscStackCallBLAS("PBLASgemv",PBLASgemv_("N",&a->M,&a->N,&alpha,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&one,&beta,y2d,&one,&one,y2desc,&one));
213: /* redistribute y from a column of a 2d matrix */
214: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,y2d,&one,&one,y2desc,y,&one,&one,ydesc,&a->grid->ictxcol));
216: }
217: PetscFree2(x2d,y2d);
218: return 0;
219: }
221: static PetscErrorCode MatMult_ScaLAPACK(Mat A,Vec x,Vec y)
222: {
223: const PetscScalar *xarray;
224: PetscScalar *yarray;
226: VecGetArrayRead(x,&xarray);
227: VecGetArray(y,&yarray);
228: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,0.0,xarray,yarray);
229: VecRestoreArrayRead(x,&xarray);
230: VecRestoreArray(y,&yarray);
231: return 0;
232: }
234: static PetscErrorCode MatMultTranspose_ScaLAPACK(Mat A,Vec x,Vec y)
235: {
236: const PetscScalar *xarray;
237: PetscScalar *yarray;
239: VecGetArrayRead(x,&xarray);
240: VecGetArray(y,&yarray);
241: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,0.0,xarray,yarray);
242: VecRestoreArrayRead(x,&xarray);
243: VecRestoreArray(y,&yarray);
244: return 0;
245: }
247: static PetscErrorCode MatMultAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
248: {
249: const PetscScalar *xarray;
250: PetscScalar *zarray;
252: if (y != z) VecCopy(y,z);
253: VecGetArrayRead(x,&xarray);
254: VecGetArray(z,&zarray);
255: MatMultXXXYYY_ScaLAPACK(A,PETSC_FALSE,1.0,xarray,zarray);
256: VecRestoreArrayRead(x,&xarray);
257: VecRestoreArray(z,&zarray);
258: return 0;
259: }
261: static PetscErrorCode MatMultTransposeAdd_ScaLAPACK(Mat A,Vec x,Vec y,Vec z)
262: {
263: const PetscScalar *xarray;
264: PetscScalar *zarray;
266: if (y != z) VecCopy(y,z);
267: VecGetArrayRead(x,&xarray);
268: VecGetArray(z,&zarray);
269: MatMultXXXYYY_ScaLAPACK(A,PETSC_TRUE,1.0,xarray,zarray);
270: VecRestoreArrayRead(x,&xarray);
271: VecRestoreArray(z,&zarray);
272: return 0;
273: }
275: PetscErrorCode MatMatMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
276: {
277: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
278: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
279: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
280: PetscScalar sone=1.0,zero=0.0;
281: PetscBLASInt one=1;
283: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","N",&a->M,&b->N,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
284: C->assembled = PETSC_TRUE;
285: return 0;
286: }
288: PetscErrorCode MatMatMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
289: {
290: MatSetSizes(C,A->rmap->n,B->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
291: MatSetType(C,MATSCALAPACK);
292: MatSetUp(C);
293: C->ops->matmultnumeric = MatMatMultNumeric_ScaLAPACK;
294: return 0;
295: }
297: static PetscErrorCode MatMatTransposeMultNumeric_ScaLAPACK(Mat A,Mat B,Mat C)
298: {
299: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
300: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
301: Mat_ScaLAPACK *c = (Mat_ScaLAPACK*)C->data;
302: PetscScalar sone=1.0,zero=0.0;
303: PetscBLASInt one=1;
305: PetscStackCallBLAS("PBLASgemm",PBLASgemm_("N","T",&a->M,&b->M,&a->N,&sone,a->loc,&one,&one,a->desc,b->loc,&one,&one,b->desc,&zero,c->loc,&one,&one,c->desc));
306: C->assembled = PETSC_TRUE;
307: return 0;
308: }
310: static PetscErrorCode MatMatTransposeMultSymbolic_ScaLAPACK(Mat A,Mat B,PetscReal fill,Mat C)
311: {
312: MatSetSizes(C,A->rmap->n,B->rmap->n,PETSC_DECIDE,PETSC_DECIDE);
313: MatSetType(C,MATSCALAPACK);
314: MatSetUp(C);
315: return 0;
316: }
318: /* --------------------------------------- */
319: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_AB(Mat C)
320: {
321: C->ops->matmultsymbolic = MatMatMultSymbolic_ScaLAPACK;
322: C->ops->productsymbolic = MatProductSymbolic_AB;
323: return 0;
324: }
326: static PetscErrorCode MatProductSetFromOptions_ScaLAPACK_ABt(Mat C)
327: {
328: C->ops->mattransposemultsymbolic = MatMatTransposeMultSymbolic_ScaLAPACK;
329: C->ops->productsymbolic = MatProductSymbolic_ABt;
330: return 0;
331: }
333: PETSC_INTERN PetscErrorCode MatProductSetFromOptions_ScaLAPACK(Mat C)
334: {
335: Mat_Product *product = C->product;
337: switch (product->type) {
338: case MATPRODUCT_AB:
339: MatProductSetFromOptions_ScaLAPACK_AB(C);
340: break;
341: case MATPRODUCT_ABt:
342: MatProductSetFromOptions_ScaLAPACK_ABt(C);
343: break;
344: default: SETERRQ(PetscObjectComm((PetscObject)C),PETSC_ERR_SUP,"MatProduct type %s is not supported for ScaLAPACK and ScaLAPACK matrices",MatProductTypes[product->type]);
345: }
346: return 0;
347: }
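/*
   A usage sketch for the product kernels above (assuming A and B are
   assembled MATSCALAPACK matrices; these are standard PETSc calls):

     Mat C;
     MatProductCreate(A,B,NULL,&C);
     MatProductSetType(C,MATPRODUCT_AB);          (or MATPRODUCT_ABt)
     MatProductSetFromOptions(C);
     MatProductSymbolic(C);
     MatProductNumeric(C);

   or, equivalently for AB, MatMatMult(A,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&C).
*/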
348: /* --------------------------------------- */
350: static PetscErrorCode MatGetDiagonal_ScaLAPACK(Mat A,Vec D)
351: {
352: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
353: PetscScalar *darray,*d2d,v;
354: const PetscInt *ranges;
355: PetscBLASInt j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
357: VecGetArray(D,&darray);
359: if (A->rmap->N<=A->cmap->N) { /* row version */
361: /* create ScaLAPACK descriptor for vector (1d block distribution) */
362: PetscLayoutGetRanges(A->rmap,&ranges);
363: PetscBLASIntCast(ranges[1],&mb); /* D block size */
364: dlld = PetscMax(1,A->rmap->n);
365: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
368: /* allocate 2d vector */
369: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
370: PetscCalloc1(lszd,&d2d);
371: dlld = PetscMax(1,lszd);
373: /* create ScaLAPACK descriptor for vector (2d block distribution) */
374: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
377: /* collect diagonal */
378: for (j=1;j<=a->M;j++) {
379: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("R"," ",&v,a->loc,&j,&j,a->desc));
380: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&j,&one,d2desc,&v));
381: }
383: /* redistribute d from a column of a 2d matrix */
384: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxcol));
385: PetscFree(d2d);
387: } else { /* column version */
389: /* create ScaLAPACK descriptor for vector (1d block distribution) */
390: PetscLayoutGetRanges(A->cmap,&ranges);
391: PetscBLASIntCast(ranges[1],&nb); /* D block size */
392: dlld = 1;
393: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
396: /* allocate 2d vector */
397: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
398: PetscCalloc1(lszd,&d2d);
400: /* create ScaLAPACK descriptor for vector (2d block distribution) */
401: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
404: /* collect diagonal */
405: for (j=1;j<=a->N;j++) {
406: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("C"," ",&v,a->loc,&j,&j,a->desc));
407: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(d2d,&one,&j,d2desc,&v));
408: }
410: /* redistribute d from a row of a 2d matrix */
411: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,d2d,&one,&one,d2desc,darray,&one,&one,ddesc,&a->grid->ictxrow));
412: PetscFree(d2d);
413: }
415: VecRestoreArray(D,&darray);
416: VecAssemblyBegin(D);
417: VecAssemblyEnd(D);
418: return 0;
419: }
421: static PetscErrorCode MatDiagonalScale_ScaLAPACK(Mat A,Vec L,Vec R)
422: {
423: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
424: const PetscScalar *d;
425: const PetscInt *ranges;
426: PetscScalar *d2d;
427: PetscBLASInt i,j,ddesc[9],d2desc[9],mb,nb,lszd,zero=0,one=1,dlld,info;
429: if (R) {
430: VecGetArrayRead(R,(const PetscScalar **)&d);
431: /* create ScaLAPACK descriptor for vector (1d block distribution) */
432: PetscLayoutGetRanges(A->cmap,&ranges);
433: PetscBLASIntCast(ranges[1],&nb); /* D block size */
434: dlld = 1;
435: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&one,&a->N,&one,&nb,&zero,&zero,&a->grid->ictxrow,&dlld,&info));
438: /* allocate 2d vector */
439: lszd = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
440: PetscCalloc1(lszd,&d2d);
442: /* create ScaLAPACK descriptor for vector (2d block distribution) */
443: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&one,&a->N,&one,&a->nb,&zero,&zero,&a->grid->ictxt,&dlld,&info));
446: /* redistribute d to a row of a 2d matrix */
447: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&one,&a->N,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxrow));
449: /* broadcast along process columns */
450: if (!a->grid->myrow) Cdgebs2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld);
451: else Cdgebr2d(a->grid->ictxt,"C"," ",1,lszd,d2d,dlld,0,a->grid->mycol);
453: /* local scaling */
454: for (j=0;j<a->locc;j++) for (i=0;i<a->locr;i++) a->loc[i+j*a->lld] *= d2d[j];
456: PetscFree(d2d);
457: VecRestoreArrayRead(R,(const PetscScalar **)&d);
458: }
459: if (L) {
460: VecGetArrayRead(L,(const PetscScalar **)&d);
461: /* create ScaLAPACK descriptor for vector (1d block distribution) */
462: PetscLayoutGetRanges(A->rmap,&ranges);
463: PetscBLASIntCast(ranges[1],&mb); /* D block size */
464: dlld = PetscMax(1,A->rmap->n);
465: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(ddesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&dlld,&info));
468: /* allocate 2d vector */
469: lszd = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
470: PetscCalloc1(lszd,&d2d);
471: dlld = PetscMax(1,lszd);
473: /* create ScaLAPACK descriptor for vector (2d block distribution) */
474: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(d2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&dlld,&info));
477: /* redistribute d to a column of a 2d matrix */
478: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,(PetscScalar*)d,&one,&one,ddesc,d2d,&one,&one,d2desc,&a->grid->ictxcol));
480: /* broadcast along process rows */
481: if (!a->grid->mycol) Cdgebs2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld);
482: else Cdgebr2d(a->grid->ictxt,"R"," ",lszd,1,d2d,dlld,a->grid->myrow,0);
484: /* local scaling */
485: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] *= d2d[i];
487: PetscFree(d2d);
488: VecRestoreArrayRead(L,(const PetscScalar **)&d);
489: }
490: return 0;
491: }
493: static PetscErrorCode MatMissingDiagonal_ScaLAPACK(Mat A,PetscBool *missing,PetscInt *d)
494: {
495: *missing = PETSC_FALSE;
496: return 0;
497: }
499: static PetscErrorCode MatScale_ScaLAPACK(Mat X,PetscScalar a)
500: {
501: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
502: PetscBLASInt n,one=1;
504: n = x->lld*x->locc;
505: PetscStackCallBLAS("BLASscal",BLASscal_(&n,&a,x->loc,&one));
506: return 0;
507: }
509: static PetscErrorCode MatShift_ScaLAPACK(Mat X,PetscScalar alpha)
510: {
511: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
512: PetscBLASInt i,n;
513: PetscScalar v;
515: n = PetscMin(x->M,x->N);
516: for (i=1;i<=n;i++) {
517: PetscStackCallBLAS("SCALAPACKelget",SCALAPACKelget_("-"," ",&v,x->loc,&i,&i,x->desc));
518: v += alpha;
519: PetscStackCallBLAS("SCALAPACKelset",SCALAPACKelset_(x->loc,&i,&i,x->desc,&v));
520: }
521: return 0;
522: }
524: static PetscErrorCode MatAXPY_ScaLAPACK(Mat Y,PetscScalar alpha,Mat X,MatStructure str)
525: {
526: Mat_ScaLAPACK *x = (Mat_ScaLAPACK*)X->data;
527: Mat_ScaLAPACK *y = (Mat_ScaLAPACK*)Y->data;
528: PetscBLASInt one=1;
529: PetscScalar beta=1.0;
531: MatScaLAPACKCheckDistribution(Y,1,X,3);
532: PetscStackCallBLAS("SCALAPACKmatadd",SCALAPACKmatadd_(&x->M,&x->N,&alpha,x->loc,&one,&one,x->desc,&beta,y->loc,&one,&one,y->desc));
533: PetscObjectStateIncrease((PetscObject)Y);
534: return 0;
535: }
537: static PetscErrorCode MatCopy_ScaLAPACK(Mat A,Mat B,MatStructure str)
538: {
539: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
540: Mat_ScaLAPACK *b = (Mat_ScaLAPACK*)B->data;
542: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
543: PetscObjectStateIncrease((PetscObject)B);
544: return 0;
545: }
547: static PetscErrorCode MatDuplicate_ScaLAPACK(Mat A,MatDuplicateOption op,Mat *B)
548: {
549: Mat Bs;
550: MPI_Comm comm;
551: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b;
553: PetscObjectGetComm((PetscObject)A,&comm);
554: MatCreate(comm,&Bs);
555: MatSetSizes(Bs,A->rmap->n,A->cmap->n,PETSC_DECIDE,PETSC_DECIDE);
556: MatSetType(Bs,MATSCALAPACK);
557: b = (Mat_ScaLAPACK*)Bs->data;
558: b->M = a->M;
559: b->N = a->N;
560: b->mb = a->mb;
561: b->nb = a->nb;
562: b->rsrc = a->rsrc;
563: b->csrc = a->csrc;
564: MatSetUp(Bs);
565: *B = Bs;
566: if (op == MAT_COPY_VALUES) {
567: PetscArraycpy(b->loc,a->loc,a->lld*a->locc);
568: }
569: Bs->assembled = PETSC_TRUE;
570: return 0;
571: }
573: static PetscErrorCode MatTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
574: {
575: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
576: Mat Bs = *B;
577: PetscBLASInt one=1;
578: PetscScalar sone=1.0,zero=0.0;
579: #if defined(PETSC_USE_COMPLEX)
580: PetscInt i,j;
581: #endif
583: if (reuse == MAT_INITIAL_MATRIX) {
584: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
585: *B = Bs;
586: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
587: b = (Mat_ScaLAPACK*)Bs->data;
588: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
589: #if defined(PETSC_USE_COMPLEX)
590: /* undo conjugation */
591: for (i=0;i<b->locr;i++) for (j=0;j<b->locc;j++) b->loc[i+j*b->lld] = PetscConj(b->loc[i+j*b->lld]);
592: #endif
593: Bs->assembled = PETSC_TRUE;
594: return 0;
595: }
597: static PetscErrorCode MatConjugate_ScaLAPACK(Mat A)
598: {
599: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
600: PetscInt i,j;
602: for (i=0;i<a->locr;i++) for (j=0;j<a->locc;j++) a->loc[i+j*a->lld] = PetscConj(a->loc[i+j*a->lld]);
603: return 0;
604: }
606: static PetscErrorCode MatHermitianTranspose_ScaLAPACK(Mat A,MatReuse reuse,Mat *B)
607: {
608: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data, *b;
609: Mat Bs = *B;
610: PetscBLASInt one=1;
611: PetscScalar sone=1.0,zero=0.0;
613: if (reuse == MAT_INITIAL_MATRIX) {
614: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->nb,a->mb,a->N,a->M,a->csrc,a->rsrc,&Bs);
615: *B = Bs;
616: } else SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Only MAT_INITIAL_MATRIX supported");
617: b = (Mat_ScaLAPACK*)Bs->data;
618: PetscStackCallBLAS("PBLAStran",PBLAStran_(&a->N,&a->M,&sone,a->loc,&one,&one,a->desc,&zero,b->loc,&one,&one,b->desc));
619: Bs->assembled = PETSC_TRUE;
620: return 0;
621: }
623: static PetscErrorCode MatSolve_ScaLAPACK(Mat A,Vec B,Vec X)
624: {
625: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
626: PetscScalar *x,*x2d;
627: const PetscInt *ranges;
628: PetscBLASInt xdesc[9],x2desc[9],mb,lszx,zero=0,one=1,xlld,nrhs=1,info;
630: VecCopy(B,X);
631: VecGetArray(X,&x);
633: /* create ScaLAPACK descriptor for a vector (1d block distribution) */
634: PetscLayoutGetRanges(A->rmap,&ranges);
635: PetscBLASIntCast(ranges[1],&mb); /* x block size */
636: xlld = PetscMax(1,A->rmap->n);
637: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(xdesc,&a->M,&one,&mb,&one,&zero,&zero,&a->grid->ictxcol,&xlld,&info));
640: /* allocate 2d vector */
641: lszx = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
642: PetscMalloc1(lszx,&x2d);
643: xlld = PetscMax(1,lszx);
645: /* create ScaLAPACK descriptor for a vector (2d block distribution) */
646: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(x2desc,&a->M,&one,&a->mb,&one,&zero,&zero,&a->grid->ictxt,&xlld,&info));
649: /* redistribute x as a column of a 2d matrix */
650: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x,&one,&one,xdesc,x2d,&one,&one,x2desc,&a->grid->ictxcol));
652: /* call ScaLAPACK subroutine */
653: switch (A->factortype) {
654: case MAT_FACTOR_LU:
655: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&nrhs,a->loc,&one,&one,a->desc,a->pivots,x2d,&one,&one,x2desc,&info));
657: break;
658: case MAT_FACTOR_CHOLESKY:
659: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&nrhs,a->loc,&one,&one,a->desc,x2d,&one,&one,x2desc,&info));
661: break;
662: default:
663: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
664: }
666: /* redistribute x from a column of a 2d matrix */
667: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&one,x2d,&one,&one,x2desc,x,&one,&one,xdesc,&a->grid->ictxcol));
669: PetscFree(x2d);
670: VecRestoreArray(X,&x);
671: return 0;
672: }
674: static PetscErrorCode MatSolveAdd_ScaLAPACK(Mat A,Vec B,Vec Y,Vec X)
675: {
676: MatSolve_ScaLAPACK(A,B,X);
677: VecAXPY(X,1,Y);
678: return 0;
679: }
681: static PetscErrorCode MatMatSolve_ScaLAPACK(Mat A,Mat B,Mat X)
682: {
683: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data,*b,*x;
684: PetscBool flg1,flg2;
685: PetscBLASInt one=1,info;
687: PetscObjectTypeCompare((PetscObject)B,MATSCALAPACK,&flg1);
688: PetscObjectTypeCompare((PetscObject)X,MATSCALAPACK,&flg2);
689: if (!flg1 || !flg2) SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Both B and X must be of type MATSCALAPACK");
690: MatScaLAPACKCheckDistribution(B,1,X,2);
691: b = (Mat_ScaLAPACK*)B->data;
692: x = (Mat_ScaLAPACK*)X->data;
693: PetscArraycpy(x->loc,b->loc,b->lld*b->locc);
695: switch (A->factortype) {
696: case MAT_FACTOR_LU:
697: PetscStackCallBLAS("SCALAPACKgetrs",SCALAPACKgetrs_("N",&a->M,&x->N,a->loc,&one,&one,a->desc,a->pivots,x->loc,&one,&one,x->desc,&info));
699: break;
700: case MAT_FACTOR_CHOLESKY:
701: PetscStackCallBLAS("SCALAPACKpotrs",SCALAPACKpotrs_("L",&a->M,&x->N,a->loc,&one,&one,a->desc,x->loc,&one,&one,x->desc,&info));
703: break;
704: default:
705: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unfactored Matrix or Unsupported MatFactorType");
706: }
707: return 0;
708: }
710: static PetscErrorCode MatLUFactor_ScaLAPACK(Mat A,IS row,IS col,const MatFactorInfo *factorinfo)
711: {
712: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
713: PetscBLASInt one=1,info;
715: if (!a->pivots) {
716: PetscMalloc1(a->locr+a->mb,&a->pivots);
717: PetscLogObjectMemory((PetscObject)A,(a->locr+a->mb)*sizeof(PetscBLASInt));
718: }
719: PetscStackCallBLAS("SCALAPACKgetrf",SCALAPACKgetrf_(&a->M,&a->N,a->loc,&one,&one,a->desc,a->pivots,&info));
721: A->factortype = MAT_FACTOR_LU;
722: A->assembled = PETSC_TRUE;
724: PetscFree(A->solvertype);
725: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
726: return 0;
727: }
729: static PetscErrorCode MatLUFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
730: {
731: MatCopy(A,F,SAME_NONZERO_PATTERN);
732: MatLUFactor_ScaLAPACK(F,0,0,info);
733: return 0;
734: }
736: static PetscErrorCode MatLUFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS r,IS c,const MatFactorInfo *info)
737: {
738: /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
739: return 0;
740: }
742: static PetscErrorCode MatCholeskyFactor_ScaLAPACK(Mat A,IS perm,const MatFactorInfo *factorinfo)
743: {
744: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
745: PetscBLASInt one=1,info;
747: PetscStackCallBLAS("SCALAPACKpotrf",SCALAPACKpotrf_("L",&a->M,a->loc,&one,&one,a->desc,&info));
749: A->factortype = MAT_FACTOR_CHOLESKY;
750: A->assembled = PETSC_TRUE;
752: PetscFree(A->solvertype);
753: PetscStrallocpy(MATSOLVERSCALAPACK,&A->solvertype);
754: return 0;
755: }
757: static PetscErrorCode MatCholeskyFactorNumeric_ScaLAPACK(Mat F,Mat A,const MatFactorInfo *info)
758: {
759: MatCopy(A,F,SAME_NONZERO_PATTERN);
760: MatCholeskyFactor_ScaLAPACK(F,0,info);
761: return 0;
762: }
764: static PetscErrorCode MatCholeskyFactorSymbolic_ScaLAPACK(Mat F,Mat A,IS perm,const MatFactorInfo *info)
765: {
766: /* F is created and allocated by MatGetFactor_scalapack_scalapack(), skip this routine. */
767: return 0;
768: }
770: PetscErrorCode MatFactorGetSolverType_scalapack_scalapack(Mat A,MatSolverType *type)
771: {
772: *type = MATSOLVERSCALAPACK;
773: return 0;
774: }
776: static PetscErrorCode MatGetFactor_scalapack_scalapack(Mat A,MatFactorType ftype,Mat *F)
777: {
778: Mat B;
779: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
781: /* Create the factorization matrix */
782: MatCreateScaLAPACK(PetscObjectComm((PetscObject)A),a->mb,a->nb,a->M,a->N,a->rsrc,a->csrc,&B);
783: B->trivialsymbolic = PETSC_TRUE;
784: B->factortype = ftype;
785: PetscFree(B->solvertype);
786: PetscStrallocpy(MATSOLVERSCALAPACK,&B->solvertype);
788: PetscObjectComposeFunction((PetscObject)B,"MatFactorGetSolverType_C",MatFactorGetSolverType_scalapack_scalapack);
789: *F = B;
790: return 0;
791: }
793: PETSC_EXTERN PetscErrorCode MatSolverTypeRegister_ScaLAPACK(void)
794: {
795: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_LU,MatGetFactor_scalapack_scalapack);
796: MatSolverTypeRegister(MATSOLVERSCALAPACK,MATSCALAPACK,MAT_FACTOR_CHOLESKY,MatGetFactor_scalapack_scalapack);
797: return 0;
798: }
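/*
   A usage sketch for the factorization kernels above. From the command line
   (standard PETSc options):

     -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type scalapack

   or programmatically (assuming A is an assembled MATSCALAPACK matrix and
   b, x are conforming vectors):

     Mat           F;
     MatFactorInfo info;
     MatFactorInfoInitialize(&info);
     MatGetFactor(A,MATSOLVERSCALAPACK,MAT_FACTOR_LU,&F);
     MatLUFactorSymbolic(F,A,NULL,NULL,&info);
     MatLUFactorNumeric(F,A,&info);
     MatSolve(F,b,x);
     MatDestroy(&F);
*/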
800: static PetscErrorCode MatNorm_ScaLAPACK(Mat A,NormType type,PetscReal *nrm)
801: {
802: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
803: PetscBLASInt one=1,lwork=0;
804: const char *ntype;
805: PetscScalar *work=NULL,dummy;
807: switch (type) {
808: case NORM_1:
809: ntype = "1";
810: lwork = PetscMax(a->locr,a->locc);
811: break;
812: case NORM_FROBENIUS:
813: ntype = "F";
814: work = &dummy;
815: break;
816: case NORM_INFINITY:
817: ntype = "I";
818: lwork = PetscMax(a->locr,a->locc);
819: break;
820: default:
821: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"Unsupported norm type");
822: }
823: if (lwork) PetscMalloc1(lwork,&work);
824: *nrm = SCALAPACKlange_(ntype,&a->M,&a->N,a->loc,&one,&one,a->desc,work);
825: if (lwork) PetscFree(work);
826: return 0;
827: }
829: static PetscErrorCode MatZeroEntries_ScaLAPACK(Mat A)
830: {
831: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
833: PetscArrayzero(a->loc,a->lld*a->locc);
834: return 0;
835: }
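/*
   The loops below compute the local-to-global index map of the 2d
   block-cyclic distribution: local entry i sits at offset i%nb of local
   block i/nb, and local block k of the process with grid coordinate iproc
   is global block k*nproc + (nproc+iproc-isrc)%nproc, giving

     idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb

   For example, with nb=2, nproc=3, isrc=0, the process with iproc=1 owns
   global indices 2,3 (local block 0), 8,9 (block 1), 14,15 (block 2), ...
*/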
837: static PetscErrorCode MatGetOwnershipIS_ScaLAPACK(Mat A,IS *rows,IS *cols)
838: {
839: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
840: PetscInt i,n,nb,isrc,nproc,iproc,*idx;
842: if (rows) {
843: n = a->locr;
844: nb = a->mb;
845: isrc = a->rsrc;
846: nproc = a->grid->nprow;
847: iproc = a->grid->myrow;
848: PetscMalloc1(n,&idx);
849: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
850: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,rows);
851: }
852: if (cols) {
853: n = a->locc;
854: nb = a->nb;
855: isrc = a->csrc;
856: nproc = a->grid->npcol;
857: iproc = a->grid->mycol;
858: PetscMalloc1(n,&idx);
859: for (i=0;i<n;i++) idx[i] = nproc*nb*(i/nb) + i%nb + ((nproc+iproc-isrc)%nproc)*nb;
860: ISCreateGeneral(PETSC_COMM_SELF,n,idx,PETSC_OWN_POINTER,cols);
861: }
862: return 0;
863: }
865: static PetscErrorCode MatConvert_ScaLAPACK_Dense(Mat A,MatType newtype,MatReuse reuse,Mat *B)
866: {
867: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
868: Mat Bmpi;
869: MPI_Comm comm;
870: PetscInt i,M=A->rmap->N,N=A->cmap->N,m,n,rstart,rend,nz;
871: const PetscInt *ranges,*branges,*cwork;
872: const PetscScalar *vwork;
873: PetscBLASInt bdesc[9],bmb,zero=0,one=1,lld,info;
874: PetscScalar *barray;
875: PetscBool differ=PETSC_FALSE;
876: PetscMPIInt size;
878: PetscObjectGetComm((PetscObject)A,&comm);
879: PetscLayoutGetRanges(A->rmap,&ranges);
881: if (reuse == MAT_REUSE_MATRIX) { /* check if local sizes differ in A and B */
882: MPI_Comm_size(comm,&size);
883: PetscLayoutGetRanges((*B)->rmap,&branges);
884: for (i=0;i<size;i++) if (ranges[i+1]!=branges[i+1]) { differ=PETSC_TRUE; break; }
885: }
887: if (reuse == MAT_REUSE_MATRIX && differ) { /* special case, use auxiliary dense matrix */
888: MatCreate(comm,&Bmpi);
889: m = PETSC_DECIDE;
890: PetscSplitOwnershipEqual(comm,&m,&M);
891: n = PETSC_DECIDE;
892: PetscSplitOwnershipEqual(comm,&n,&N);
893: MatSetSizes(Bmpi,m,n,M,N);
894: MatSetType(Bmpi,MATDENSE);
895: MatSetUp(Bmpi);
897: /* create ScaLAPACK descriptor for B (1d block distribution) */
898: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
899: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
900: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
903: /* redistribute matrix */
904: MatDenseGetArray(Bmpi,&barray);
905: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
906: MatDenseRestoreArray(Bmpi,&barray);
907: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
908: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
910: /* transfer rows of auxiliary matrix to the final matrix B */
911: MatGetOwnershipRange(Bmpi,&rstart,&rend);
912: for (i=rstart;i<rend;i++) {
913: MatGetRow(Bmpi,i,&nz,&cwork,&vwork);
914: MatSetValues(*B,1,&i,nz,cwork,vwork,INSERT_VALUES);
915: MatRestoreRow(Bmpi,i,&nz,&cwork,&vwork);
916: }
917: MatAssemblyBegin(*B,MAT_FINAL_ASSEMBLY);
918: MatAssemblyEnd(*B,MAT_FINAL_ASSEMBLY);
919: MatDestroy(&Bmpi);
921: } else { /* normal cases */
923: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
924: else {
925: MatCreate(comm,&Bmpi);
926: m = PETSC_DECIDE;
927: PetscSplitOwnershipEqual(comm,&m,&M);
928: n = PETSC_DECIDE;
929: PetscSplitOwnershipEqual(comm,&n,&N);
930: MatSetSizes(Bmpi,m,n,M,N);
931: MatSetType(Bmpi,MATDENSE);
932: MatSetUp(Bmpi);
933: }
935: /* create ScaLAPACK descriptor for B (1d block distribution) */
936: PetscBLASIntCast(ranges[1],&bmb); /* row block size */
937: lld = PetscMax(A->rmap->n,1); /* local leading dimension */
938: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(bdesc,&a->M,&a->N,&bmb,&a->N,&zero,&zero,&a->grid->ictxcol,&lld,&info));
941: /* redistribute matrix */
942: MatDenseGetArray(Bmpi,&barray);
943: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&a->M,&a->N,a->loc,&one,&one,a->desc,barray,&one,&one,bdesc,&a->grid->ictxcol));
944: MatDenseRestoreArray(Bmpi,&barray);
946: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
947: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
948: if (reuse == MAT_INPLACE_MATRIX) {
949: MatHeaderReplace(A,&Bmpi);
950: } else *B = Bmpi;
951: }
952: return 0;
953: }
955: PETSC_INTERN PetscErrorCode MatConvert_Dense_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *B)
956: {
957: Mat_ScaLAPACK *b;
958: Mat Bmpi;
959: MPI_Comm comm;
960: PetscInt M=A->rmap->N,N=A->cmap->N,m,n;
961: const PetscInt *ranges;
962: PetscBLASInt adesc[9],amb,zero=0,one=1,lld,info;
963: PetscScalar *aarray;
964: PetscInt lda;
966: PetscObjectGetComm((PetscObject)A,&comm);
968: if (reuse == MAT_REUSE_MATRIX) Bmpi = *B;
969: else {
970: MatCreate(comm,&Bmpi);
971: m = PETSC_DECIDE;
972: PetscSplitOwnershipEqual(comm,&m,&M);
973: n = PETSC_DECIDE;
974: PetscSplitOwnershipEqual(comm,&n,&N);
975: MatSetSizes(Bmpi,m,n,M,N);
976: MatSetType(Bmpi,MATSCALAPACK);
977: MatSetUp(Bmpi);
978: }
979: b = (Mat_ScaLAPACK*)Bmpi->data;
981: /* create ScaLAPACK descriptor for A (1d block distribution) */
982: PetscLayoutGetRanges(A->rmap,&ranges);
983: PetscBLASIntCast(ranges[1],&amb); /* row block size */
984: MatDenseGetLDA(A,&lda);
985: lld = PetscMax(lda,1); /* local leading dimension */
986: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(adesc,&b->M,&b->N,&amb,&b->N,&zero,&zero,&b->grid->ictxcol,&lld,&info));
989: /* redistribute matrix */
990: MatDenseGetArray(A,&aarray);
991: PetscStackCallBLAS("SCALAPACKgemr2d",SCALAPACKgemr2d_(&b->M,&b->N,aarray,&one,&one,adesc,b->loc,&one,&one,b->desc,&b->grid->ictxcol));
992: MatDenseRestoreArray(A,&aarray);
994: MatAssemblyBegin(Bmpi,MAT_FINAL_ASSEMBLY);
995: MatAssemblyEnd(Bmpi,MAT_FINAL_ASSEMBLY);
996: if (reuse == MAT_INPLACE_MATRIX) {
997: MatHeaderReplace(A,&Bmpi);
998: } else *B = Bmpi;
999: return 0;
1000: }
1002: PETSC_INTERN PetscErrorCode MatConvert_AIJ_ScaLAPACK(Mat A,MatType newtype,MatReuse reuse,Mat *newmat)
1003: {
1004: Mat mat_scal;
1005: PetscInt M=A->rmap->N,N=A->cmap->N,rstart=A->rmap->rstart,rend=A->rmap->rend,m,n,row,ncols;
1006: const PetscInt *cols;
1007: const PetscScalar *vals;
1009: if (reuse == MAT_REUSE_MATRIX) {
1010: mat_scal = *newmat;
1011: MatZeroEntries(mat_scal);
1012: } else {
1013: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1014: m = PETSC_DECIDE;
1015: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1016: n = PETSC_DECIDE;
1017: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1018: MatSetSizes(mat_scal,m,n,M,N);
1019: MatSetType(mat_scal,MATSCALAPACK);
1020: MatSetUp(mat_scal);
1021: }
1022: for (row=rstart;row<rend;row++) {
1023: MatGetRow(A,row,&ncols,&cols,&vals);
1024: MatSetValues(mat_scal,1,&row,ncols,cols,vals,INSERT_VALUES);
1025: MatRestoreRow(A,row,&ncols,&cols,&vals);
1026: }
1027: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1028: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1030: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1031: else *newmat = mat_scal;
1032: return 0;
1033: }
1035: PETSC_INTERN PetscErrorCode MatConvert_SBAIJ_ScaLAPACK(Mat A, MatType newtype,MatReuse reuse,Mat *newmat)
1036: {
1037: Mat mat_scal;
1038: PetscInt M=A->rmap->N,N=A->cmap->N,m,n,row,ncols,j,rstart=A->rmap->rstart,rend=A->rmap->rend;
1039: const PetscInt *cols;
1040: const PetscScalar *vals;
1041: PetscScalar v;
1043: if (reuse == MAT_REUSE_MATRIX) {
1044: mat_scal = *newmat;
1045: MatZeroEntries(mat_scal);
1046: } else {
1047: MatCreate(PetscObjectComm((PetscObject)A),&mat_scal);
1048: m = PETSC_DECIDE;
1049: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&m,&M);
1050: n = PETSC_DECIDE;
1051: PetscSplitOwnershipEqual(PetscObjectComm((PetscObject)A),&n,&N);
1052: MatSetSizes(mat_scal,m,n,M,N);
1053: MatSetType(mat_scal,MATSCALAPACK);
1054: MatSetUp(mat_scal);
1055: }
1056: MatGetRowUpperTriangular(A);
1057: for (row=rstart;row<rend;row++) {
1058: MatGetRow(A,row,&ncols,&cols,&vals);
1059: MatSetValues(mat_scal,1,&row,ncols,cols,vals,ADD_VALUES);
1060: for (j=0;j<ncols;j++) { /* lower triangular part */
1061: if (cols[j] == row) continue;
1062: v = A->hermitian ? PetscConj(vals[j]) : vals[j];
1063: MatSetValues(mat_scal,1,&cols[j],1,&row,&v,ADD_VALUES);
1064: }
1065: MatRestoreRow(A,row,&ncols,&cols,&vals);
1066: }
1067: MatRestoreRowUpperTriangular(A);
1068: MatAssemblyBegin(mat_scal,MAT_FINAL_ASSEMBLY);
1069: MatAssemblyEnd(mat_scal,MAT_FINAL_ASSEMBLY);
1071: if (reuse == MAT_INPLACE_MATRIX) MatHeaderReplace(A,&mat_scal);
1072: else *newmat = mat_scal;
1073: return 0;
1074: }
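/*
   Conversion sketch covering the converters above (A may be MATDENSE,
   MATAIJ or MATSBAIJ; this is the standard MatConvert interface):

     Mat As;
     MatConvert(A,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);

   MAT_REUSE_MATRIX reuses a previously converted matrix and MAT_INPLACE_MATRIX
   replaces A itself, as handled by the MatHeaderReplace() calls above.
*/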
1076: static PetscErrorCode MatScaLAPACKSetPreallocation(Mat A)
1077: {
1078: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1079: PetscInt sz=0;
1081: PetscLayoutSetUp(A->rmap);
1082: PetscLayoutSetUp(A->cmap);
1083: if (!a->lld) a->lld = a->locr;
1085: PetscFree(a->loc);
1086: PetscIntMultError(a->lld,a->locc,&sz);
1087: PetscCalloc1(sz,&a->loc);
1088: PetscLogObjectMemory((PetscObject)A,sz*sizeof(PetscScalar));
1090: A->preallocated = PETSC_TRUE;
1091: return 0;
1092: }
1094: static PetscErrorCode MatDestroy_ScaLAPACK(Mat A)
1095: {
1096: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1097: Mat_ScaLAPACK_Grid *grid;
1098: PetscBool flg;
1099: MPI_Comm icomm;
1101: MatStashDestroy_Private(&A->stash);
1102: PetscFree(a->loc);
1103: PetscFree(a->pivots);
1104: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1105: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1106: if (--grid->grid_refct == 0) {
1107: Cblacs_gridexit(grid->ictxt);
1108: Cblacs_gridexit(grid->ictxrow);
1109: Cblacs_gridexit(grid->ictxcol);
1110: PetscFree(grid);
1111: MPI_Comm_delete_attr(icomm,Petsc_ScaLAPACK_keyval);
1112: }
1113: PetscCommDestroy(&icomm);
1114: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",NULL);
1115: PetscObjectComposeFunction((PetscObject)A,"MatFactorGetSolverType_C",NULL);
1116: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",NULL);
1117: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",NULL);
1118: PetscFree(A->data);
1119: return 0;
1120: }
1122: static inline PetscErrorCode MatScaLAPACKCheckLayout(PetscLayout map)
1123: {
1124: const PetscInt *ranges;
1125: PetscMPIInt size;
1126: PetscInt i,n;
1128: MPI_Comm_size(map->comm,&size);
1129: if (size>2) {
1130: PetscLayoutGetRanges(map,&ranges);
1131: n = ranges[1]-ranges[0];
1132: for (i=1;i<size-1;i++) if (ranges[i+1]-ranges[i]!=n) break;
1133: if (i<size-1 && ranges[i+1]-ranges[i]!=ranges[i]-ranges[i-1]) SETERRQ(map->comm,PETSC_ERR_SUP,"MATSCALAPACK must have equal local sizes in all processes (except possibly the last one), consider using MatCreateScaLAPACK()");
1134: }
1135: return 0;
1136: }
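/*
   Reminder on ScaLAPACK array descriptors, used throughout this file:
   descinit(desc,M,N,mb,nb,rsrc,csrc,ictxt,lld,info) fills the 9-entry
   descriptor of an M-by-N global matrix distributed in mb-by-nb blocks over
   the process grid of BLACS context ictxt, with the first block owned by
   process (rsrc,csrc) and local leading dimension lld.
*/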
1138: PetscErrorCode MatSetUp_ScaLAPACK(Mat A)
1139: {
1140: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1141: PetscBLASInt info=0;
1143: PetscLayoutSetUp(A->rmap);
1144: PetscLayoutSetUp(A->cmap);
1146: /* check that the layout is as enforced by MatCreateScaLAPACK */
1147: MatScaLAPACKCheckLayout(A->rmap);
1148: MatScaLAPACKCheckLayout(A->cmap);
1150: /* compute local sizes */
1151: PetscBLASIntCast(A->rmap->N,&a->M);
1152: PetscBLASIntCast(A->cmap->N,&a->N);
1153: a->locr = SCALAPACKnumroc_(&a->M,&a->mb,&a->grid->myrow,&a->rsrc,&a->grid->nprow);
1154: a->locc = SCALAPACKnumroc_(&a->N,&a->nb,&a->grid->mycol,&a->csrc,&a->grid->npcol);
1155: a->lld = PetscMax(1,a->locr);
1157: /* allocate local array */
1158: MatScaLAPACKSetPreallocation(A);
1160: /* set up ScaLAPACK descriptor */
1161: PetscStackCallBLAS("SCALAPACKdescinit",SCALAPACKdescinit_(a->desc,&a->M,&a->N,&a->mb,&a->nb,&a->rsrc,&a->csrc,&a->grid->ictxt,&a->lld,&info));
1163: return 0;
1164: }
1166: PetscErrorCode MatAssemblyBegin_ScaLAPACK(Mat A,MatAssemblyType type)
1167: {
1168: PetscInt nstash,reallocs;
1170: if (A->nooffprocentries) return 0;
1171: MatStashScatterBegin_Private(A,&A->stash,NULL);
1172: MatStashGetInfo_Private(&A->stash,&nstash,&reallocs);
1173: PetscInfo(A,"Stash has %" PetscInt_FMT " entries, uses %" PetscInt_FMT " mallocs.\n",nstash,reallocs);
1174: return 0;
1175: }
1177: PetscErrorCode MatAssemblyEnd_ScaLAPACK(Mat A,MatAssemblyType type)
1178: {
1179: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1180: PetscMPIInt n;
1181: PetscInt i,flg,*row,*col;
1182: PetscScalar *val;
1183: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1185: if (A->nooffprocentries) return 0;
1186: while (1) {
1187: MatStashScatterGetMesg_Private(&A->stash,&n,&row,&col,&val,&flg);
1188: if (!flg) break;
1189: for (i=0;i<n;i++) {
1190: PetscBLASIntCast(row[i]+1,&gridx);
1191: PetscBLASIntCast(col[i]+1,&gcidx);
1192: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1194: switch (A->insertmode) {
1195: case INSERT_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] = val[i]; break;
1196: case ADD_VALUES: a->loc[lridx-1+(lcidx-1)*a->lld] += val[i]; break;
1197: default: SETERRQ(PetscObjectComm((PetscObject)A),PETSC_ERR_SUP,"No support for InsertMode %d",(int)A->insertmode);
1198: }
1199: }
1200: }
1201: MatStashScatterEnd_Private(&A->stash);
1202: return 0;
1203: }
1205: PetscErrorCode MatLoad_ScaLAPACK(Mat newMat,PetscViewer viewer)
1206: {
1207: Mat Adense,As;
1208: MPI_Comm comm;
1210: PetscObjectGetComm((PetscObject)newMat,&comm);
1211: MatCreate(comm,&Adense);
1212: MatSetType(Adense,MATDENSE);
1213: MatLoad(Adense,viewer);
1214: MatConvert(Adense,MATSCALAPACK,MAT_INITIAL_MATRIX,&As);
1215: MatDestroy(&Adense);
1216: MatHeaderReplace(newMat,&As);
1217: return 0;
1218: }
1220: /* -------------------------------------------------------------------*/
1221: static struct _MatOps MatOps_Values = {
1222: MatSetValues_ScaLAPACK,
1223: 0,
1224: 0,
1225: MatMult_ScaLAPACK,
1226: /* 4*/ MatMultAdd_ScaLAPACK,
1227: MatMultTranspose_ScaLAPACK,
1228: MatMultTransposeAdd_ScaLAPACK,
1229: MatSolve_ScaLAPACK,
1230: MatSolveAdd_ScaLAPACK,
1231: 0,
1232: /*10*/ 0,
1233: MatLUFactor_ScaLAPACK,
1234: MatCholeskyFactor_ScaLAPACK,
1235: 0,
1236: MatTranspose_ScaLAPACK,
1237: /*15*/ MatGetInfo_ScaLAPACK,
1238: 0,
1239: MatGetDiagonal_ScaLAPACK,
1240: MatDiagonalScale_ScaLAPACK,
1241: MatNorm_ScaLAPACK,
1242: /*20*/ MatAssemblyBegin_ScaLAPACK,
1243: MatAssemblyEnd_ScaLAPACK,
1244: MatSetOption_ScaLAPACK,
1245: MatZeroEntries_ScaLAPACK,
1246: /*24*/ 0,
1247: MatLUFactorSymbolic_ScaLAPACK,
1248: MatLUFactorNumeric_ScaLAPACK,
1249: MatCholeskyFactorSymbolic_ScaLAPACK,
1250: MatCholeskyFactorNumeric_ScaLAPACK,
1251: /*29*/ MatSetUp_ScaLAPACK,
1252: 0,
1253: 0,
1254: 0,
1255: 0,
1256: /*34*/ MatDuplicate_ScaLAPACK,
1257: 0,
1258: 0,
1259: 0,
1260: 0,
1261: /*39*/ MatAXPY_ScaLAPACK,
1262: 0,
1263: 0,
1264: 0,
1265: MatCopy_ScaLAPACK,
1266: /*44*/ 0,
1267: MatScale_ScaLAPACK,
1268: MatShift_ScaLAPACK,
1269: 0,
1270: 0,
1271: /*49*/ 0,
1272: 0,
1273: 0,
1274: 0,
1275: 0,
1276: /*54*/ 0,
1277: 0,
1278: 0,
1279: 0,
1280: 0,
1281: /*59*/ 0,
1282: MatDestroy_ScaLAPACK,
1283: MatView_ScaLAPACK,
1284: 0,
1285: 0,
1286: /*64*/ 0,
1287: 0,
1288: 0,
1289: 0,
1290: 0,
1291: /*69*/ 0,
1292: 0,
1293: MatConvert_ScaLAPACK_Dense,
1294: 0,
1295: 0,
1296: /*74*/ 0,
1297: 0,
1298: 0,
1299: 0,
1300: 0,
1301: /*79*/ 0,
1302: 0,
1303: 0,
1304: 0,
1305: MatLoad_ScaLAPACK,
1306: /*84*/ 0,
1307: 0,
1308: 0,
1309: 0,
1310: 0,
1311: /*89*/ 0,
1312: 0,
1313: MatMatMultNumeric_ScaLAPACK,
1314: 0,
1315: 0,
1316: /*94*/ 0,
1317: 0,
1318: 0,
1319: MatMatTransposeMultNumeric_ScaLAPACK,
1320: 0,
1321: /*99*/ MatProductSetFromOptions_ScaLAPACK,
1322: 0,
1323: 0,
1324: MatConjugate_ScaLAPACK,
1325: 0,
1326: /*104*/0,
1327: 0,
1328: 0,
1329: 0,
1330: 0,
1331: /*109*/MatMatSolve_ScaLAPACK,
1332: 0,
1333: 0,
1334: 0,
1335: MatMissingDiagonal_ScaLAPACK,
1336: /*114*/0,
1337: 0,
1338: 0,
1339: 0,
1340: 0,
1341: /*119*/0,
1342: MatHermitianTranspose_ScaLAPACK,
1343: 0,
1344: 0,
1345: 0,
1346: /*124*/0,
1347: 0,
1348: 0,
1349: 0,
1350: 0,
1351: /*129*/0,
1352: 0,
1353: 0,
1354: 0,
1355: 0,
1356: /*134*/0,
1357: 0,
1358: 0,
1359: 0,
1360: 0,
1361: 0,
1362: /*140*/0,
1363: 0,
1364: 0,
1365: 0,
1366: 0,
1367: /*145*/0,
1368: 0,
1369: 0
1370: };
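/*
   The custom ScatterBegin below differs from the generic MatStash scatter in
   how it routes entries: the owner of a stashed (row,col) pair is determined
   not by PETSc's contiguous row ownership but by the 2d block-cyclic layout,
   via infog2l (global indices -> owning grid coordinates) followed by
   Cblacs_pnum (grid coordinates -> MPI rank).
*/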
1372: static PetscErrorCode MatStashScatterBegin_ScaLAPACK(Mat mat,MatStash *stash,PetscInt *owners)
1373: {
1374: PetscInt *owner,*startv,*starti,tag1=stash->tag1,tag2=stash->tag2,bs2;
1375: PetscInt size=stash->size,nsends;
1376: PetscInt count,*sindices,**rindices,i,j,l;
1377: PetscScalar **rvalues,*svalues;
1378: MPI_Comm comm = stash->comm;
1379: MPI_Request *send_waits,*recv_waits,*recv_waits1,*recv_waits2;
1380: PetscMPIInt *sizes,*nlengths,nreceives;
1381: PetscInt *sp_idx,*sp_idy;
1382: PetscScalar *sp_val;
1383: PetscMatStashSpace space,space_next;
1384: PetscBLASInt gridx,gcidx,lridx,lcidx,rsrc,csrc;
1385: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)mat->data;
1387: { /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
1388: InsertMode addv;
1389: MPIU_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));
1391: mat->insertmode = addv; /* in case this processor had no cache */
1392: }
1394: bs2 = stash->bs*stash->bs;
1396: /* first count number of contributors to each processor */
1397: PetscCalloc1(size,&nlengths);
1398: PetscMalloc1(stash->n+1,&owner);
1400: i = j = 0;
1401: space = stash->space_head;
1402: while (space) {
1403: space_next = space->next;
1404: for (l=0; l<space->local_used; l++) {
1405: PetscBLASIntCast(space->idx[l]+1,&gridx);
1406: PetscBLASIntCast(space->idy[l]+1,&gcidx);
1407: PetscStackCallBLAS("SCALAPACKinfog2l",SCALAPACKinfog2l_(&gridx,&gcidx,a->desc,&a->grid->nprow,&a->grid->npcol,&a->grid->myrow,&a->grid->mycol,&lridx,&lcidx,&rsrc,&csrc));
1408: j = Cblacs_pnum(a->grid->ictxt,rsrc,csrc);
1409: nlengths[j]++; owner[i] = j;
1410: i++;
1411: }
1412: space = space_next;
1413: }
1415: /* Now check what procs get messages - and compute nsends. */
1416: PetscCalloc1(size,&sizes);
1417: for (i=0, nsends=0; i<size; i++) {
1418: if (nlengths[i]) {
1419: sizes[i] = 1; nsends++;
1420: }
1421: }
1423: {PetscMPIInt *onodes,*olengths;
1424: /* Determine the number of messages to expect, their lengths, and from-ids */
1425: PetscGatherNumberOfMessages(comm,sizes,nlengths,&nreceives);
1426: PetscGatherMessageLengths(comm,nsends,nreceives,nlengths,&onodes,&olengths);
1427: /* since row and column indices are clubbed together, lengths are multiplied by 2 */
1428: for (i=0; i<nreceives; i++) olengths[i] *=2;
1429: PetscPostIrecvInt(comm,tag1,nreceives,onodes,olengths,&rindices,&recv_waits1);
1430: /* values are of size 'bs2' (and remove the earlier factor of 2) */
1431: for (i=0; i<nreceives; i++) olengths[i] = olengths[i]*bs2/2;
1432: PetscPostIrecvScalar(comm,tag2,nreceives,onodes,olengths,&rvalues,&recv_waits2);
1433: PetscFree(onodes);
1434: PetscFree(olengths);}
1436: /* do sends:
1437: 1) starts[i] gives the starting index in svalues for stuff going to
1438: the ith processor
1439: */
1440: PetscMalloc2(bs2*stash->n,&svalues,2*(stash->n+1),&sindices);
1441: PetscMalloc1(2*nsends,&send_waits);
1442: PetscMalloc2(size,&startv,size,&starti);
1443: /* use 2 sends: the first with all_a, the next with all_i and all_j */
1444: startv[0] = 0; starti[0] = 0;
1445: for (i=1; i<size; i++) {
1446: startv[i] = startv[i-1] + nlengths[i-1];
1447: starti[i] = starti[i-1] + 2*nlengths[i-1];
1448: }
1450: i = 0;
1451: space = stash->space_head;
1452: while (space) {
1453: space_next = space->next;
1454: sp_idx = space->idx;
1455: sp_idy = space->idy;
1456: sp_val = space->val;
1457: for (l=0; l<space->local_used; l++) {
1458: j = owner[i];
1459: if (bs2 == 1) {
1460: svalues[startv[j]] = sp_val[l];
1461: } else {
1462: PetscInt k;
1463: PetscScalar *buf1,*buf2;
1464: buf1 = svalues+bs2*startv[j];
1465: buf2 = space->val + bs2*l;
1466: for (k=0; k<bs2; k++) buf1[k] = buf2[k];
1467: }
1468: sindices[starti[j]] = sp_idx[l];
1469: sindices[starti[j]+nlengths[j]] = sp_idy[l];
1470: startv[j]++;
1471: starti[j]++;
1472: i++;
1473: }
1474: space = space_next;
1475: }
1476: startv[0] = 0;
1477: for (i=1; i<size; i++) startv[i] = startv[i-1] + nlengths[i-1];
1479: for (i=0,count=0; i<size; i++) {
1480: if (sizes[i]) {
1481: MPI_Isend(sindices+2*startv[i],2*nlengths[i],MPIU_INT,i,tag1,comm,send_waits+count++);
1482: MPI_Isend(svalues+bs2*startv[i],bs2*nlengths[i],MPIU_SCALAR,i,tag2,comm,send_waits+count++);
1483: }
1484: }
1485: #if defined(PETSC_USE_INFO)
1486: PetscInfo(NULL,"No of messages: %" PetscInt_FMT "\n",nsends);
1487: for (i=0; i<size; i++) {
1488: if (sizes[i]) {
1489: PetscInfo(NULL,"Mesg_to: %" PetscInt_FMT ": size: %zu bytes\n",i,(size_t)(nlengths[i]*(bs2*sizeof(PetscScalar)+2*sizeof(PetscInt))));
1490: }
1491: }
1492: #endif
1493: PetscFree(nlengths);
1494: PetscFree(owner);
1495: PetscFree2(startv,starti);
1496: PetscFree(sizes);
1498: /* recv_waits need to be contiguous for MatStashScatterGetMesg_Private() */
1499: PetscMalloc1(2*nreceives,&recv_waits);
1501: for (i=0; i<nreceives; i++) {
1502: recv_waits[2*i] = recv_waits1[i];
1503: recv_waits[2*i+1] = recv_waits2[i];
1504: }
1505: stash->recv_waits = recv_waits;
1507: PetscFree(recv_waits1);
1508: PetscFree(recv_waits2);
1510: stash->svalues = svalues;
1511: stash->sindices = sindices;
1512: stash->rvalues = rvalues;
1513: stash->rindices = rindices;
1514: stash->send_waits = send_waits;
1515: stash->nsends = nsends;
1516: stash->nrecvs = nreceives;
1517: stash->reproduce_count = 0;
1518: return 0;
1519: }
1521: static PetscErrorCode MatScaLAPACKSetBlockSizes_ScaLAPACK(Mat A,PetscInt mb,PetscInt nb)
1522: {
1523: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1528: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1529: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1530: return 0;
1531: }
1533: /*@
1534: MatScaLAPACKSetBlockSizes - Sets the block sizes to be used for the distribution of
1535: the ScaLAPACK matrix
1537: Logically Collective on A
1539: Input Parameters:
1540: + A - a MATSCALAPACK matrix
1541: . mb - the row block size
1542: - nb - the column block size
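   Example usage (a sketch; block sizes are normally set before MatSetUp(), and
   PETSC_DECIDE may be passed to select the default block size):
.vb
   MatScaLAPACKSetBlockSizes(A,16,16);
.ve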
1544: Level: intermediate
1546: .seealso: MatCreateScaLAPACK(), MatScaLAPACKGetBlockSizes()
1547: @*/
1548: PetscErrorCode MatScaLAPACKSetBlockSizes(Mat A,PetscInt mb,PetscInt nb)
1549: {
1553: PetscTryMethod(A,"MatScaLAPACKSetBlockSizes_C",(Mat,PetscInt,PetscInt),(A,mb,nb));
1554: return 0;
1555: }
1557: static PetscErrorCode MatScaLAPACKGetBlockSizes_ScaLAPACK(Mat A,PetscInt *mb,PetscInt *nb)
1558: {
1559: Mat_ScaLAPACK *a = (Mat_ScaLAPACK*)A->data;
1561: if (mb) *mb = a->mb;
1562: if (nb) *nb = a->nb;
1563: return 0;
1564: }
1566: /*@
1567: MatScaLAPACKGetBlockSizes - Gets the block sizes used in the distribution of
1568: the ScaLAPACK matrix
1570: Not collective
1572: Input Parameter:
1573: . A - a MATSCALAPACK matrix
1575: Output Parameters:
1576: + mb - the row block size
1577: - nb - the column block size
1579: Level: intermediate
1581: .seealso: MatCreateScaLAPACK(), MatScaLAPACKSetBlockSizes()
1582: @*/
1583: PetscErrorCode MatScaLAPACKGetBlockSizes(Mat A,PetscInt *mb,PetscInt *nb)
1584: {
1586: PetscUseMethod(A,"MatScaLAPACKGetBlockSizes_C",(Mat,PetscInt*,PetscInt*),(A,mb,nb));
1587: return 0;
1588: }
1590: PETSC_INTERN PetscErrorCode MatStashScatterGetMesg_Ref(MatStash*,PetscMPIInt*,PetscInt**,PetscInt**,PetscScalar**,PetscInt*);
1591: PETSC_INTERN PetscErrorCode MatStashScatterEnd_Ref(MatStash*);
1593: /*MC
1594: MATSCALAPACK = "scalapack" - A matrix type for dense matrices using the ScaLAPACK package
1596: Use ./configure --download-scalapack to have PETSc installed with ScaLAPACK
1598: Use -pc_type lu -pc_factor_mat_solver_type scalapack to use this direct solver
1600: Options Database Keys:
1601: + -mat_type scalapack - sets the matrix type to "scalapack" during a call to MatSetFromOptions()
1602: . -mat_scalapack_grid_height - sets Grid Height for 2D cyclic ordering of internal matrix
1603: - -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
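   Example usage (a sketch using standard PETSc calls; M and N are the global sizes):
.vb
   Mat A;
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);
   MatSetType(A,MATSCALAPACK);
   MatSetFromOptions(A);
   MatSetUp(A);
.ve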
1605: Level: beginner
1607: .seealso: MATDENSE, MATELEMENTAL
1608: M*/
1610: PETSC_EXTERN PetscErrorCode MatCreate_ScaLAPACK(Mat A)
1611: {
1612: Mat_ScaLAPACK *a;
1614: PetscBool flg,flg1;
1615: Mat_ScaLAPACK_Grid *grid;
1616: MPI_Comm icomm;
1617: PetscBLASInt nprow,npcol,myrow,mycol;
1618: PetscInt optv1,k=2,array[2]={0,0};
1619: PetscMPIInt size;
1621: PetscMemcpy(A->ops,&MatOps_Values,sizeof(struct _MatOps));
1622: A->insertmode = NOT_SET_VALUES;
1624: MatStashCreate_Private(PetscObjectComm((PetscObject)A),1,&A->stash);
1625: A->stash.ScatterBegin = MatStashScatterBegin_ScaLAPACK;
1626: A->stash.ScatterGetMesg = MatStashScatterGetMesg_Ref;
1627: A->stash.ScatterEnd = MatStashScatterEnd_Ref;
1628: A->stash.ScatterDestroy = NULL;
1630: PetscNewLog(A,&a);
1631: A->data = (void*)a;
1633: /* Grid needs to be shared between multiple Mats on the same communicator, implement by attribute caching on the MPI_Comm */
1634: if (Petsc_ScaLAPACK_keyval == MPI_KEYVAL_INVALID) {
1635: MPI_Comm_create_keyval(MPI_COMM_NULL_COPY_FN,MPI_COMM_NULL_DELETE_FN,&Petsc_ScaLAPACK_keyval,(void*)0);
1636: PetscRegisterFinalize(Petsc_ScaLAPACK_keyval_free);
1637: PetscCitationsRegister(ScaLAPACKCitation,&ScaLAPACKCite);
1638: }
1639: PetscCommDuplicate(PetscObjectComm((PetscObject)A),&icomm,NULL);
1640: MPI_Comm_get_attr(icomm,Petsc_ScaLAPACK_keyval,(void**)&grid,(int*)&flg);
1641: if (!flg) {
1642: PetscNewLog(A,&grid);
1644: MPI_Comm_size(icomm,&size);
1645: grid->nprow = (PetscInt) (PetscSqrtReal((PetscReal)size) + 0.001);
1647: PetscOptionsBegin(PetscObjectComm((PetscObject)A),((PetscObject)A)->prefix,"ScaLAPACK Grid Options","Mat");
1648: PetscOptionsInt("-mat_scalapack_grid_height","Grid Height","None",grid->nprow,&optv1,&flg1);
1649: if (flg1) {
1651: grid->nprow = optv1;
1652: }
1653: PetscOptionsEnd();
1655: if (size % grid->nprow) grid->nprow = 1; /* cannot use a squarish grid, use a 1d grid */
1656: grid->npcol = size/grid->nprow;
1657: PetscBLASIntCast(grid->nprow,&nprow);
1658: PetscBLASIntCast(grid->npcol,&npcol);
1659: grid->ictxt = Csys2blacs_handle(icomm);
1660: Cblacs_gridinit(&grid->ictxt,"R",nprow,npcol);
1661: Cblacs_gridinfo(grid->ictxt,&nprow,&npcol,&myrow,&mycol);
1662: grid->grid_refct = 1;
1663: grid->nprow = nprow;
1664: grid->npcol = npcol;
1665: grid->myrow = myrow;
1666: grid->mycol = mycol;
1667: /* auxiliary 1d BLACS contexts for 1xsize and sizex1 grids */
1668: grid->ictxrow = Csys2blacs_handle(icomm);
1669: Cblacs_gridinit(&grid->ictxrow,"R",1,size);
1670: grid->ictxcol = Csys2blacs_handle(icomm);
1671: Cblacs_gridinit(&grid->ictxcol,"R",size,1);
1672: MPI_Comm_set_attr(icomm,Petsc_ScaLAPACK_keyval,(void*)grid);
1674: } else grid->grid_refct++;
1675: PetscCommDestroy(&icomm);
1676: a->grid = grid;
1677: a->mb = DEFAULT_BLOCKSIZE;
1678: a->nb = DEFAULT_BLOCKSIZE;
1680: PetscOptionsBegin(PetscObjectComm((PetscObject)A),NULL,"ScaLAPACK Options","Mat");
1681: PetscOptionsIntArray("-mat_scalapack_block_sizes","Size of the blocks to use (one or two comma-separated integers)","MatCreateScaLAPACK",array,&k,&flg);
1682: if (flg) {
1683: a->mb = array[0];
1684: a->nb = (k>1)? array[1]: a->mb;
1685: }
1686: PetscOptionsEnd();
1688: PetscObjectComposeFunction((PetscObject)A,"MatGetOwnershipIS_C",MatGetOwnershipIS_ScaLAPACK);
1689: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKSetBlockSizes_C",MatScaLAPACKSetBlockSizes_ScaLAPACK);
1690: PetscObjectComposeFunction((PetscObject)A,"MatScaLAPACKGetBlockSizes_C",MatScaLAPACKGetBlockSizes_ScaLAPACK);
1691: PetscObjectChangeTypeName((PetscObject)A,MATSCALAPACK);
1692: return 0;
1693: }
1695: /*@C
1696: MatCreateScaLAPACK - Creates a dense parallel matrix in ScaLAPACK format
1697: (2D block cyclic distribution).
1699: Collective
1701: Input Parameters:
1702: + comm - MPI communicator
1703: . mb - row block size (or PETSC_DECIDE to have it set)
1704: . nb - column block size (or PETSC_DECIDE to have it set)
1705: . M - number of global rows
1706: . N - number of global columns
1707: . rsrc - coordinate of process that owns the first row of the distributed matrix
1708: - csrc - coordinate of process that owns the first column of the distributed matrix
1710: Output Parameter:
1711: . A - the matrix
1713: Options Database Keys:
1714: . -mat_scalapack_block_sizes - size of the blocks to use (one or two integers separated by comma)
1716: It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1717: MatXXXXSetPreallocation() paradigm instead of this routine directly.
1718: [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1720: Notes:
1721: If PETSC_DECIDE is used for the block sizes, then an appropriate value
1722: is chosen.
1724:    Storage Information:
1725:    Storage is completely managed by ScaLAPACK, so this requires PETSc to be
1726:    configured with ScaLAPACK. In particular, PETSc's local sizes lose
1727:    significance and are thus ignored. The block sizes refer to the values
1728:    used for the 2D block-cyclic distribution; they do not have the same meaning as in BAIJ.
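   Example usage (a sketch; M and N are global sizes and 16 is an illustrative block size):
.vb
   Mat A;
   MatCreateScaLAPACK(PETSC_COMM_WORLD,16,16,M,N,0,0,&A);
   MatSetValues(A,...);                   (any process may set any entry)
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve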
1730: Level: intermediate
1732: .seealso: MatCreate(), MatCreateDense(), MatSetValues()
1733: @*/
1734: PetscErrorCode MatCreateScaLAPACK(MPI_Comm comm,PetscInt mb,PetscInt nb,PetscInt M,PetscInt N,PetscInt rsrc,PetscInt csrc,Mat *A)
1735: {
1736: Mat_ScaLAPACK *a;
1737: PetscInt m,n;
1739: MatCreate(comm,A);
1740: MatSetType(*A,MATSCALAPACK);
1742: /* rows and columns are NOT distributed according to PetscSplitOwnership */
1743: m = PETSC_DECIDE;
1744: PetscSplitOwnershipEqual(comm,&m,&M);
1745: n = PETSC_DECIDE;
1746: PetscSplitOwnershipEqual(comm,&n,&N);
1747: MatSetSizes(*A,m,n,M,N);
1748: a = (Mat_ScaLAPACK*)(*A)->data;
1749: PetscBLASIntCast(M,&a->M);
1750: PetscBLASIntCast(N,&a->N);
1751: PetscBLASIntCast((mb==PETSC_DECIDE)?DEFAULT_BLOCKSIZE:mb,&a->mb);
1752: PetscBLASIntCast((nb==PETSC_DECIDE)?a->mb:nb,&a->nb);
1753: PetscBLASIntCast(rsrc,&a->rsrc);
1754: PetscBLASIntCast(csrc,&a->csrc);
1755: MatSetUp(*A);
1756: return 0;
1757: }