Actual source code: matutil.c
slepc-3.17.0 2022-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/slepcimpl.h>
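/*
   MatCreateTile_Seq: sequential kernel of MatCreateTile(). It first counts the
   (block) nonzeros per block row of G contributed by the blocks whose scaling
   factor is nonzero and preallocates accordingly; it then inserts the scaled
   entries of A, B, C and D, shifting column indices by N1 for the right blocks
   (B, D) and row indices by M1 for the bottom blocks (C, D).
*/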
13: static PetscErrorCode MatCreateTile_Seq(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
14: {
15: PetscInt i,j,M1,M2,N1,N2,*nnz,ncols,*scols,bs;
16: PetscScalar *svals,*buf;
17: const PetscInt *cols;
18: const PetscScalar *vals;
20: MatGetSize(A,&M1,&N1);
21: MatGetSize(D,&M2,&N2);
22: MatGetBlockSize(A,&bs);
24: PetscCalloc1((M1+M2)/bs,&nnz);
25: /* Preallocate for A */
26: if (a!=0.0) {
27: for (i=0;i<(M1+bs-1)/bs;i++) {
28: MatGetRow(A,i*bs,&ncols,NULL,NULL);
29: nnz[i] += ncols/bs;
30: MatRestoreRow(A,i*bs,&ncols,NULL,NULL);
31: }
32: }
33: /* Preallocate for B */
34: if (b!=0.0) {
35: for (i=0;i<(M1+bs-1)/bs;i++) {
36: MatGetRow(B,i*bs,&ncols,NULL,NULL);
37: nnz[i] += ncols/bs;
38: MatRestoreRow(B,i*bs,&ncols,NULL,NULL);
39: }
40: }
41: /* Preallocate for C */
42: if (c!=0.0) {
43: for (i=0;i<(M2+bs-1)/bs;i++) {
44: MatGetRow(C,i*bs,&ncols,NULL,NULL);
45: nnz[i+M1/bs] += ncols/bs;
46: MatRestoreRow(C,i*bs,&ncols,NULL,NULL);
47: }
48: }
49: /* Preallocate for D */
50: if (d!=0.0) {
51: for (i=0;i<(M2+bs-1)/bs;i++) {
52: MatGetRow(D,i*bs,&ncols,NULL,NULL);
53: nnz[i+M1/bs] += ncols/bs;
54: MatRestoreRow(D,i*bs,&ncols,NULL,NULL);
55: }
56: }
57: MatXAIJSetPreallocation(G,bs,nnz,NULL,NULL,NULL);
58: PetscFree(nnz);
60: PetscMalloc2(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols);
61: /* Transfer A */
62: if (a!=0.0) {
63: for (i=0;i<M1;i++) {
64: MatGetRow(A,i,&ncols,&cols,&vals);
65: if (a!=1.0) {
66: svals=buf;
67: for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
68: } else svals=(PetscScalar*)vals;
69: MatSetValues(G,1,&i,ncols,cols,svals,INSERT_VALUES);
70: MatRestoreRow(A,i,&ncols,&cols,&vals);
71: }
72: }
73: /* Transfer B */
74: if (b!=0.0) {
75: for (i=0;i<M1;i++) {
76: MatGetRow(B,i,&ncols,&cols,&vals);
77: if (b!=1.0) {
78: svals=buf;
79: for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
80: } else svals=(PetscScalar*)vals;
81: for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
82: MatSetValues(G,1,&i,ncols,scols,svals,INSERT_VALUES);
83: MatRestoreRow(B,i,&ncols,&cols,&vals);
84: }
85: }
86: /* Transfer C */
87: if (c!=0.0) {
88: for (i=0;i<M2;i++) {
89: MatGetRow(C,i,&ncols,&cols,&vals);
90: if (c!=1.0) {
91: svals=buf;
92: for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
93: } else svals=(PetscScalar*)vals;
94: j = i+M1;
95: MatSetValues(G,1,&j,ncols,cols,svals,INSERT_VALUES);
96: MatRestoreRow(C,i,&ncols,&cols,&vals);
97: }
98: }
99: /* Transfer D */
100: if (d!=0.0) {
101: for (i=0;i<M2;i++) {
102: MatGetRow(D,i,&ncols,&cols,&vals);
103: if (d!=1.0) {
104: svals=buf;
105: for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
106: } else svals=(PetscScalar*)vals;
107: for (j=0;j<ncols;j++) scols[j] = cols[j]+N1;
108: j = i+M1;
109: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
110: MatRestoreRow(D,i,&ncols,&cols,&vals);
111: }
112: }
113: PetscFree2(buf,scols);
114: PetscFunctionReturn(0);
115: }
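/*
   MatCreateTile_MPI: parallel kernel of MatCreateTile(). Global column indices of
   the blocks are translated through map1 (for columns of A and C) and map2 (for
   columns of B and D); the maps interleave the column ownership ranges of the two
   block columns process by process, which is the perfect-shuffle permutation
   mentioned in the notes of MatCreateTile() and keeps the local part of each
   block in the local part of G. The same maps are used to gather the dnz/onz
   preallocation counts before the scaled entries are inserted.
*/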
117: static PetscErrorCode MatCreateTile_MPI(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat G)
118: {
119: PetscErrorCode ierr;
120: PetscMPIInt np;
121: PetscInt p,i,j,N1,N2,m1,m2,n1,n2,*map1,*map2;
122: PetscInt *dnz,*onz,ncols,*scols,start,gstart;
123: PetscScalar *svals,*buf;
124: const PetscInt *cols,*mapptr1,*mapptr2;
125: const PetscScalar *vals;
127: MatGetSize(A,NULL,&N1);
128: MatGetLocalSize(A,&m1,&n1);
129: MatGetSize(D,NULL,&N2);
130: MatGetLocalSize(D,&m2,&n2);
132: /* Create mappings */
133: MPI_Comm_size(PetscObjectComm((PetscObject)G),&np);
134: MatGetOwnershipRangesColumn(A,&mapptr1);
135: MatGetOwnershipRangesColumn(B,&mapptr2);
136: PetscMalloc4(PetscMax(N1,N2),&buf,PetscMax(N1,N2),&scols,N1,&map1,N2,&map2);
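/* map1[i]: column of G where global column i of A/C ends up; map2[i]: same for columns of B/D */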
137: for (p=0;p<np;p++) {
138: for (i=mapptr1[p];i<mapptr1[p+1];i++) map1[i] = i+mapptr2[p];
139: }
140: for (p=0;p<np;p++) {
141: for (i=mapptr2[p];i<mapptr2[p+1];i++) map2[i] = i+mapptr1[p+1];
142: }
144: ierr = MatPreallocateInitialize(PetscObjectComm((PetscObject)G),m1+m2,n1+n2,dnz,onz);CHKERRQ(ierr);
145: MatGetOwnershipRange(G,&gstart,NULL);
146: /* Preallocate for A */
147: if (a!=0.0) {
148: MatGetOwnershipRange(A,&start,NULL);
149: for (i=0;i<m1;i++) {
150: MatGetRow(A,i+start,&ncols,&cols,NULL);
151: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
152: MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
153: MatRestoreRow(A,i+start,&ncols,&cols,NULL);
154: }
155: }
156: /* Preallocate for B */
157: if (b!=0.0) {
158: MatGetOwnershipRange(B,&start,NULL);
159: for (i=0;i<m1;i++) {
160: MatGetRow(B,i+start,&ncols,&cols,NULL);
161: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
162: MatPreallocateSet(gstart+i,ncols,scols,dnz,onz);
163: MatRestoreRow(B,i+start,&ncols,&cols,NULL);
164: }
165: }
166: /* Preallocate for C */
167: if (c!=0.0) {
168: MatGetOwnershipRange(C,&start,NULL);
169: for (i=0;i<m2;i++) {
170: MatGetRow(C,i+start,&ncols,&cols,NULL);
171: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
172: MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
173: MatRestoreRow(C,i+start,&ncols,&cols,NULL);
174: }
175: }
176: /* Preallocate for D */
177: if (d!=0.0) {
178: MatGetOwnershipRange(D,&start,NULL);
179: for (i=0;i<m2;i++) {
180: MatGetRow(D,i+start,&ncols,&cols,NULL);
181: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
182: MatPreallocateSet(gstart+m1+i,ncols,scols,dnz,onz);
183: MatRestoreRow(D,i+start,&ncols,&cols,NULL);
184: }
185: }
186: MatMPIAIJSetPreallocation(G,0,dnz,0,onz);
187: ierr = MatPreallocateFinalize(dnz,onz);CHKERRQ(ierr);
189: /* Transfer A */
190: if (a!=0.0) {
191: MatGetOwnershipRange(A,&start,NULL);
192: for (i=0;i<m1;i++) {
193: MatGetRow(A,i+start,&ncols,&cols,&vals);
194: if (a!=1.0) {
195: svals=buf;
196: for (j=0;j<ncols;j++) svals[j] = vals[j]*a;
197: } else svals=(PetscScalar*)vals;
198: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
199: j = gstart+i;
200: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
201: MatRestoreRow(A,i+start,&ncols,&cols,&vals);
202: }
203: }
204: /* Transfer B */
205: if (b!=0.0) {
206: MatGetOwnershipRange(B,&start,NULL);
207: for (i=0;i<m1;i++) {
208: MatGetRow(B,i+start,&ncols,&cols,&vals);
209: if (b!=1.0) {
210: svals=buf;
211: for (j=0;j<ncols;j++) svals[j] = vals[j]*b;
212: } else svals=(PetscScalar*)vals;
213: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
214: j = gstart+i;
215: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
216: MatRestoreRow(B,i+start,&ncols,&cols,&vals);
217: }
218: }
219: /* Transfer C */
220: if (c!=0.0) {
221: MatGetOwnershipRange(C,&start,NULL);
222: for (i=0;i<m2;i++) {
223: MatGetRow(C,i+start,&ncols,&cols,&vals);
224: if (c!=1.0) {
225: svals=buf;
226: for (j=0;j<ncols;j++) svals[j] = vals[j]*c;
227: } else svals=(PetscScalar*)vals;
228: for (j=0;j<ncols;j++) scols[j] = map1[cols[j]];
229: j = gstart+m1+i;
230: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
231: MatRestoreRow(C,i+start,&ncols,&cols,&vals);
232: }
233: }
234: /* Transfer D */
235: if (d!=0.0) {
236: MatGetOwnershipRange(D,&start,NULL);
237: for (i=0;i<m2;i++) {
238: MatGetRow(D,i+start,&ncols,&cols,&vals);
239: if (d!=1.0) {
240: svals=buf;
241: for (j=0;j<ncols;j++) svals[j] = vals[j]*d;
242: } else svals=(PetscScalar*)vals;
243: for (j=0;j<ncols;j++) scols[j] = map2[cols[j]];
244: j = gstart+m1+i;
245: MatSetValues(G,1,&j,ncols,scols,svals,INSERT_VALUES);
246: MatRestoreRow(D,i+start,&ncols,&cols,&vals);
247: }
248: }
249: PetscFree4(buf,scols,map1,map2);
250: PetscFunctionReturn(0);
251: }
253: /*@
254: MatCreateTile - Explicitly build a matrix from four blocks, G = [ a*A b*B; c*C d*D ].
256: Collective on A
258: Input Parameters:
259: + A - matrix for top-left block
260: . a - scaling factor for block A
261: . B - matrix for top-right block
262: . b - scaling factor for block B
263: . C - matrix for bottom-left block
264: . c - scaling factor for block C
265: . D - matrix for bottom-right block
266: - d - scaling factor for block D
268: Output Parameter:
269: . G - the resulting matrix
271: Notes:
272: In the case of a parallel matrix, a permuted version of G is returned. The permutation
273: is a perfect shuffle such that the local parts of A, B, C, D remain in the local part of
274: G for the same process.
276: Matrix G must be destroyed by the user.
278: The blocks can be of different types. They can be ConstantDiagonal, a standard
279: type such as AIJ, or any other type that supports the MatGetRow operation.
280: The type of the output matrix will be the same as the first block that is not
281: ConstantDiagonal (checked in the A,B,C,D order).
283: Level: developer
285: .seealso: MatCreateNest()
286: @*/
287: PetscErrorCode MatCreateTile(PetscScalar a,Mat A,PetscScalar b,Mat B,PetscScalar c,Mat C,PetscScalar d,Mat D,Mat *G)
288: {
289: PetscInt i,k,M1,M2,N1,N2,M,N,m1,m2,n1,n2,m,n,bs;
290: PetscBool diag[4];
291: Mat block[4] = {A,B,C,D};
292: MatType type[4];
293: PetscMPIInt size;
308: /* check row 1 */
309: MatGetSize(A,&M1,NULL);
310: MatGetLocalSize(A,&m1,NULL);
311: MatGetSize(B,&M,NULL);
312: MatGetLocalSize(B,&m,NULL);
314: /* check row 2 */
315: MatGetSize(C,&M2,NULL);
316: MatGetLocalSize(C,&m2,NULL);
317: MatGetSize(D,&M,NULL);
318: MatGetLocalSize(D,&m,NULL);
320: /* check column 1 */
321: MatGetSize(A,NULL,&N1);
322: MatGetLocalSize(A,NULL,&n1);
323: MatGetSize(C,NULL,&N);
324: MatGetLocalSize(C,NULL,&n);
326: /* check column 2 */
327: MatGetSize(B,NULL,&N2);
328: MatGetLocalSize(B,NULL,&n2);
329: MatGetSize(D,NULL,&N);
330: MatGetLocalSize(D,NULL,&n);
333: /* check matrix types */
334: for (i=0;i<4;i++) {
335: MatGetType(block[i],&type[i]);
336: PetscStrcmp(type[i],MATCONSTANTDIAGONAL,&diag[i]);
337: }
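/* the first block that is not ConstantDiagonal determines the type and block size of G */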
338: for (k=0;k<4;k++) if (!diag[k]) break;
341: MatGetBlockSize(block[k],&bs);
342: MatCreate(PetscObjectComm((PetscObject)block[k]),G);
343: MatSetSizes(*G,m1+m2,n1+n2,M1+M2,N1+N2);
344: MatSetType(*G,type[k]);
345: MatSetBlockSize(*G,bs);
346: MatSetUp(*G);
348: MPI_Comm_size(PetscObjectComm((PetscObject)*G),&size);
349: if (size>1) MatCreateTile_MPI(a,A,b,B,c,C,d,D,*G);
350: else MatCreateTile_Seq(a,A,b,B,c,C,d,D,*G);
351: MatAssemblyBegin(*G,MAT_FINAL_ASSEMBLY);
352: MatAssemblyEnd(*G,MAT_FINAL_ASSEMBLY);
353: PetscFunctionReturn(0);
354: }
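/*
   A minimal usage sketch of MatCreateTile(); the matrices K and M and the result L
   are illustrative assumptions, not objects from this file. Given two square
   matrices K and M with the same layout, build L = [ K  2*M ; M  K ]:

      Mat L;
      MatCreateTile(1.0,K,2.0,M,1.0,M,1.0,K,&L);
      MatView(L,PETSC_VIEWER_STDOUT_WORLD);
      MatDestroy(&L);

   A zero scaling factor causes the corresponding block to be skipped, but the
   matrix must still be passed since its dimensions enter the compatibility checks;
   for instance MatCreateTile(1.0,K,0.0,M,0.0,M,1.0,K,&L) yields a block-diagonal L.
*/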
356: /*@C
357: MatCreateVecsEmpty - Get vector(s) compatible with the matrix, i.e. with the same
358: parallel layout, but without an internal array.
360: Collective on mat
362: Input Parameter:
363: . mat - the matrix
365: Output Parameters:
366: + right - (optional) vector that the matrix can be multiplied against
367: - left - (optional) vector that the matrix vector product can be stored in
369: Note:
370: This is similar to MatCreateVecs(), but the new vectors do not have an internal
371: array, so the intended usage is with VecPlaceArray().
373: Level: developer
375: .seealso: VecDuplicateEmpty()
376: @*/
377: PetscErrorCode MatCreateVecsEmpty(Mat mat,Vec *right,Vec *left)
378: {
379: PetscBool standard,cuda=PETSC_FALSE,skip=PETSC_FALSE;
380: PetscInt M,N,mloc,nloc,rbs,cbs;
381: PetscMPIInt size;
382: Vec v;
387: PetscObjectTypeCompareAny((PetscObject)mat,&standard,MATSEQAIJ,MATMPIAIJ,MATSEQBAIJ,MATMPIBAIJ,MATSEQSBAIJ,MATMPISBAIJ,MATSEQDENSE,MATMPIDENSE,"");
388: PetscObjectTypeCompareAny((PetscObject)mat,&cuda,MATSEQAIJCUSPARSE,MATMPIAIJCUSPARSE,"");
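/* for matrix types not handled explicitly, fall back to MatCreateVecs(); keep those
   vectors only if they are not standard or CUDA vectors, otherwise replace them below
   with array-less variants */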
389: if (!standard && !cuda) {
390: MatCreateVecs(mat,right,left);
391: v = right? *right: *left;
392: if (v) {
393: PetscObjectTypeCompareAny((PetscObject)v,&standard,VECSEQ,VECMPI,"");
394: PetscObjectTypeCompareAny((PetscObject)v,&cuda,VECSEQCUDA,VECMPICUDA,"");
395: }
396: if (!standard && !cuda) skip = PETSC_TRUE;
397: else {
398: if (right) VecDestroy(right);
399: if (left) VecDestroy(left);
400: }
401: }
402: if (!skip) {
403: MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);
404: MatGetLocalSize(mat,&mloc,&nloc);
405: MatGetSize(mat,&M,&N);
406: MatGetBlockSizes(mat,&rbs,&cbs);
407: if (right) {
408: if (cuda) {
409: #if defined(PETSC_HAVE_CUDA)
410: if (size>1) VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
411: else VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
412: #endif
413: } else {
414: if (size>1) VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),cbs,nloc,N,NULL,right);
415: else VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),cbs,N,NULL,right);
416: }
417: }
418: if (left) {
419: if (cuda) {
420: #if defined(PETSC_HAVE_CUDA)
421: if (size>1) VecCreateMPICUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
422: else VecCreateSeqCUDAWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
423: #endif
424: } else {
425: if (size>1) VecCreateMPIWithArray(PetscObjectComm((PetscObject)mat),rbs,mloc,M,NULL,left);
426: else VecCreateSeqWithArray(PetscObjectComm((PetscObject)mat),rbs,M,NULL,left);
427: }
428: }
429: }
430: PetscFunctionReturn(0);
431: }
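/*
   A minimal usage sketch of MatCreateVecsEmpty(); the matrix A and the buffer xwork
   are illustrative assumptions. The vector is created without storage, user memory
   is attached with VecPlaceArray() before use and detached with VecResetArray():

      Vec         x,y;
      PetscScalar *xwork;      (user storage, at least the local column size of A)
      MatCreateVecsEmpty(A,&x,NULL);
      MatCreateVecs(A,NULL,&y);
      VecPlaceArray(x,xwork);
      MatMult(A,x,y);
      VecResetArray(x);
      VecDestroy(&x);
      VecDestroy(&y);
*/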
433: /*@C
434: MatNormEstimate - Estimate the 2-norm of a matrix.
436: Collective on A
438: Input Parameters:
439: + A - the matrix
440: . vrn - random vector with normally distributed entries (can be NULL)
441: - w - workspace vector (can be NULL)
443: Output Parameter:
444: . nrm - the norm estimate
446: Notes:
447: Does not need access to the matrix entries, just performs a matrix-vector product.
448: Based on work by I. Ipsen and coworkers https://ipsen.math.ncsu.edu/ps/slides_ima.pdf
450: The input vector vrn must have unit 2-norm.
451: If vrn is NULL, then it is created internally and filled with VecSetRandomNormal().
453: Level: developer
455: .seealso: VecSetRandomNormal()
456: @*/
457: PetscErrorCode MatNormEstimate(Mat A,Vec vrn,Vec w,PetscReal *nrm)
458: {
459: PetscInt n;
460: Vec vv=NULL,ww=NULL;
468: if (!vrn) {
469: MatCreateVecs(A,&vv,NULL);
470: vrn = vv;
471: VecSetRandomNormal(vv,NULL,NULL,NULL);
472: VecNormalize(vv,NULL);
473: }
474: if (!w) {
475: MatCreateVecs(A,&ww,NULL);
476: w = ww;
477: }
479: MatGetSize(A,&n,NULL);
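/* estimate: nrm = sqrt(n)*||A*vrn||_2, with vrn a random vector of unit 2-norm */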
480: MatMult(A,vrn,w);
481: VecNorm(w,NORM_2,nrm);
482: *nrm *= PetscSqrtReal((PetscReal)n);
484: VecDestroy(&vv);
485: VecDestroy(&ww);
486: PetscFunctionReturn(0);
487: }
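/*
   A minimal usage sketch of MatNormEstimate() for an existing matrix A: passing NULL
   for vrn and w lets the routine create the work vectors internally, so a rough
   2-norm estimate takes a single call. The cost is one matrix-vector product, and
   the result is only a rough estimate of the norm:

      PetscReal nrm;
      MatNormEstimate(A,NULL,NULL,&nrm);
      PetscPrintf(PETSC_COMM_WORLD,"estimated norm of A: %g\n",(double)nrm);
*/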