Actual source code: axpy.c
2: #include <petsc/private/matimpl.h>
4: static PetscErrorCode MatTransposeAXPY_Private(Mat Y,PetscScalar a,Mat X,MatStructure str,Mat T)
5: {
6: Mat A,F;
7: PetscErrorCode (*f)(Mat,Mat*);
9: PetscObjectQueryFunction((PetscObject)T,"MatTransposeGetMat_C",&f);
10: if (f) {
11: MatTransposeGetMat(T,&A);
12: if (T == X) {
13: PetscInfo(NULL,"Explicitly transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
14: MatTranspose(A,MAT_INITIAL_MATRIX,&F);
15: A = Y;
16: } else {
17: PetscInfo(NULL,"Transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
18: MatTranspose(X,MAT_INITIAL_MATRIX,&F);
19: }
20: } else {
21: MatHermitianTransposeGetMat(T,&A);
22: if (T == X) {
23: PetscInfo(NULL,"Explicitly Hermitian transposing X of type MATTRANSPOSEMAT to perform MatAXPY()\n");
24: MatHermitianTranspose(A,MAT_INITIAL_MATRIX,&F);
25: A = Y;
26: } else {
27: PetscInfo(NULL,"Hermitian transposing X because Y of type MATTRANSPOSEMAT to perform MatAXPY()\n");
28: MatHermitianTranspose(X,MAT_INITIAL_MATRIX,&F);
29: }
30: }
31: MatAXPY(A,a,F,str);
32: MatDestroy(&F);
33: return 0;
34: }
36: /*@
37: MatAXPY - Computes Y = a*X + Y.
39: Logically Collective on Mat
41: Input Parameters:
42: + a - the scalar multiplier
43: . X - the first matrix
44: . Y - the second matrix
45: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
47: Notes: No operation is performed when a is zero.
49: Level: intermediate
51: .seealso: MatAYPX()
52: @*/
53: PetscErrorCode MatAXPY(Mat Y,PetscScalar a,Mat X,MatStructure str)
54: {
55: PetscInt M1,M2,N1,N2;
56: PetscInt m1,m2,n1,n2;
57: MatType t1,t2;
58: PetscBool sametype,transpose;
64: MatGetSize(X,&M1,&N1);
65: MatGetSize(Y,&M2,&N2);
66: MatGetLocalSize(X,&m1,&n1);
67: MatGetLocalSize(Y,&m2,&n2);
72: if (a == (PetscScalar)0.0) return 0;
73: if (Y == X) {
74: MatScale(Y,1.0 + a);
75: return 0;
76: }
77: MatGetType(X,&t1);
78: MatGetType(Y,&t2);
79: PetscStrcmp(t1,t2,&sametype);
80: PetscLogEventBegin(MAT_AXPY,Y,0,0,0);
81: if (Y->ops->axpy && (sametype || X->ops->axpy == Y->ops->axpy)) {
82: (*Y->ops->axpy)(Y,a,X,str);
83: } else {
84: PetscStrcmp(t1,MATTRANSPOSEMAT,&transpose);
85: if (transpose) {
86: MatTransposeAXPY_Private(Y,a,X,str,X);
87: } else {
88: PetscStrcmp(t2,MATTRANSPOSEMAT,&transpose);
89: if (transpose) {
90: MatTransposeAXPY_Private(Y,a,X,str,Y);
91: } else {
92: MatAXPY_Basic(Y,a,X,str);
93: }
94: }
95: }
96: PetscLogEventEnd(MAT_AXPY,Y,0,0,0);
97: return 0;
98: }
100: PetscErrorCode MatAXPY_Basic_Preallocate(Mat Y, Mat X, Mat *B)
101: {
102: PetscErrorCode (*preall)(Mat,Mat,Mat*) = NULL;
104: /* look for any available faster alternative to the general preallocator */
105: PetscObjectQueryFunction((PetscObject)Y,"MatAXPYGetPreallocation_C",&preall);
106: if (preall) {
107: (*preall)(Y,X,B);
108: } else { /* Use MatPrellocator, assumes same row-col distribution */
109: Mat preallocator;
110: PetscInt r,rstart,rend;
111: PetscInt m,n,M,N;
113: MatGetRowUpperTriangular(Y);
114: MatGetRowUpperTriangular(X);
115: MatGetSize(Y,&M,&N);
116: MatGetLocalSize(Y,&m,&n);
117: MatCreate(PetscObjectComm((PetscObject)Y),&preallocator);
118: MatSetType(preallocator,MATPREALLOCATOR);
119: MatSetLayouts(preallocator,Y->rmap,Y->cmap);
120: MatSetUp(preallocator);
121: MatGetOwnershipRange(preallocator,&rstart,&rend);
122: for (r = rstart; r < rend; ++r) {
123: PetscInt ncols;
124: const PetscInt *row;
125: const PetscScalar *vals;
127: MatGetRow(Y,r,&ncols,&row,&vals);
128: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
129: MatRestoreRow(Y,r,&ncols,&row,&vals);
130: MatGetRow(X,r,&ncols,&row,&vals);
131: MatSetValues(preallocator,1,&r,ncols,row,vals,INSERT_VALUES);
132: MatRestoreRow(X,r,&ncols,&row,&vals);
133: }
134: MatSetOption(preallocator,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE);
135: MatAssemblyBegin(preallocator,MAT_FINAL_ASSEMBLY);
136: MatAssemblyEnd(preallocator,MAT_FINAL_ASSEMBLY);
137: MatRestoreRowUpperTriangular(Y);
138: MatRestoreRowUpperTriangular(X);
140: MatCreate(PetscObjectComm((PetscObject)Y),B);
141: MatSetType(*B,((PetscObject)Y)->type_name);
142: MatSetLayouts(*B,Y->rmap,Y->cmap);
143: MatPreallocatorPreallocate(preallocator,PETSC_FALSE,*B);
144: MatDestroy(&preallocator);
145: }
146: return 0;
147: }
149: PetscErrorCode MatAXPY_Basic(Mat Y,PetscScalar a,Mat X,MatStructure str)
150: {
151: PetscBool isshell,isdense,isnest;
153: MatIsShell(Y,&isshell);
154: if (isshell) { /* MatShell has special support for AXPY */
155: PetscErrorCode (*f)(Mat,PetscScalar,Mat,MatStructure);
157: MatGetOperation(Y,MATOP_AXPY,(void (**)(void))&f);
158: if (f) {
159: (*f)(Y,a,X,str);
160: return 0;
161: }
162: }
163: /* no need to preallocate if Y is dense */
164: PetscObjectBaseTypeCompareAny((PetscObject)Y,&isdense,MATSEQDENSE,MATMPIDENSE,"");
165: if (isdense) {
166: PetscObjectTypeCompare((PetscObject)X,MATNEST,&isnest);
167: if (isnest) {
168: MatAXPY_Dense_Nest(Y,a,X);
169: return 0;
170: }
171: if (str == DIFFERENT_NONZERO_PATTERN || str == UNKNOWN_NONZERO_PATTERN) str = SUBSET_NONZERO_PATTERN;
172: }
173: if (str != DIFFERENT_NONZERO_PATTERN && str != UNKNOWN_NONZERO_PATTERN) {
174: PetscInt i,start,end,j,ncols,m,n;
175: const PetscInt *row;
176: PetscScalar *val;
177: const PetscScalar *vals;
179: MatGetSize(X,&m,&n);
180: MatGetOwnershipRange(X,&start,&end);
181: MatGetRowUpperTriangular(X);
182: if (a == 1.0) {
183: for (i = start; i < end; i++) {
184: MatGetRow(X,i,&ncols,&row,&vals);
185: MatSetValues(Y,1,&i,ncols,row,vals,ADD_VALUES);
186: MatRestoreRow(X,i,&ncols,&row,&vals);
187: }
188: } else {
189: PetscInt vs = 100;
190: /* realloc if needed, as this function may be used in parallel */
191: PetscMalloc1(vs,&val);
192: for (i=start; i<end; i++) {
193: MatGetRow(X,i,&ncols,&row,&vals);
194: if (vs < ncols) {
195: vs = PetscMin(2*ncols,n);
196: PetscRealloc(vs*sizeof(*val),&val);
197: }
198: for (j=0; j<ncols; j++) val[j] = a*vals[j];
199: MatSetValues(Y,1,&i,ncols,row,val,ADD_VALUES);
200: MatRestoreRow(X,i,&ncols,&row,&vals);
201: }
202: PetscFree(val);
203: }
204: MatRestoreRowUpperTriangular(X);
205: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
206: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
207: } else {
208: Mat B;
210: MatAXPY_Basic_Preallocate(Y,X,&B);
211: MatAXPY_BasicWithPreallocation(B,Y,a,X,str);
212: MatHeaderMerge(Y,&B);
213: }
214: return 0;
215: }
217: PetscErrorCode MatAXPY_BasicWithPreallocation(Mat B,Mat Y,PetscScalar a,Mat X,MatStructure str)
218: {
219: PetscInt i,start,end,j,ncols,m,n;
220: const PetscInt *row;
221: PetscScalar *val;
222: const PetscScalar *vals;
224: MatGetSize(X,&m,&n);
225: MatGetOwnershipRange(X,&start,&end);
226: MatGetRowUpperTriangular(Y);
227: MatGetRowUpperTriangular(X);
228: if (a == 1.0) {
229: for (i = start; i < end; i++) {
230: MatGetRow(Y,i,&ncols,&row,&vals);
231: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
232: MatRestoreRow(Y,i,&ncols,&row,&vals);
234: MatGetRow(X,i,&ncols,&row,&vals);
235: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
236: MatRestoreRow(X,i,&ncols,&row,&vals);
237: }
238: } else {
239: PetscInt vs = 100;
240: /* realloc if needed, as this function may be used in parallel */
241: PetscMalloc1(vs,&val);
242: for (i=start; i<end; i++) {
243: MatGetRow(Y,i,&ncols,&row,&vals);
244: MatSetValues(B,1,&i,ncols,row,vals,ADD_VALUES);
245: MatRestoreRow(Y,i,&ncols,&row,&vals);
247: MatGetRow(X,i,&ncols,&row,&vals);
248: if (vs < ncols) {
249: vs = PetscMin(2*ncols,n);
250: PetscRealloc(vs*sizeof(*val),&val);
251: }
252: for (j=0; j<ncols; j++) val[j] = a*vals[j];
253: MatSetValues(B,1,&i,ncols,row,val,ADD_VALUES);
254: MatRestoreRow(X,i,&ncols,&row,&vals);
255: }
256: PetscFree(val);
257: }
258: MatRestoreRowUpperTriangular(Y);
259: MatRestoreRowUpperTriangular(X);
260: MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);
261: MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);
262: return 0;
263: }
265: /*@
266: MatShift - Computes Y = Y + a I, where a is a PetscScalar and I is the identity matrix.
268: Neighbor-wise Collective on Mat
270: Input Parameters:
271: + Y - the matrices
272: - a - the PetscScalar
274: Level: intermediate
276: Notes:
277: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
278: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
279: entry). No operation is performed when a is zero.
281: To form Y = Y + diag(V) use MatDiagonalSet()
283: .seealso: MatDiagonalSet(), MatScale(), MatDiagonalScale()
284: @*/
285: PetscErrorCode MatShift(Mat Y,PetscScalar a)
286: {
290: MatCheckPreallocated(Y,1);
291: if (a == 0.0) return 0;
293: if (Y->ops->shift) (*Y->ops->shift)(Y,a);
294: else MatShift_Basic(Y,a);
296: PetscObjectStateIncrease((PetscObject)Y);
297: return 0;
298: }
300: PetscErrorCode MatDiagonalSet_Default(Mat Y,Vec D,InsertMode is)
301: {
302: PetscInt i,start,end;
303: const PetscScalar *v;
305: MatGetOwnershipRange(Y,&start,&end);
306: VecGetArrayRead(D,&v);
307: for (i=start; i<end; i++) {
308: MatSetValues(Y,1,&i,1,&i,v+i-start,is);
309: }
310: VecRestoreArrayRead(D,&v);
311: MatAssemblyBegin(Y,MAT_FINAL_ASSEMBLY);
312: MatAssemblyEnd(Y,MAT_FINAL_ASSEMBLY);
313: return 0;
314: }
316: /*@
317: MatDiagonalSet - Computes Y = Y + D, where D is a diagonal matrix
318: that is represented as a vector. Or Y[i,i] = D[i] if InsertMode is
319: INSERT_VALUES.
321: Neighbor-wise Collective on Mat
323: Input Parameters:
324: + Y - the input matrix
325: . D - the diagonal matrix, represented as a vector
326: - i - INSERT_VALUES or ADD_VALUES
328: Notes:
329: If the matrix Y is missing some diagonal entries this routine can be very slow. To make it fast one should initially
330: fill the matrix so that all diagonal entries have a value (with a value of zero for those locations that would not have an
331: entry).
333: Level: intermediate
335: .seealso: MatShift(), MatScale(), MatDiagonalScale()
336: @*/
337: PetscErrorCode MatDiagonalSet(Mat Y,Vec D,InsertMode is)
338: {
339: PetscInt matlocal,veclocal;
343: MatGetLocalSize(Y,&matlocal,NULL);
344: VecGetLocalSize(D,&veclocal);
346: if (Y->ops->diagonalset) {
347: (*Y->ops->diagonalset)(Y,D,is);
348: } else {
349: MatDiagonalSet_Default(Y,D,is);
350: }
351: PetscObjectStateIncrease((PetscObject)Y);
352: return 0;
353: }
355: /*@
356: MatAYPX - Computes Y = a*Y + X.
358: Logically on Mat
360: Input Parameters:
361: + a - the PetscScalar multiplier
362: . Y - the first matrix
363: . X - the second matrix
364: - str - either SAME_NONZERO_PATTERN, DIFFERENT_NONZERO_PATTERN, UNKNOWN_NONZERO_PATTERN, or SUBSET_NONZERO_PATTERN (nonzeros of X is a subset of Y's)
366: Level: intermediate
368: .seealso: MatAXPY()
369: @*/
370: PetscErrorCode MatAYPX(Mat Y,PetscScalar a,Mat X,MatStructure str)
371: {
372: MatScale(Y,a);
373: MatAXPY(Y,1.0,X,str);
374: return 0;
375: }
377: /*@
378: MatComputeOperator - Computes the explicit matrix
380: Collective on Mat
382: Input Parameters:
383: + inmat - the matrix
384: - mattype - the matrix type for the explicit operator
386: Output Parameter:
387: . mat - the explicit operator
389: Notes:
390: This computation is done by applying the operators to columns of the identity matrix.
391: This routine is costly in general, and is recommended for use only with relatively small systems.
392: Currently, this routine uses a dense matrix format if mattype == NULL.
394: Level: advanced
396: @*/
397: PetscErrorCode MatComputeOperator(Mat inmat,MatType mattype,Mat *mat)
398: {
401: MatConvert_Shell(inmat,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
402: return 0;
403: }
405: /*@
406: MatComputeOperatorTranspose - Computes the explicit matrix representation of
407: a give matrix that can apply MatMultTranspose()
409: Collective on Mat
411: Input Parameters:
412: + inmat - the matrix
413: - mattype - the matrix type for the explicit operator
415: Output Parameter:
416: . mat - the explicit operator transposed
418: Notes:
419: This computation is done by applying the transpose of the operator to columns of the identity matrix.
420: This routine is costly in general, and is recommended for use only with relatively small systems.
421: Currently, this routine uses a dense matrix format if mattype == NULL.
423: Level: advanced
425: @*/
426: PetscErrorCode MatComputeOperatorTranspose(Mat inmat,MatType mattype,Mat *mat)
427: {
428: Mat A;
432: MatCreateTranspose(inmat,&A);
433: MatConvert_Shell(A,mattype ? mattype : MATDENSE,MAT_INITIAL_MATRIX,mat);
434: MatDestroy(&A);
435: return 0;
436: }
438: /*@
439: MatChop - Set all values in the matrix less than the tolerance to zero
441: Input Parameters:
442: + A - The matrix
443: - tol - The zero tolerance
445: Output Parameters:
446: . A - The chopped matrix
448: Level: intermediate
450: .seealso: MatCreate(), MatZeroEntries()
451: @*/
452: PetscErrorCode MatChop(Mat A, PetscReal tol)
453: {
454: Mat a;
455: PetscScalar *newVals;
456: PetscInt *newCols, rStart, rEnd, numRows, maxRows, r, colMax = 0;
457: PetscBool flg;
459: PetscObjectBaseTypeCompareAny((PetscObject)A, &flg, MATSEQDENSE, MATMPIDENSE, "");
460: if (flg) {
461: MatDenseGetLocalMatrix(A, &a);
462: MatDenseGetLDA(a, &r);
463: MatGetSize(a, &rStart, &rEnd);
464: MatDenseGetArray(a, &newVals);
465: for (; colMax < rEnd; ++colMax) {
466: for (maxRows = 0; maxRows < rStart; ++maxRows) {
467: newVals[maxRows + colMax * r] = PetscAbsScalar(newVals[maxRows + colMax * r]) < tol ? 0.0 : newVals[maxRows + colMax * r];
468: }
469: }
470: MatDenseRestoreArray(a, &newVals);
471: } else {
472: MatGetOwnershipRange(A, &rStart, &rEnd);
473: MatGetRowUpperTriangular(A);
474: for (r = rStart; r < rEnd; ++r) {
475: PetscInt ncols;
477: MatGetRow(A, r, &ncols, NULL, NULL);
478: colMax = PetscMax(colMax, ncols);
479: MatRestoreRow(A, r, &ncols, NULL, NULL);
480: }
481: numRows = rEnd - rStart;
482: MPIU_Allreduce(&numRows, &maxRows, 1, MPIU_INT, MPI_MAX, PetscObjectComm((PetscObject)A));
483: PetscMalloc2(colMax, &newCols, colMax, &newVals);
484: MatGetOption(A, MAT_NO_OFF_PROC_ENTRIES, &flg); /* cache user-defined value */
485: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, PETSC_TRUE);
486: /* short-circuit code in MatAssemblyBegin() and MatAssemblyEnd() */
487: /* that are potentially called many times depending on the distribution of A */
488: for (r = rStart; r < rStart+maxRows; ++r) {
489: const PetscScalar *vals;
490: const PetscInt *cols;
491: PetscInt ncols, newcols, c;
493: if (r < rEnd) {
494: MatGetRow(A, r, &ncols, &cols, &vals);
495: for (c = 0; c < ncols; ++c) {
496: newCols[c] = cols[c];
497: newVals[c] = PetscAbsScalar(vals[c]) < tol ? 0.0 : vals[c];
498: }
499: newcols = ncols;
500: MatRestoreRow(A, r, &ncols, &cols, &vals);
501: MatSetValues(A, 1, &r, newcols, newCols, newVals, INSERT_VALUES);
502: }
503: MatAssemblyBegin(A, MAT_FINAL_ASSEMBLY);
504: MatAssemblyEnd(A, MAT_FINAL_ASSEMBLY);
505: }
506: MatRestoreRowUpperTriangular(A);
507: PetscFree2(newCols, newVals);
508: MatSetOption(A, MAT_NO_OFF_PROC_ENTRIES, flg); /* reset option to its user-defined value */
509: }
510: return 0;
511: }