Actual source code: ex10.c


  2: static char help[] = "Solve a small system and a large system through preloading\n\
  3:   Input arguments are:\n\
  4:   -permute <natural,rcm,nd,...> : solve system in permuted indexing\n\
  5:   -f0 <small_sys_binary> -f1 <large_sys_binary> \n\n";

  7: /*T
  8:    Concepts: KSP^basic parallel example
  9:    Concepts: Mat^loading a binary matrix and vector;
 10:    Concepts: PetscLog^preloading executable
 11:    Processors: n
 12: T*/

 14: /*
 15:   Include "petscksp.h" so that we can use KSP solvers.  Note that this file
 16:   automatically includes:
 17:      petscsys.h       - base PETSc routines   petscvec.h - vectors
 18:      petscmat.h - matrices
 19:      petscis.h     - index sets            petscksp.h - Krylov subspace methods
 20:      petscviewer.h - viewers               petscpc.h  - preconditioners
 21: */
 22: #include <petscksp.h>

 24: typedef enum {
 25:   RHS_FILE,
 26:   RHS_ONE,
 27:   RHS_RANDOM
 28: } RHSType;
 29: const char *const RHSTypes[] = {"FILE", "ONE", "RANDOM", "RHSType", "RHS_", NULL};

 31: PetscErrorCode CheckResult(KSP *ksp, Mat *A, Vec *b, Vec *x, IS *rowperm)
 32: {
 33:   PetscReal         norm;        /* norm of solution error */
 34:   PetscInt          its;
 35:   KSPGetTotalIterations(*ksp,&its);
 36:   PetscPrintf(PETSC_COMM_WORLD,"Number of iterations = %d\n",its);

 38:   KSPGetResidualNorm(*ksp,&norm);
 39:   if (norm < 1.e-12) {
 40:     PetscPrintf(PETSC_COMM_WORLD,"Residual norm < 1.e-12\n");
 41:   } else {
 42:     PetscPrintf(PETSC_COMM_WORLD,"Residual norm %e\n",(double)norm);
 43:   }

 45:   KSPDestroy(ksp);
 46:   MatDestroy(A);
 47:   VecDestroy(x);
 48:   VecDestroy(b);
 49:   ISDestroy(rowperm);
 50:   return 0;
 51: }

 53: PetscErrorCode CreateSystem(const char filename[PETSC_MAX_PATH_LEN], RHSType rhstype, MatOrderingType ordering, PetscBool permute, IS *rowperm_out, Mat *A_out, Vec *b_out, Vec *x_out)
 54: {

 56:   Vec               x,b,b2;
 57:   Mat               A;           /* linear system matrix */
 58:   PetscViewer       viewer;      /* viewer */
 59:   PetscBool         same;
 60:   PetscInt          j,len,start,idx,n1,n2;
 61:   const PetscScalar *val;
 62:   IS                rowperm=NULL,colperm=NULL;

 64:   /* open binary file. Note that we use FILE_MODE_READ to indicate reading from this file */
 65:   PetscViewerBinaryOpen(PETSC_COMM_WORLD,filename,FILE_MODE_READ,&viewer);

 67:   /* load the matrix and vector; then destroy the viewer */
 68:   MatCreate(PETSC_COMM_WORLD,&A);
 69:   MatSetFromOptions(A);
 70:   MatLoad(A,viewer);
 71:   switch (rhstype) {
 72:   case RHS_FILE:
 73:     /* Vectors in the file might a different size than the matrix so we need a
 74:      * Vec whose size hasn't been set yet.  It'll get fixed below.  Otherwise we
 75:      * can create the correct size Vec. */
 76:     VecCreate(PETSC_COMM_WORLD,&b);
 77:     VecLoad(b,viewer);
 78:     break;
 79:   case RHS_ONE:
 80:     MatCreateVecs(A,&b,NULL);
 81:     VecSet(b,1.0);
 82:     break;
 83:   case RHS_RANDOM:
 84:     MatCreateVecs(A,&b,NULL);
 85:     VecSetRandom(b,NULL);
 86:     break;
 87:   }
 88:   PetscViewerDestroy(&viewer);

 90:   /* if the loaded matrix is larger than the vector (due to being padded
 91:      to match the block size of the system), then create a new padded vector
 92:    */
 93:   MatGetLocalSize(A,NULL,&n1);
 94:   VecGetLocalSize(b,&n2);
 95:   same = (n1 == n2)? PETSC_TRUE : PETSC_FALSE;
 96:   MPIU_Allreduce(MPI_IN_PLACE,&same,1,MPIU_BOOL,MPI_LAND,PETSC_COMM_WORLD);

 98:   if (!same) { /* create a new vector b by padding the old one */
 99:     VecCreate(PETSC_COMM_WORLD,&b2);
100:     VecSetSizes(b2,n1,PETSC_DECIDE);
101:     VecSetFromOptions(b2);
102:     VecGetOwnershipRange(b,&start,NULL);
103:     VecGetLocalSize(b,&len);
104:     VecGetArrayRead(b,&val);
105:     for (j=0; j<len; j++) {
106:       idx = start+j;
107:       VecSetValues(b2,1,&idx,val+j,INSERT_VALUES);
108:     }
109:     VecRestoreArrayRead(b,&val);
110:     VecDestroy(&b);
111:     VecAssemblyBegin(b2);
112:     VecAssemblyEnd(b2);
113:     b    = b2;
114:   }
115:   VecDuplicate(b,&x);

117:   if (permute) {
118:     Mat Aperm;
119:     MatGetOrdering(A,ordering,&rowperm,&colperm);
120:     MatPermute(A,rowperm,colperm,&Aperm);
121:     VecPermute(b,colperm,PETSC_FALSE);
122:     MatDestroy(&A);
123:     A    = Aperm;               /* Replace original operator with permuted version */
124:     ISDestroy(&colperm);
125:   }

127:   *b_out = b;
128:   *x_out = x;
129:   *A_out = A;
130:   *rowperm_out = rowperm;

132:   return 0;
133: }

135: /* ATTENTION: this is the example used in the Profiling chaper of the PETSc manual,
136:    where we referenced its profiling stages, preloading and output etc.
137:    When you modify it, please make sure it is still consistent with the manual.
138:  */
139: int main(int argc,char **args)
140: {
141:   PetscErrorCode    ierr;
142:   Vec               x,b;
143:   Mat               A;           /* linear system matrix */
144:   KSP               ksp;         /* Krylov subspace method context */
145:   char              file[2][PETSC_MAX_PATH_LEN],ordering[256]=MATORDERINGRCM;
146:   RHSType           rhstype = RHS_FILE;
147:   PetscBool         flg,preload=PETSC_FALSE,trans=PETSC_FALSE,permute=PETSC_FALSE;
148:   IS                rowperm=NULL;

150:   PetscInitialize(&argc,&args,(char*)0,help);

152:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"Preloading example options","");
153:   {
154:     /*
155:        Determine files from which we read the two linear systems
156:        (matrix and right-hand-side vector).
157:     */
158:     PetscOptionsBool("-trans","Solve transpose system instead","",trans,&trans,&flg);
159:     PetscOptionsString("-f","First file to load (small system)","",file[0],file[0],sizeof(file[0]),&flg);
160:     PetscOptionsFList("-permute","Permute matrix and vector to solve in new ordering","",MatOrderingList,ordering,ordering,sizeof(ordering),&permute);

162:     if (flg) {
163:       PetscStrcpy(file[1],file[0]);
164:       preload = PETSC_FALSE;
165:     } else {
166:       PetscOptionsString("-f0","First file to load (small system)","",file[0],file[0],sizeof(file[0]),&flg);
168:       PetscOptionsString("-f1","Second file to load (larger system)","",file[1],file[1],sizeof(file[1]),&flg);
169:       if (!flg) preload = PETSC_FALSE;   /* don't bother with second system */
170:     }

172:     PetscOptionsEnum("-rhs","Right hand side","",RHSTypes,(PetscEnum)rhstype,(PetscEnum*)&rhstype,NULL);
173:   }
174:   PetscOptionsEnd();

176:   /*
177:     To use preloading, one usually has code like the following:

179:     PetscPreLoadBegin(preload,"first stage);
180:       lines of code
181:     PetscPreLoadStage("second stage");
182:       lines of code
183:     PetscPreLoadEnd();

185:     The two macro PetscPreLoadBegin() and PetscPreLoadEnd() implicitly form a
186:     loop with maximal two iterations, depending whether preloading is turned on or
187:     not. If it is, either through the preload arg of PetscPreLoadBegin or through
188:     -preload command line, the trip count is 2, otherwise it is 1. One can use the
189:     predefined variable PetscPreLoadIt within the loop body to get the current
190:     iteration number, which is 0 or 1. If preload is turned on, the runtime doesn't
191:     do profiling for the first iteration, but it will do profiling for the second
192:     iteration instead.

194:     One can solve a small system in the first iteration and a large system in
195:     the second iteration. This process preloads the instructions with the small
196:     system so that more accurate performance monitoring (via -log_view) can be done
197:     with the large one (that actually is the system of interest).

199:     But in this example, we turned off preloading and duplicated the code for
200:     the large system. In general, it is a bad practice and one should not duplicate
201:     code. We do that because we want to show profiling stages for both the small
202:     system and the large system.
203:   */

205:   /*=========================
206:       solve a small system
207:     =========================*/

209:   PetscPreLoadBegin(preload,"Load System 0");
210:   CreateSystem(file[0],rhstype,ordering,permute,&rowperm,&A,&b,&x);

212:   PetscPreLoadStage("KSPSetUp 0");
213:   KSPCreate(PETSC_COMM_WORLD,&ksp);
214:   KSPSetOperators(ksp,A,A);
215:   KSPSetFromOptions(ksp);

217:   /*
218:     Here we explicitly call KSPSetUp() and KSPSetUpOnBlocks() to
219:     enable more precise profiling of setting up the preconditioner.
220:     These calls are optional, since both will be called within
221:     KSPSolve() if they haven't been called already.
222:   */
223:   KSPSetUp(ksp);
224:   KSPSetUpOnBlocks(ksp);

226:   PetscPreLoadStage("KSPSolve 0");
227:   if (trans) KSPSolveTranspose(ksp,b,x);
228:   else       KSPSolve(ksp,b,x);

230:   if (permute) VecPermute(x,rowperm,PETSC_TRUE);

232:   CheckResult(&ksp,&A,&b,&x,&rowperm);

234:   /*=========================
235:     solve a large system
236:     =========================*/

238:   PetscPreLoadStage("Load System 1");

240:   CreateSystem(file[1],rhstype,ordering,permute,&rowperm,&A,&b,&x);

242:   PetscPreLoadStage("KSPSetUp 1");
243:   KSPCreate(PETSC_COMM_WORLD,&ksp);
244:   KSPSetOperators(ksp,A,A);
245:   KSPSetFromOptions(ksp);

247:   /*
248:     Here we explicitly call KSPSetUp() and KSPSetUpOnBlocks() to
249:     enable more precise profiling of setting up the preconditioner.
250:     These calls are optional, since both will be called within
251:     KSPSolve() if they haven't been called already.
252:   */
253:   KSPSetUp(ksp);
254:   KSPSetUpOnBlocks(ksp);

256:   PetscPreLoadStage("KSPSolve 1");
257:   if (trans) KSPSolveTranspose(ksp,b,x);
258:   else       KSPSolve(ksp,b,x);

260:   if (permute) VecPermute(x,rowperm,PETSC_TRUE);

262:   CheckResult(&ksp,&A,&b,&x,&rowperm);

264:   PetscPreLoadEnd();
265:   /*
266:      Always call PetscFinalize() before exiting a program.  This routine
267:        - finalizes the PETSc libraries as well as MPI
268:        - provides summary and diagnostic information if certain runtime
269:          options are chosen (e.g., -log_view).
270:   */
271:   PetscFinalize();
272:   return 0;
273: }

275: /*TEST

277:    test:
278:       TODO: Matrix row/column sizes are not compatible with block size
279:       suffix: 1
280:       nsize: 4
281:       output_file: output/ex10_1.out
282:       requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)
283:       args: -f0 ${DATAFILESPATH}/matrices/medium -f1 ${DATAFILESPATH}/matrices/arco6 -ksp_gmres_classicalgramschmidt -mat_type baij -matload_block_size 3 -pc_type bjacobi

285:    test:
286:       TODO: Matrix row/column sizes are not compatible with block size
287:       suffix: 2
288:       nsize: 4
289:       output_file: output/ex10_2.out
290:       requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)
291:       args: -f0 ${DATAFILESPATH}/matrices/medium -f1 ${DATAFILESPATH}/matrices/arco6 -ksp_gmres_classicalgramschmidt -mat_type baij -matload_block_size 3 -pc_type bjacobi -trans

293:    test:
294:       suffix: 3
295:       requires: double complex !defined(PETSC_USE_64BIT_INDICES)
296:       args: -f ${wPETSC_DIR}/share/petsc/datafiles/matrices/nh-complex-int32-float64 -ksp_type bicg

298:    test:
299:       suffix: 4
300:       args: -f ${DATAFILESPATH}/matrices/medium -ksp_type bicg -permute rcm
301:       requires: datafilespath double !complex !defined(PETSC_USE_64BIT_INDICES)

303: TEST*/