Actual source code: ex5.c


static char help[] = "Solves two linear systems in parallel with KSP.  The code\n\
illustrates repeated solution of linear systems with the same preconditioner\n\
method but different matrices (having the same nonzero structure).  The code\n\
also uses multiple profiling stages.  Input arguments are\n\
  -m <size> : problem size\n\
  -mat_nonsym : use nonsymmetric matrix (default is symmetric)\n\n";
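
/*
   Example invocations (a sketch only; it assumes an MPI launcher named mpiexec
   and a compiled executable named ex5 in the current directory):

     mpiexec -n 2 ./ex5 -m 10 -mat_nonsym -ksp_monitor_short -log_view
     mpiexec -n 4 ./ex5 -pc_type asm -test_scaledMat

   The options -m, -mat_nonsym, -test_scaledMat, and -test_newMat are read by
   this example; the other options above are standard PETSc runtime options.
*/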

/*T
   Concepts: KSP^repeatedly solving linear systems;
   Concepts: PetscLog^profiling multiple stages of code;
   Processors: n
T*/

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h       - base PETSc routines   petscvec.h - vectors
     petscmat.h - matrices
     petscis.h     - index sets            petscksp.h - Krylov subspace methods
     petscviewer.h - viewers               petscpc.h  - preconditioners
*/
#include <petscksp.h>

int main(int argc,char **args)
{
  KSP            ksp;              /* linear solver context */
  Mat            C;                /* matrix */
  Vec            x,u,b;            /* approx solution, exact solution, RHS */
  PetscReal      norm;             /* norm of solution error */
  PetscScalar    v,none = -1.0;
  PetscInt       Ii,J,ldim,low,high,iglobal,Istart,Iend;
  PetscInt       i,j,m = 3,n = 2,its;
  PetscMPIInt    size,rank;
  PetscBool      mat_nonsymmetric = PETSC_FALSE;
  PetscBool      testnewC = PETSC_FALSE,testscaledMat=PETSC_FALSE;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[2];
#endif

  PetscInitialize(&argc,&args,(char*)0,help);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n    = 2*size;

  /*
     Set flag if we are doing a nonsymmetric problem; the default is symmetric.
  */
  PetscOptionsGetBool(NULL,NULL,"-mat_nonsym",&mat_nonsymmetric,NULL);

  PetscOptionsGetBool(NULL,NULL,"-test_scaledMat",&testscaledMat,NULL);

  /*
     Register two stages for separate profiling of the two linear solves.
     Use the runtime option -log_view for a printout of performance
     statistics at the program's conclusion.
  */
  PetscLogStageRegister("Original Solve",&stages[0]);
  PetscLogStageRegister("Second Solve",&stages[1]);

  /* -------------- Stage 0: Solve Original System ---------------------- */
  /*
     Indicate to PETSc profiling that we're beginning the first stage
  */
  PetscLogStagePush(stages[0]);

  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.
  */
  MatCreate(PETSC_COMM_WORLD,&C);
  MatSetSizes(C,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C);
  MatSetUp(C);

  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors.  Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(C,&Istart,&Iend);

  /*
     Set matrix entries in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.
  */
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    v = 4.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
  }

  /*
     Make the matrix nonsymmetric if desired
  */
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  } else {
    MatSetOption(C,MAT_SYMMETRIC,PETSC_TRUE);
    MatSetOption(C,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
  }

  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  /*
     Create parallel vectors.
      - When using VecSetSizes(), we specify only the vector's global
        dimension; the parallel partitioning is determined at runtime.
      - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);

  /*
     Currently, all parallel PETSc vectors are partitioned by
     contiguous chunks across the processors.  Determine which
     range of entries is locally owned.
  */
  VecGetOwnershipRange(x,&low,&high);

  /*
    Set elements within the exact solution vector in parallel.
     - Each processor needs to insert only elements that it owns
       locally (but any non-local entries will be sent to the
       appropriate processor during vector assembly).
     - Always specify global locations of vector entries.
  */
  VecGetLocalSize(x,&ldim);
  for (i=0; i<ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100*rank);
    VecSetValues(u,1,&iglobal,&v,INSERT_VALUES);
  }

  /*
     Assemble vector, using the 2-step process:
       VecAssemblyBegin(), VecAssemblyEnd()
     Computations can be done while messages are in transit
     by placing code between these two statements.
  */
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);

  /*
     Compute right-hand-side vector
  */
  MatMult(C,u,b);

  /*
    Create linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,C,C);

  /*
     Set runtime options (e.g., -ksp_type <type> -pc_type <type>)
  */
  KSPSetFromOptions(ksp);

  /*
     Solve linear system.  Here we explicitly call KSPSetUp() for more
     detailed performance monitoring of certain preconditioners, such
     as ICC and ILU.  This call is optional, as KSPSetUp() will
     automatically be called within KSPSolve() if it hasn't been
     called already.
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /* -------------- Stage 1: Solve Second System ---------------------- */
  /*
     Solve another linear system with the same method.  We reuse the KSP
     context, matrix and vector data structures, and hence save the
     overhead of creating new ones.

     Indicate to PETSc profiling that we're concluding the first
     stage with PetscLogStagePop(), and beginning the second stage with
     PetscLogStagePush().
  */
  PetscLogStagePop();
  PetscLogStagePush(stages[1]);

  /*
     Initialize all matrix entries to zero.  MatZeroEntries() retains the
     nonzero structure of the matrix for sparse formats.
  */
  MatZeroEntries(C);

  /*
     Assemble matrix again.  Note that we retain the same matrix data
     structure and the same nonzero pattern; we just change the values
     of the matrix entries.
  */
  for (i=0; i<m; i++) {
    for (j=2*rank; j<2*rank+2; j++) {
      v = -1.0;  Ii = j + n*i;
      if (i>0)   {J = Ii - n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (i<m-1) {J = Ii + n; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j>0)   {J = Ii - 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j<n-1) {J = Ii + 1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
      v = 6.0; MatSetValues(C,1,&Ii,1,&Ii,&v,ADD_VALUES);
    }
  }
  if (mat_nonsymmetric) {
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.5; i = Ii/n;
      if (i>1)   {J = Ii-n-1; MatSetValues(C,1,&Ii,1,&J,&v,ADD_VALUES);}
    }
  }
  MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

  if (testscaledMat) {
    PetscRandom    rctx;

    /* Scale a(0,0) and a(M-1,M-1) */
    if (rank == 0) {
      v = 6.0*0.00001; Ii = 0; J = 0;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    } else if (rank == size-1) {
      v = 6.0*0.00001; Ii = m*n-1; J = m*n-1;
      MatSetValues(C,1,&Ii,1,&J,&v,INSERT_VALUES);
    }
    MatAssemblyBegin(C,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C,MAT_FINAL_ASSEMBLY);

    /* Compute a new right-hand-side vector */
    VecDestroy(&u);
    VecCreate(PETSC_COMM_WORLD,&u);
    VecSetSizes(u,PETSC_DECIDE,m*n);
    VecSetFromOptions(u);

    PetscRandomCreate(PETSC_COMM_WORLD,&rctx);
    PetscRandomSetFromOptions(rctx);
    VecSetRandom(u,rctx);
    PetscRandomDestroy(&rctx);
    VecAssemblyBegin(u);
    VecAssemblyEnd(u);
  }

  PetscOptionsGetBool(NULL,NULL,"-test_newMat",&testnewC,NULL);
  if (testnewC) {
    /*
     The user may supply a new matrix C with the same nonzero pattern, e.g.
      ./ex5 -ksp_monitor -mat_type sbaij -pc_type cholesky -pc_factor_mat_solver_type mumps -test_newMat
    */
    Mat Ctmp;
    MatDuplicate(C,MAT_COPY_VALUES,&Ctmp);
    MatDestroy(&C);
    MatDuplicate(Ctmp,MAT_COPY_VALUES,&C);
    MatDestroy(&Ctmp);
  }

  MatMult(C,u,b);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,C,C);

  /*
     Solve linear system
  */
  KSPSetUp(ksp);
  KSPSolve(ksp,b,x);

  /*
     Check the error
  */
  VecAXPY(x,none,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  if (!testscaledMat || norm > 1.e-7) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u);
  VecDestroy(&x);
  VecDestroy(&b);
  MatDestroy(&C);

  /*
     Indicate to PETSc profiling that we're concluding the second stage
  */
  PetscLogStagePop();

  PetscFinalize();
  return 0;
}

/*TEST

   test:
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -pc_type jacobi -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -ksp_rtol .000001

   test:
      suffix: 5
      nsize: 2
      args: -ksp_gmres_cgs_refinement_type refine_always -ksp_monitor draw::draw_lg -ksp_monitor_true_residual draw::draw_lg

   test:
      suffix: asm
      nsize: 4
      args: -pc_type asm

   test:
      suffix: asm_baij
      nsize: 4
      args: -pc_type asm -mat_type baij
      output_file: output/ex5_asm.out

   test:
      suffix: redundant_0
      args: -m 1000 -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_1
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 1 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_2
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_3
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 5 -redundant_ksp_type gmres -redundant_pc_type jacobi

   test:
      suffix: redundant_4
      nsize: 5
      args: -pc_type redundant -pc_redundant_number 3 -redundant_ksp_type gmres -redundant_pc_type jacobi -psubcomm_type interlaced

   test:
      suffix: superlu_dist
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat

   test:
      suffix: superlu_dist_2
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 150 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact SamePattern_SameRowPerm
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_3
      nsize: 15
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -mat_superlu_dist_equil false -m 500 -mat_superlu_dist_r 3 -mat_superlu_dist_c 5 -test_scaledMat -mat_superlu_dist_fact DOFACT
      output_file: output/ex5_superlu_dist.out

   test:
      suffix: superlu_dist_0
      nsize: 1
      requires: superlu_dist
      args: -pc_type lu -pc_factor_mat_solver_type superlu_dist -test_scaledMat
      output_file: output/ex5_superlu_dist.out

TEST*/