Actual source code: ex9.c


static char help[] = "The solution of 2 different linear systems with different linear solvers.\n\
Also, this example illustrates the repeated\n\
solution of linear systems, while reusing matrix, vector, and solver data\n\
structures throughout the process.  Note the various stages of event logging.\n\n";

/*T
   Concepts: KSP^repeatedly solving linear systems;
   Concepts: PetscLog^profiling multiple stages of code;
   Concepts: PetscLog^user-defined event profiling;
   Processors: n
T*/

/*
  Include "petscksp.h" so that we can use KSP solvers.  Note that this file
  automatically includes:
     petscsys.h    - base PETSc routines      petscvec.h - vectors
     petscmat.h    - matrices
     petscis.h     - index sets               petscksp.h - Krylov subspace methods
     petscviewer.h - viewers                  petscpc.h  - preconditioners
*/
#include <petscksp.h>

/*
   Declare user-defined routines
*/
extern PetscErrorCode CheckError(Vec,Vec,Vec,PetscInt,PetscReal,PetscLogEvent);
extern PetscErrorCode MyKSPMonitor(KSP,PetscInt,PetscReal,void*);

int main(int argc,char **args)
{
  Vec            x1,b1,x2,b2;   /* solution and RHS vectors for systems #1 and #2 */
  Vec            u;             /* exact solution vector */
  Mat            C1,C2;         /* matrices for systems #1 and #2 */
  KSP            ksp1,ksp2;     /* KSP contexts for systems #1 and #2 */
  PetscInt       ntimes = 3;    /* number of times to solve the linear systems */
  PetscLogEvent  CHECK_ERROR;   /* event number for error checking */
  PetscInt       ldim,low,high,iglobal,Istart,Iend,Istart2,Iend2;
  PetscInt       Ii,J,i,j,m = 3,n = 2,its,t;
  PetscBool      flg = PETSC_FALSE, unsym = PETSC_TRUE;
  PetscScalar    v;
  PetscMPIInt    rank,size;
#if defined(PETSC_USE_LOG)
  PetscLogStage stages[3];
#endif

  PetscInitialize(&argc,&args,(char*)0,help);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  PetscOptionsGetInt(NULL,NULL,"-t",&ntimes,NULL);
  PetscOptionsGetBool(NULL,NULL,"-unsym",&unsym,NULL);
  MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
  MPI_Comm_size(PETSC_COMM_WORLD,&size);
  n    = 2*size;
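  /*
     The global problem is defined on an m x n grid.  Setting n = 2*size ties the
     grid width to the number of MPI processes, so each process owns exactly two
     grid columns; this is used when assembling the second matrix below.
  */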

  /*
     Register various stages for profiling
  */
  PetscLogStageRegister("Prelim setup",&stages[0]);
  PetscLogStageRegister("Linear System 1",&stages[1]);
  PetscLogStageRegister("Linear System 2",&stages[2]);

  /*
     Register a user-defined event for profiling (error checking).
  */
  CHECK_ERROR = 0;
  PetscLogEventRegister("Check Error",KSP_CLASSID,&CHECK_ERROR);
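  /*
     When the program is run with -log_view, the three stages registered above and
     the "Check Error" event appear as separate entries in the profiling summary.
  */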

  /* - - - - - - - - - - - - Stage 0: - - - - - - - - - - - - - -
                        Preliminary Setup
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

  PetscLogStagePush(stages[0]);

  /*
     Create data structures for first linear system.
      - Create parallel matrix, specifying only its global dimensions.
        When using MatCreate(), the matrix format can be specified at
        runtime. Also, the parallel partitioning of the matrix is
        determined by PETSc at runtime.
      - Create parallel vectors.
        - When using VecSetSizes(), we specify only the vector's global
          dimension; the parallel partitioning is determined at runtime.
        - Note: We form 1 vector from scratch and then duplicate as needed.
  */
  MatCreate(PETSC_COMM_WORLD,&C1);
  MatSetSizes(C1,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C1);
  MatSetUp(C1);
  MatGetOwnershipRange(C1,&Istart,&Iend);
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b1);
  VecDuplicate(u,&x1);
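  /*
     Note: for brevity this example relies on MatSetUp() rather than explicit
     preallocation; production codes would normally preallocate the matrix
     (e.g., with MatMPIAIJSetPreallocation()) for performance.
  */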

  /*
     Create first linear solver context.
     Set runtime options (e.g., -pc_type <type>).
     Note that the first linear system uses the default option
     names, while the second linear system uses a different
     options prefix.
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp1);
  KSPSetFromOptions(ksp1);

  /*
     Set user-defined monitoring routine for first linear system.
  */
  PetscOptionsGetBool(NULL,NULL,"-my_ksp_monitor",&flg,NULL);
  if (flg) KSPMonitorSet(ksp1,MyKSPMonitor,NULL,0);
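  /*
     The monitor is activated at runtime with -my_ksp_monitor; at each iteration it
     prints the current solution vector and residual norm (see MyKSPMonitor() below).
  */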

  /*
     Create data structures for second linear system.
  */
  MatCreate(PETSC_COMM_WORLD,&C2);
  MatSetSizes(C2,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(C2);
  MatSetUp(C2);
  MatGetOwnershipRange(C2,&Istart2,&Iend2);
  VecDuplicate(u,&b2);
  VecDuplicate(u,&x2);

  /*
     Create second linear solver context
  */
  KSPCreate(PETSC_COMM_WORLD,&ksp2);

  /*
     Set different options prefix for second linear system.
     Set runtime options (e.g., -s2_pc_type <type>).
  */
  KSPAppendOptionsPrefix(ksp2,"s2_");
  KSPSetFromOptions(ksp2);
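  /*
     With the "s2_" prefix, the second solver is controlled by options such as
     -s2_ksp_type bcgs -s2_pc_type jacobi (see the test cases at the end of this
     file), while -ksp_type, -pc_type, etc. continue to control the first solver.
  */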

  /*
     Assemble exact solution vector in parallel.  Note that each
     processor needs to set only its local part of the vector.
  */
  VecGetLocalSize(u,&ldim);
  VecGetOwnershipRange(u,&low,&high);
  for (i=0; i<ldim; i++) {
    iglobal = i + low;
    v       = (PetscScalar)(i + 100*rank);
    VecSetValues(u,1,&iglobal,&v,ADD_VALUES);
  }
  VecAssemblyBegin(u);
  VecAssemblyEnd(u);
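  /*
     Every entry set above is locally owned (global indices low..high-1), so no
     off-process values need to be communicated during VecAssemblyBegin/End().
  */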

  /*
     Log the number of flops for computing vector entries
  */
  PetscLogFlops(2.0*ldim);
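  /*
     The count of 2.0*ldim charges roughly one multiplication and one addition
     (computing i + 100*rank) per locally owned entry.
  */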

  /*
     End current profiling stage
  */
  PetscLogStagePop();

  /* --------------------------------------------------------------
                        Linear solver loop:
      Solve 2 different linear systems several times in succession
     -------------------------------------------------------------- */

  for (t=0; t<ntimes; t++) {

    /* - - - - - - - - - - - - Stage 1: - - - - - - - - - - - - - -
                 Assemble and solve first linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Begin profiling stage #1
    */
    PetscLogStagePush(stages[1]);

    /*
       Initialize all matrix entries to zero.  MatZeroEntries() retains
       the nonzero structure of the matrix for sparse formats.
    */
    if (t > 0) MatZeroEntries(C1);
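    /*
       On the first pass (t == 0) the matrix holds no entries yet, so zeroing is
       skipped; on later passes the values are zeroed while the nonzero pattern
       established previously is kept.
    */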

    /*
       Set matrix entries in parallel.  Also, log the number of flops
       for computing matrix entries.
        - Each processor needs to insert only elements that it owns
          locally (but any non-local elements will be sent to the
          appropriate processor during matrix assembly).
        - Always specify global rows and columns of matrix entries.
    */
    for (Ii=Istart; Ii<Iend; Ii++) {
      v = -1.0; i = Ii/n; j = Ii - i*n;
      if (i>0)   {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (i<m-1) {J = Ii + n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j>0)   {J = Ii - 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      if (j<n-1) {J = Ii + 1; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      v = 4.0; MatSetValues(C1,1,&Ii,1,&Ii,&v,ADD_VALUES);
    }
    if (unsym) {
      for (Ii=Istart; Ii<Iend; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0*(t+0.5); i = Ii/n;
        if (i>0)   {J = Ii - n; MatSetValues(C1,1,&Ii,1,&J,&v,ADD_VALUES);}
      }
      PetscLogFlops(2.0*(Iend-Istart));
    }
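    /*
       The loops above build a standard 5-point stencil on the m x n grid (diagonal
       value 4, value -1 for each of the four neighbors); when -unsym is true, an
       additional -1.0*(t+0.5) is added to the lower off-diagonal block so that the
       matrix changes with t and becomes nonsymmetric.
    */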

    /*
       Assemble matrix, using the 2-step process:
         MatAssemblyBegin(), MatAssemblyEnd()
       Computations can be done while messages are in transit
       by placing code between these two statements.
    */
    MatAssemblyBegin(C1,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C1,MAT_FINAL_ASSEMBLY);

    /*
       For this first system, MAT_NEW_NONZERO_LOCATIONS is left at PETSC_TRUE (the
       default), so new nonzero locations may still be inserted on later passes; the
       second system below sets it to PETSC_FALSE to fix the nonzero structure.
    */
    MatSetOption(C1,MAT_NEW_NONZERO_LOCATIONS,PETSC_TRUE);

    /*
       Compute right-hand-side vector
    */
    MatMult(C1,u,b1);

    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.
    */
    KSPSetOperators(ksp1,C1,C1);

    /*
       Use the previous solution of linear system #1 as the initial
       guess for the next solve of linear system #1.  The user MUST
       call KSPSetInitialGuessNonzero() to indicate use of an initial
       guess vector; otherwise, an initial guess of zero is used.
    */
    if (t>0) {
      KSPSetInitialGuessNonzero(ksp1,PETSC_TRUE);
    }

    /*
       Solve the first linear system.  Here we explicitly call
       KSPSetUp() for more detailed performance monitoring of
       certain preconditioners, such as ICC and ILU.  This call
       is optional, as KSPSetUp() will automatically be called
       within KSPSolve() if it hasn't been called already.
    */
    KSPSetUp(ksp1);
    KSPSolve(ksp1,b1,x1);
    KSPGetIterationNumber(ksp1,&its);

    /*
       Check error of solution to first linear system
    */
    CheckError(u,x1,b1,its,1.e-4,CHECK_ERROR);

    /* - - - - - - - - - - - - Stage 2: - - - - - - - - - - - - - -
                 Assemble and solve second linear system
       - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

    /*
       Conclude profiling stage #1; begin profiling stage #2
    */
    PetscLogStagePop();
    PetscLogStagePush(stages[2]);

    /*
       Initialize all matrix entries to zero
    */
    if (t > 0) MatZeroEntries(C2);

    /*
       Assemble matrix in parallel. Also, log the number of flops
       for computing matrix entries.
        - To illustrate the features of parallel matrix assembly, we
          intentionally set the values differently from the way in
          which the matrix is distributed across the processors.  Each
          entry that is not owned locally will be sent to the appropriate
          processor during MatAssemblyBegin() and MatAssemblyEnd().
        - For best efficiency the user should strive to set as many
          entries locally as possible.
    */
    for (i=0; i<m; i++) {
      for (j=2*rank; j<2*rank+2; j++) {
        v = -1.0;  Ii = j + n*i;
        if (i>0)   {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (i<m-1) {J = Ii + n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j>0)   {J = Ii - 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        if (j<n-1) {J = Ii + 1; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
        v = 6.0 + t*0.5; MatSetValues(C2,1,&Ii,1,&Ii,&v,ADD_VALUES);
      }
    }
    if (unsym) {
      for (Ii=Istart2; Ii<Iend2; Ii++) { /* Make matrix nonsymmetric */
        v = -1.0*(t+0.5); i = Ii/n;
        if (i>0)   {J = Ii - n; MatSetValues(C2,1,&Ii,1,&J,&v,ADD_VALUES);}
      }
    }
    MatAssemblyBegin(C2,MAT_FINAL_ASSEMBLY);
    MatAssemblyEnd(C2,MAT_FINAL_ASSEMBLY);
    PetscLogFlops(2.0*(Iend-Istart));

    /*
       Indicate same nonzero structure of successive linear system matrices
    */
    MatSetOption(C2,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);
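    /*
       With MAT_NEW_NONZERO_LOCATIONS set to PETSC_FALSE, the nonzero pattern
       established on the first pass is reused unchanged on later passes.
    */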

    /*
       Compute right-hand-side vector
    */
    MatMult(C2,u,b2);

    /*
       Set operators. Here the matrix that defines the linear system
       also serves as the preconditioning matrix.
    */
    KSPSetOperators(ksp2,C2,C2);

    /*
       Solve the second linear system
    */
    KSPSetUp(ksp2);
    KSPSolve(ksp2,b2,x2);
    KSPGetIterationNumber(ksp2,&its);

    /*
       Check error of solution to second linear system
    */
    CheckError(u,x2,b2,its,1.e-4,CHECK_ERROR);

    /*
       Conclude profiling stage #2
    */
    PetscLogStagePop();
  }
  /* --------------------------------------------------------------
                       End of linear solver loop
     -------------------------------------------------------------- */

  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp1); KSPDestroy(&ksp2);
  VecDestroy(&x1);   VecDestroy(&x2);
  VecDestroy(&b1);   VecDestroy(&b2);
  MatDestroy(&C1);   MatDestroy(&C2);
  VecDestroy(&u);

  PetscFinalize();
  return 0;
}
/* ------------------------------------------------------------- */
/*
    CheckError - Checks the error of the solution.

    Input Parameters:
    u - exact solution
    x - approximate solution
    b - work vector
    its - number of iterations for convergence
    tol - tolerance
    CHECK_ERROR - the event number for error checking
                  (for use with profiling)

    Notes:
    In order to profile this section of code separately from the
    rest of the program, we register it as an "event" with
    PetscLogEventRegister() in the main program.  Then, we indicate
    the start and end of this event by calling
        PetscLogEventBegin(CHECK_ERROR,u,x,b,0);
        PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
    Here, we specify the objects most closely associated with
    the event (the vectors u,x,b).  Such information is optional;
    we could instead use 0 for all objects.
*/
PetscErrorCode CheckError(Vec u,Vec x,Vec b,PetscInt its,PetscReal tol,PetscLogEvent CHECK_ERROR)
{
  PetscScalar    none = -1.0;
  PetscReal      norm;

  PetscLogEventBegin(CHECK_ERROR,u,x,b,0);

  /*
     Compute error of the solution, using b as a work vector.
  */
  VecCopy(x,b);
  VecAXPY(b,none,u);
  VecNorm(b,NORM_2,&norm);
  if (norm > tol) {
    PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g, Iterations %D\n",(double)norm,its);
  }
  PetscLogEventEnd(CHECK_ERROR,u,x,b,0);
  return 0;
}
/* ------------------------------------------------------------- */
/*
   MyKSPMonitor - This is a user-defined routine for monitoring
   the KSP iterative solvers.

   Input Parameters:
     ksp   - iterative context
     n     - iteration number
     rnorm - 2-norm (preconditioned) residual value (may be estimated)
     dummy - optional user-defined monitor context (unused here)
*/
PetscErrorCode MyKSPMonitor(KSP ksp,PetscInt n,PetscReal rnorm,void *dummy)
{
  Vec            x;

  /*
     Build the solution vector
  */
  KSPBuildSolution(ksp,NULL,&x);
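  /*
     Note: KSPBuildSolution() may return a reference to the solver's internal
     solution vector, so x must not be destroyed here.
  */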

  /*
     Write the solution vector and residual norm to stdout.
      - PetscPrintf() handles output for multiprocessor jobs
        by printing from only one processor in the communicator.
      - The parallel viewer PETSC_VIEWER_STDOUT_WORLD handles
        data from multiple processors so that the output
        is not jumbled.
  */
  PetscPrintf(PETSC_COMM_WORLD,"iteration %D solution vector:\n",n);
  VecView(x,PETSC_VIEWER_STDOUT_WORLD);
  PetscPrintf(PETSC_COMM_WORLD,"iteration %D KSP Residual norm %14.12e \n",n,rnorm);
  return 0;
}

/*TEST

   test:
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -ksp_gmres_cgs_refinement_type refine_always -s2_ksp_type bcgs -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type {{gmres hpddm}} -s2_ksp_type {{gmres hpddm}} -s2_pc_type jacobi -s2_ksp_monitor_short

   test:
      requires: hpddm
      suffix: hpddm_2
      args: -t 2 -pc_type jacobi -ksp_monitor_short -ksp_type gmres -s2_ksp_type hpddm -s2_ksp_hpddm_type gcrodr -s2_ksp_hpddm_recycle 10 -s2_pc_type jacobi -s2_ksp_monitor_short

   testset:
      requires: hpddm
      output_file: output/ex9_hpddm_cg.out
      args: -unsym 0 -t 2 -pc_type jacobi -ksp_monitor_short -s2_pc_type jacobi -s2_ksp_monitor_short -ksp_rtol 1.e-2 -s2_ksp_rtol 1.e-2
      test:
         suffix: hpddm_cg_p_p
         args: -ksp_type cg -s2_ksp_type cg
      test:
         suffix: hpddm_cg_p_h
         args: -ksp_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}
      test:
         suffix: hpddm_cg_h_h
         args: -ksp_type hpddm -ksp_hpddm_type cg -s2_ksp_type hpddm -s2_ksp_hpddm_type {{cg bcg bfbcg}shared output}

TEST*/