Actual source code: ex2.c
static char help[] = "Solves a linear system in parallel with KSP.\n\
Input parameters include:\n\
  -view_exact_sol   : write exact solution vector to stdout\n\
  -m <mesh_x>       : number of mesh points in x-direction\n\
  -n <mesh_y>       : number of mesh points in y-direction\n\n";
/*T
   Concepts: KSP^basic parallel example;
   Concepts: KSP^Laplacian, 2d
   Concepts: Laplacian, 2d
   Processors: n
T*/

/*
  Include "petscksp.h" so that we can use KSP solvers.
*/
#include <petscksp.h>
int main(int argc,char **args)
{
  Vec         x,b,u;    /* approx solution, RHS, exact solution */
  Mat         A;        /* linear system matrix */
  KSP         ksp;      /* linear solver context */
  PetscReal   norm;     /* norm of solution error */
  PetscInt    i,j,Ii,J,Istart,Iend,m = 8,n = 7,its;
  PetscBool   flg;
  PetscScalar v;

  PetscInitialize(&argc,&args,(char*)0,help);
  PetscOptionsGetInt(NULL,NULL,"-m",&m,NULL);
  PetscOptionsGetInt(NULL,NULL,"-n",&n,NULL);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
         Compute the matrix and right-hand-side vector that define
         the linear system, Ax = b.
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  /*
     Create parallel matrix, specifying only its global dimensions.
     When using MatCreate(), the matrix format can be specified at
     runtime. Also, the parallel partitioning of the matrix is
     determined by PETSc at runtime.

     Performance tuning note: For problems of substantial size,
     preallocation of matrix memory is crucial for attaining good
     performance. See the matrix chapter of the users manual for details.
  */
  MatCreate(PETSC_COMM_WORLD,&A);
  MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,m*n,m*n);
  MatSetFromOptions(A);
  /*
     Preallocation is specified for several matrix formats; only the call
     matching the matrix type actually selected at runtime has any effect,
     the others are ignored.
  */
  MatMPIAIJSetPreallocation(A,5,NULL,5,NULL);
  MatSeqAIJSetPreallocation(A,5,NULL);
  MatSeqSBAIJSetPreallocation(A,1,5,NULL);
  MatMPISBAIJSetPreallocation(A,1,5,NULL,5,NULL);
  MatMPISELLSetPreallocation(A,5,NULL,5,NULL);
  MatSeqSELLSetPreallocation(A,5,NULL);
  /*
     Currently, all PETSc parallel matrix formats are partitioned by
     contiguous chunks of rows across the processors. Determine which
     rows of the matrix are locally owned.
  */
  MatGetOwnershipRange(A,&Istart,&Iend);
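
  /*
     A minimal sketch (not part of the original example): the ownership range
     just computed can be printed per rank; PetscSynchronizedPrintf() keeps
     the output ordered by rank. With the default m = 8, n = 7 (56 rows) on
     2 processes, PETSC_DECIDE typically gives rank 0 rows [0,28) and
     rank 1 rows [28,56).

       PetscMPIInt rank;
       MPI_Comm_rank(PETSC_COMM_WORLD,&rank);
       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] owns rows %D to %D\n",rank,Istart,Iend);
       PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
  */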
  /*
     Set matrix elements for the 2-D, five-point stencil in parallel.
      - Each processor needs to insert only elements that it owns
        locally (but any non-local elements will be sent to the
        appropriate processor during matrix assembly).
      - Always specify global rows and columns of matrix entries.

     Note: this uses the less common natural ordering that orders first
     all the unknowns for x = h, then for x = 2h, etc.; hence you see
     J = Ii +- n instead of J = Ii +- m as you might expect. The more
     standard ordering would first do all variables for y = h, then y = 2h, etc.
  */
  for (Ii=Istart; Ii<Iend; Ii++) {
    v = -1.0; i = Ii/n; j = Ii - i*n;
    if (i>0)   {J = Ii - n; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (i<m-1) {J = Ii + n; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j>0)   {J = Ii - 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
    if (j<n-1) {J = Ii + 1; MatSetValues(A,1,&Ii,1,&J,&v,ADD_VALUES);}
    v = 4.0; MatSetValues(A,1,&Ii,1,&Ii,&v,ADD_VALUES);
  }
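
  /*
     Worked example of the index mapping above: with the default m = 8, n = 7,
     global row Ii = 10 gives i = 10/7 = 1 and j = 10 - 1*7 = 3. Since
     0 < i < m-1 and 0 < j < n-1, all four branches fire: the loop inserts
     -1.0 at columns 3 and 17 (Ii - n and Ii + n), -1.0 at columns 9 and 11
     (Ii - 1 and Ii + 1), and 4.0 on the diagonal at column 10.
  */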
  /*
     Assemble matrix, using the 2-step process:
       MatAssemblyBegin(), MatAssemblyEnd()
     Computations can be done while messages are in transition
     by placing code between these two statements.
  */
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
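
  /*
     A hedged sketch (not in the original example): after assembly,
     MatGetInfo() can confirm that the preallocation above was sufficient;
     info.mallocs counts extra allocations performed during MatSetValues()
     and should be zero for a well-preallocated matrix.

       MatInfo info;
       MatGetInfo(A,MAT_GLOBAL_SUM,&info);
       PetscPrintf(PETSC_COMM_WORLD,"mallocs during assembly: %g\n",info.mallocs);
  */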
  /* A is symmetric. Set symmetric flag to enable ICC/Cholesky preconditioner */
  MatSetOption(A,MAT_SYMMETRIC,PETSC_TRUE);
  /*
     Create parallel vectors.
      - We form 1 vector from scratch and then duplicate as needed.
      - When using VecCreate(), VecSetSizes() and VecSetFromOptions()
        in this example, we specify only the vector's global dimension;
        the parallel partitioning is determined at runtime.
      - When solving a linear system, the vectors and matrices MUST
        be partitioned accordingly.  PETSc automatically generates
        appropriately partitioned matrices and vectors when MatCreate()
        and VecCreate() are used with the same communicator.
      - The user can alternatively specify the local vector and matrix
        dimensions when more sophisticated partitioning is needed
        (replacing the PETSC_DECIDE argument in the VecSetSizes() statement
        below).
  */
  VecCreate(PETSC_COMM_WORLD,&u);
  VecSetSizes(u,PETSC_DECIDE,m*n);
  VecSetFromOptions(u);
  VecDuplicate(u,&b);
  VecDuplicate(b,&x);
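
  /*
     Sketch of the alternative mentioned above (an illustration, not part of
     this example): to impose a particular partitioning, replace PETSC_DECIDE
     with an explicit local size, e.g. matching the matrix row distribution
     returned by MatGetOwnershipRange():

       VecSetSizes(u,Iend-Istart,m*n);
  */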
  /*
     Set exact solution; then compute right-hand-side vector.
     By default we use an exact solution with all elements equal to 1.0.
  */
  VecSet(u,1.0);
  MatMult(A,u,b);
  /*
     View the exact solution vector if desired
  */
  flg = PETSC_FALSE;
  PetscOptionsGetBool(NULL,NULL,"-view_exact_sol",&flg,NULL);
  if (flg) VecView(u,PETSC_VIEWER_STDOUT_WORLD);
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                Create the linear solver and set various options
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPCreate(PETSC_COMM_WORLD,&ksp);

  /*
     Set operators. Here the matrix that defines the linear system
     also serves as the preconditioning matrix.
  */
  KSPSetOperators(ksp,A,A);
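
  /*
     Illustrative sketch (not used in this example): the second matrix
     argument may differ from the first, e.g. a cheaper approximation P of A
     from which the preconditioner is built (P here is hypothetical):

       KSPSetOperators(ksp,A,P);
  */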
  /*
     Set linear solver defaults for this problem (optional).
     - By extracting the KSP and PC contexts from the KSP context,
       we can then directly call any KSP and PC routines to set
       various options.
     - The following statement is optional; all of these
       parameters could alternatively be specified at runtime via
       KSPSetFromOptions().  All of these defaults can be
       overridden at runtime, as indicated below.
  */
  KSPSetTolerances(ksp,1.e-2/((m+1)*(n+1)),1.e-50,PETSC_DEFAULT,PETSC_DEFAULT);
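
  /*
     Sketch of the PC extraction described above (illustrative; this example
     instead leaves the preconditioner choice to the runtime options):

       PC pc;
       KSPGetPC(ksp,&pc);
       PCSetType(pc,PCJACOBI);
  */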
  /*
    Set runtime options, e.g.,
        -ksp_type <type> -pc_type <type> -ksp_monitor -ksp_rtol <rtol>
    These options will override those specified above as long as
    KSPSetFromOptions() is called _after_ any other customization
    routines.
  */
  KSPSetFromOptions(ksp);
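
  /*
     A typical invocation (illustrative, not prescribed by this file):

       mpiexec -n 4 ./ex2 -m 40 -n 40 -ksp_type cg -pc_type bjacobi -ksp_monitor
  */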
  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Solve the linear system
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  KSPSolve(ksp,b,x);

  /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
                      Check the solution and clean up
     - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
  VecAXPY(x,-1.0,u);
  VecNorm(x,NORM_2,&norm);
  KSPGetIterationNumber(ksp,&its);
  /*
     Print convergence information.  PetscPrintf() produces a single
     print statement from all processes that share a communicator.
     An alternative is PetscFPrintf(), which prints to a file.
  */
  PetscPrintf(PETSC_COMM_WORLD,"Norm of error %g iterations %D\n",(double)norm,its);
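
  /*
     Sketch of the PetscFPrintf() alternative mentioned above (the file name
     is hypothetical); PetscFOpen()/PetscFClose() open and close the file on
     the first process of the communicator only:

       FILE *fp;
       PetscFOpen(PETSC_COMM_WORLD,"ex2.log","w",&fp);
       PetscFPrintf(PETSC_COMM_WORLD,fp,"Norm of error %g iterations %D\n",(double)norm,its);
       PetscFClose(PETSC_COMM_WORLD,fp);
  */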
  /*
     Free work space.  All PETSc objects should be destroyed when they
     are no longer needed.
  */
  KSPDestroy(&ksp);
  VecDestroy(&u); VecDestroy(&x);
  VecDestroy(&b); MatDestroy(&A);
  /*
     Always call PetscFinalize() before exiting a program.  This routine
       - finalizes the PETSc libraries as well as MPI
       - provides summary and diagnostic information if certain runtime
         options are chosen (e.g., -log_view).
  */
  PetscFinalize();
  return 0;
}
/*TEST

   build:
      requires: !single

   test:
      suffix: chebyest_1
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_monitor_short

   test:
      suffix: chebyest_2
      args: -m 80 -n 80 -ksp_pc_side right -pc_type ksp -ksp_ksp_type chebyshev -ksp_ksp_max_it 5 -ksp_ksp_chebyshev_esteig 0.9,0,0,1.1 -ksp_esteig_ksp_type cg -ksp_monitor_short

   test:
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 2
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 3
      args: -pc_type sor -pc_sor_symmetric -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 4
      args: -pc_type eisenstat -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always

   test:
      suffix: 5
      nsize: 2
      args: -ksp_monitor_short -m 5 -n 5 -mat_view draw -ksp_gmres_cgs_refinement_type refine_always -nox
      output_file: output/ex2_2.out

   test:
      suffix: bjacobi
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 1 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: bjacobi_2
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 2 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres -ksp_view

   test:
      suffix: bjacobi_3
      nsize: 4
      args: -pc_type bjacobi -pc_bjacobi_blocks 4 -ksp_monitor_short -sub_pc_type jacobi -sub_ksp_type gmres

   test:
      suffix: qmrcgs
      args: -ksp_type qmrcgs -pc_type ilu
      output_file: output/ex2_fbcgs.out

   test:
      suffix: qmrcgs_2
      nsize: 3
      args: -ksp_type qmrcgs -pc_type bjacobi
      output_file: output/ex2_fbcgs_2.out

   test:
      suffix: fbcgs
      args: -ksp_type fbcgs -pc_type ilu

   test:
      suffix: fbcgs_2
      nsize: 3
      args: -ksp_type fbcgsr -pc_type bjacobi

   test:
      suffix: groppcg
      args: -ksp_monitor_short -ksp_type groppcg -m 9 -n 9

   test:
      suffix: mkl_pardiso_cholesky
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type cholesky -mat_type sbaij -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: mkl_pardiso_lu
      requires: mkl_pardiso
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type mkl_pardiso

   test:
      suffix: pipebcgs
      args: -ksp_monitor_short -ksp_type pipebcgs -m 9 -n 9

   test:
      suffix: pipecg
      args: -ksp_monitor_short -ksp_type pipecg -m 9 -n 9

   test:
      suffix: pipecgrr
      args: -ksp_monitor_short -ksp_type pipecgrr -m 9 -n 9

   test:
      suffix: pipecr
      args: -ksp_monitor_short -ksp_type pipecr -m 9 -n 9

   test:
      suffix: pipelcg
      args: -ksp_monitor_short -ksp_type pipelcg -m 9 -n 9 -pc_type none -ksp_pipelcg_pipel 2 -ksp_pipelcg_lmax 2
      filter: grep -v "sqrt breakdown in iteration"

   test:
      suffix: sell
      args: -ksp_monitor_short -ksp_gmres_cgs_refinement_type refine_always -m 9 -n 9 -mat_type sell

   test:
      requires: mumps
      suffix: sell_mumps
      args: -ksp_type preonly -m 9 -n 12 -mat_type sell -pc_type lu -pc_factor_mat_solver_type mumps -pc_factor_mat_ordering_type natural

   test:
      suffix: telescope
      nsize: 4
      args: -m 100 -n 100 -ksp_converged_reason -pc_type telescope -pc_telescope_reduction_factor 4 -telescope_pc_type bjacobi

   test:
      suffix: umfpack
      requires: suitesparse
      args: -ksp_type preonly -pc_type lu -pc_factor_mat_solver_type umfpack

   test:
      suffix: spqr
      requires: suitesparse
      args: -ksp_type preonly -pc_type qr -pc_factor_mat_solver_type spqr

   test:
      suffix: pc_symmetric
      args: -m 10 -n 9 -ksp_converged_reason -ksp_type gmres -ksp_pc_side symmetric -pc_type cholesky

   test:
      suffix: pipeprcg
      args: -ksp_monitor_short -ksp_type pipeprcg -m 9 -n 9

   test:
      suffix: pipeprcg_rcw
      args: -ksp_monitor_short -ksp_type pipeprcg -recompute_w false -m 9 -n 9

   test:
      suffix: pipecg2
      args: -ksp_monitor_short -ksp_type pipecg2 -m 9 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: pipecg2_2
      nsize: 4
      args: -ksp_monitor_short -ksp_type pipecg2 -m 15 -n 9 -ksp_norm_type {{preconditioned unpreconditioned natural}}

   test:
      suffix: hpddm
      nsize: 4
      requires: hpddm
      filter: sed -e "s/ iterations 9/ iterations 8/g"
      args: -ksp_converged_reason -ksp_type hpddm -ksp_hpddm_precision {{single double}shared output} -ksp_pc_side {{left right}shared output}

TEST*/