Actual source code: bddcschurs.c
1: #include <../src/ksp/pc/impls/bddc/bddc.h>
2: #include <../src/ksp/pc/impls/bddc/bddcprivate.h>
3: #include <../src/mat/impls/dense/seq/dense.h>
4: #include <petscblaslapack.h>
6: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt*,PetscInt,PetscBT,PetscInt*,PetscInt*,PetscInt*);
7: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat,PetscBool,MatReuse,Mat*);
8: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC,Vec,Vec);
9: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC,Vec,Vec);
11: /* if v2 is not present, correction is done in-place */
12: PetscErrorCode PCBDDCReuseSolversBenignAdapt(PCBDDCReuseSolvers ctx, Vec v, Vec v2, PetscBool sol, PetscBool full)
13: {
14: PetscScalar *array;
15: PetscScalar *array2;
17: if (!ctx->benign_n) return 0;
18: if (sol && full) {
19: PetscInt n_I,size_schur;
21: /* get sizes */
22: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
23: VecGetSize(v,&n_I);
24: n_I = n_I - size_schur;
25: /* get schur sol from array */
26: VecGetArray(v,&array);
27: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
28: VecRestoreArray(v,&array);
29: /* apply interior sol correction */
30: MatMultTranspose(ctx->benign_csAIB,ctx->benign_dummy_schur_vec,ctx->benign_corr_work);
31: VecResetArray(ctx->benign_dummy_schur_vec);
32: MatMultAdd(ctx->benign_AIIm1ones,ctx->benign_corr_work,v,v);
33: }
34: if (v2) {
35: PetscInt nl;
37: VecGetArrayRead(v,(const PetscScalar**)&array);
38: VecGetLocalSize(v2,&nl);
39: VecGetArray(v2,&array2);
40: PetscArraycpy(array2,array,nl);
41: } else {
42: VecGetArray(v,&array);
43: array2 = array;
44: }
45: if (!sol) { /* change rhs */
46: PetscInt n;
47: for (n=0;n<ctx->benign_n;n++) {
48: PetscScalar sum = 0.;
49: const PetscInt *cols;
50: PetscInt nz,i;
52: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
53: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
54: for (i=0;i<nz-1;i++) sum += array[cols[i]];
55: #if defined(PETSC_USE_COMPLEX)
56: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
57: #else
58: sum = -sum/nz;
59: #endif
60: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
61: ctx->benign_save_vals[n] = array2[cols[nz-1]];
62: array2[cols[nz-1]] = sum;
63: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
64: }
65: } else {
66: PetscInt n;
67: for (n=0;n<ctx->benign_n;n++) {
68: PetscScalar sum = 0.;
69: const PetscInt *cols;
70: PetscInt nz,i;
71: ISGetLocalSize(ctx->benign_zerodiag_subs[n],&nz);
72: ISGetIndices(ctx->benign_zerodiag_subs[n],&cols);
73: for (i=0;i<nz-1;i++) sum += array[cols[i]];
74: #if defined(PETSC_USE_COMPLEX)
75: sum = -(PetscRealPart(sum)/nz + PETSC_i*(PetscImaginaryPart(sum)/nz));
76: #else
77: sum = -sum/nz;
78: #endif
79: for (i=0;i<nz-1;i++) array2[cols[i]] += sum;
80: array2[cols[nz-1]] = ctx->benign_save_vals[n];
81: ISRestoreIndices(ctx->benign_zerodiag_subs[n],&cols);
82: }
83: }
84: if (v2) {
85: VecRestoreArrayRead(v,(const PetscScalar**)&array);
86: VecRestoreArray(v2,&array2);
87: } else {
88: VecRestoreArray(v,&array);
89: }
90: if (!sol && full) {
91: Vec usedv;
92: PetscInt n_I,size_schur;
94: /* get sizes */
95: MatGetSize(ctx->benign_csAIB,&size_schur,NULL);
96: VecGetSize(v,&n_I);
97: n_I = n_I - size_schur;
98: /* compute schur rhs correction */
99: if (v2) {
100: usedv = v2;
101: } else {
102: usedv = v;
103: }
104: /* apply schur rhs correction */
105: MatMultTranspose(ctx->benign_AIIm1ones,usedv,ctx->benign_corr_work);
106: VecGetArrayRead(usedv,(const PetscScalar**)&array);
107: VecPlaceArray(ctx->benign_dummy_schur_vec,array+n_I);
108: VecRestoreArrayRead(usedv,(const PetscScalar**)&array);
109: MatMultAdd(ctx->benign_csAIB,ctx->benign_corr_work,ctx->benign_dummy_schur_vec,ctx->benign_dummy_schur_vec);
110: VecResetArray(ctx->benign_dummy_schur_vec);
111: }
112: return 0;
113: }
115: static PetscErrorCode PCBDDCReuseSolvers_Solve_Private(PC pc, Vec rhs, Vec sol, PetscBool transpose, PetscBool full)
116: {
117: PCBDDCReuseSolvers ctx;
118: PetscBool copy = PETSC_FALSE;
120: PCShellGetContext(pc,&ctx);
121: if (full) {
122: #if defined(PETSC_HAVE_MUMPS)
123: MatMumpsSetIcntl(ctx->F,26,-1);
124: #endif
125: #if defined(PETSC_HAVE_MKL_PARDISO)
126: MatMkl_PardisoSetCntl(ctx->F,70,0);
127: #endif
128: copy = ctx->has_vertices;
129: } else { /* interior solver */
130: #if defined(PETSC_HAVE_MUMPS)
131: MatMumpsSetIcntl(ctx->F,26,0);
132: #endif
133: #if defined(PETSC_HAVE_MKL_PARDISO)
134: MatMkl_PardisoSetCntl(ctx->F,70,1);
135: #endif
136: copy = PETSC_TRUE;
137: }
138: /* copy rhs into factored matrix workspace */
139: if (copy) {
140: PetscInt n;
141: PetscScalar *array,*array_solver;
143: VecGetLocalSize(rhs,&n);
144: VecGetArrayRead(rhs,(const PetscScalar**)&array);
145: VecGetArray(ctx->rhs,&array_solver);
146: PetscArraycpy(array_solver,array,n);
147: VecRestoreArray(ctx->rhs,&array_solver);
148: VecRestoreArrayRead(rhs,(const PetscScalar**)&array);
150: PCBDDCReuseSolversBenignAdapt(ctx,ctx->rhs,NULL,PETSC_FALSE,full);
151: if (transpose) {
152: MatSolveTranspose(ctx->F,ctx->rhs,ctx->sol);
153: } else {
154: MatSolve(ctx->F,ctx->rhs,ctx->sol);
155: }
156: PCBDDCReuseSolversBenignAdapt(ctx,ctx->sol,NULL,PETSC_TRUE,full);
158: /* get back data to caller worskpace */
159: VecGetArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
160: VecGetArray(sol,&array);
161: PetscArraycpy(array,array_solver,n);
162: VecRestoreArray(sol,&array);
163: VecRestoreArrayRead(ctx->sol,(const PetscScalar**)&array_solver);
164: } else {
165: if (ctx->benign_n) {
166: PCBDDCReuseSolversBenignAdapt(ctx,rhs,ctx->rhs,PETSC_FALSE,full);
167: if (transpose) {
168: MatSolveTranspose(ctx->F,ctx->rhs,sol);
169: } else {
170: MatSolve(ctx->F,ctx->rhs,sol);
171: }
172: PCBDDCReuseSolversBenignAdapt(ctx,sol,NULL,PETSC_TRUE,full);
173: } else {
174: if (transpose) {
175: MatSolveTranspose(ctx->F,rhs,sol);
176: } else {
177: MatSolve(ctx->F,rhs,sol);
178: }
179: }
180: }
181: /* restore defaults */
182: #if defined(PETSC_HAVE_MUMPS)
183: MatMumpsSetIcntl(ctx->F,26,-1);
184: #endif
185: #if defined(PETSC_HAVE_MKL_PARDISO)
186: MatMkl_PardisoSetCntl(ctx->F,70,0);
187: #endif
188: return 0;
189: }
191: static PetscErrorCode PCBDDCReuseSolvers_Correction(PC pc, Vec rhs, Vec sol)
192: {
193: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_TRUE);
194: return 0;
195: }
197: static PetscErrorCode PCBDDCReuseSolvers_CorrectionTranspose(PC pc, Vec rhs, Vec sol)
198: {
199: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_TRUE);
200: return 0;
201: }
203: static PetscErrorCode PCBDDCReuseSolvers_Interior(PC pc, Vec rhs, Vec sol)
204: {
205: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_FALSE,PETSC_FALSE);
206: return 0;
207: }
209: static PetscErrorCode PCBDDCReuseSolvers_InteriorTranspose(PC pc, Vec rhs, Vec sol)
210: {
211: PCBDDCReuseSolvers_Solve_Private(pc,rhs,sol,PETSC_TRUE,PETSC_FALSE);
212: return 0;
213: }
215: static PetscErrorCode PCBDDCReuseSolvers_View(PC pc, PetscViewer viewer)
216: {
217: PCBDDCReuseSolvers ctx;
218: PetscBool iascii;
220: PCShellGetContext(pc,&ctx);
221: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
222: if (iascii) {
223: PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_INFO);
224: }
225: MatView(ctx->F,viewer);
226: if (iascii) {
227: PetscViewerPopFormat(viewer);
228: }
229: return 0;
230: }
232: static PetscErrorCode PCBDDCReuseSolversReset(PCBDDCReuseSolvers reuse)
233: {
234: PetscInt i;
236: MatDestroy(&reuse->F);
237: VecDestroy(&reuse->sol);
238: VecDestroy(&reuse->rhs);
239: PCDestroy(&reuse->interior_solver);
240: PCDestroy(&reuse->correction_solver);
241: ISDestroy(&reuse->is_R);
242: ISDestroy(&reuse->is_B);
243: VecScatterDestroy(&reuse->correction_scatter_B);
244: VecDestroy(&reuse->sol_B);
245: VecDestroy(&reuse->rhs_B);
246: for (i=0;i<reuse->benign_n;i++) {
247: ISDestroy(&reuse->benign_zerodiag_subs[i]);
248: }
249: PetscFree(reuse->benign_zerodiag_subs);
250: PetscFree(reuse->benign_save_vals);
251: MatDestroy(&reuse->benign_csAIB);
252: MatDestroy(&reuse->benign_AIIm1ones);
253: VecDestroy(&reuse->benign_corr_work);
254: VecDestroy(&reuse->benign_dummy_schur_vec);
255: return 0;
256: }
258: static PetscErrorCode PCBDDCComputeExplicitSchur(Mat M, PetscBool issym, MatReuse reuse, Mat *S)
259: {
260: Mat B, C, D, Bd, Cd, AinvBd;
261: KSP ksp;
262: PC pc;
263: PetscBool isLU, isILU, isCHOL, Bdense, Cdense;
264: PetscReal fill = 2.0;
265: PetscInt n_I;
266: PetscMPIInt size;
268: MPI_Comm_size(PetscObjectComm((PetscObject)M),&size);
270: if (reuse == MAT_REUSE_MATRIX) {
271: PetscBool Sdense;
273: PetscObjectTypeCompare((PetscObject)*S, MATSEQDENSE, &Sdense);
275: }
276: MatSchurComplementGetSubMatrices(M, NULL, NULL, &B, &C, &D);
277: MatSchurComplementGetKSP(M, &ksp);
278: KSPGetPC(ksp, &pc);
279: PetscObjectTypeCompare((PetscObject) pc, PCLU, &isLU);
280: PetscObjectTypeCompare((PetscObject) pc, PCILU, &isILU);
281: PetscObjectTypeCompare((PetscObject) pc, PCCHOLESKY, &isCHOL);
282: PetscObjectTypeCompare((PetscObject) B, MATSEQDENSE, &Bdense);
283: PetscObjectTypeCompare((PetscObject) C, MATSEQDENSE, &Cdense);
284: MatGetSize(B,&n_I,NULL);
285: if (n_I) {
286: if (!Bdense) {
287: MatConvert(B, MATSEQDENSE, MAT_INITIAL_MATRIX, &Bd);
288: } else {
289: Bd = B;
290: }
292: if (isLU || isILU || isCHOL) {
293: Mat fact;
294: KSPSetUp(ksp);
295: PCFactorGetMatrix(pc, &fact);
296: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
297: MatMatSolve(fact, Bd, AinvBd);
298: } else {
299: PetscBool ex = PETSC_TRUE;
301: if (ex) {
302: Mat Ainvd;
304: PCComputeOperator(pc, MATDENSE, &Ainvd);
305: MatMatMult(Ainvd, Bd, MAT_INITIAL_MATRIX, fill, &AinvBd);
306: MatDestroy(&Ainvd);
307: } else {
308: Vec sol,rhs;
309: PetscScalar *arrayrhs,*arraysol;
310: PetscInt i,nrhs,n;
312: MatDuplicate(Bd, MAT_DO_NOT_COPY_VALUES, &AinvBd);
313: MatGetSize(Bd,&n,&nrhs);
314: MatDenseGetArray(Bd,&arrayrhs);
315: MatDenseGetArray(AinvBd,&arraysol);
316: KSPGetSolution(ksp,&sol);
317: KSPGetRhs(ksp,&rhs);
318: for (i=0;i<nrhs;i++) {
319: VecPlaceArray(rhs,arrayrhs+i*n);
320: VecPlaceArray(sol,arraysol+i*n);
321: KSPSolve(ksp,rhs,sol);
322: VecResetArray(rhs);
323: VecResetArray(sol);
324: }
325: MatDenseRestoreArray(Bd,&arrayrhs);
326: MatDenseRestoreArray(AinvBd,&arrayrhs);
327: }
328: }
329: if (!Bdense & !issym) {
330: MatDestroy(&Bd);
331: }
333: if (!issym) {
334: if (!Cdense) {
335: MatConvert(C, MATSEQDENSE, MAT_INITIAL_MATRIX, &Cd);
336: } else {
337: Cd = C;
338: }
339: MatMatMult(Cd, AinvBd, reuse, fill, S);
340: if (!Cdense) {
341: MatDestroy(&Cd);
342: }
343: } else {
344: MatTransposeMatMult(Bd, AinvBd, reuse, fill, S);
345: if (!Bdense) {
346: MatDestroy(&Bd);
347: }
348: }
349: MatDestroy(&AinvBd);
350: }
352: if (D) {
353: Mat Dd;
354: PetscBool Ddense;
356: PetscObjectTypeCompare((PetscObject)D,MATSEQDENSE,&Ddense);
357: if (!Ddense) {
358: MatConvert(D, MATSEQDENSE, MAT_INITIAL_MATRIX, &Dd);
359: } else {
360: Dd = D;
361: }
362: if (n_I) {
363: MatAYPX(*S,-1.0,Dd,SAME_NONZERO_PATTERN);
364: } else {
365: if (reuse == MAT_INITIAL_MATRIX) {
366: MatDuplicate(Dd,MAT_COPY_VALUES,S);
367: } else {
368: MatCopy(Dd,*S,SAME_NONZERO_PATTERN);
369: }
370: }
371: if (!Ddense) {
372: MatDestroy(&Dd);
373: }
374: } else {
375: MatScale(*S,-1.0);
376: }
377: return 0;
378: }
380: PetscErrorCode PCBDDCSubSchursSetUp(PCBDDCSubSchurs sub_schurs, Mat Ain, Mat Sin, PetscBool exact_schur, PetscInt xadj[], PetscInt adjncy[], PetscInt nlayers, Vec scaling, PetscBool compute_Stilda, PetscBool reuse_solvers, PetscBool benign_trick, PetscInt benign_n, PetscInt benign_p0_lidx[], IS benign_zerodiag_subs[], Mat change, IS change_primal)
381: {
382: Mat F,A_II,A_IB,A_BI,A_BB,AE_II;
383: Mat S_all;
384: Vec gstash,lstash;
385: VecScatter sstash;
386: IS is_I,is_I_layer;
387: IS all_subsets,all_subsets_mult,all_subsets_n;
388: PetscScalar *stasharray,*Bwork;
389: PetscInt *nnz,*all_local_idx_N;
390: PetscInt *auxnum1,*auxnum2;
391: PetscInt i,subset_size,max_subset_size;
392: PetscInt n_B,extra,local_size,global_size;
393: PetscInt local_stash_size;
394: PetscBLASInt B_N,B_ierr,B_lwork,*pivots;
395: MPI_Comm comm_n;
396: PetscBool deluxe = PETSC_TRUE;
397: PetscBool use_potr = PETSC_FALSE, use_sytr = PETSC_FALSE;
398: PetscViewer matl_dbg_viewer = NULL;
399: PetscErrorCode ierr;
400: PetscBool flg;
402: MatDestroy(&sub_schurs->A);
403: MatDestroy(&sub_schurs->S);
404: if (Ain) {
405: PetscObjectReference((PetscObject)Ain);
406: sub_schurs->A = Ain;
407: }
409: PetscObjectReference((PetscObject)Sin);
410: sub_schurs->S = Sin;
411: if (sub_schurs->schur_explicit) {
412: sub_schurs->schur_explicit = (PetscBool)(!!sub_schurs->A);
413: }
415: /* preliminary checks */
418: if (benign_trick) sub_schurs->is_posdef = PETSC_FALSE;
420: /* debug (MATLAB) */
421: if (sub_schurs->debug) {
422: PetscMPIInt size,rank;
423: PetscInt nr,*print_schurs_ranks,print_schurs = PETSC_FALSE;
425: MPI_Comm_size(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&size);
426: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
427: nr = size;
428: PetscMalloc1(nr,&print_schurs_ranks);
429: PetscOptionsBegin(PetscObjectComm((PetscObject)sub_schurs->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
430: PetscOptionsIntArray("-sub_schurs_debug_ranks","Ranks to debug (all if the option is not used)",NULL,print_schurs_ranks,&nr,&flg);
431: if (!flg) print_schurs = PETSC_TRUE;
432: else {
433: print_schurs = PETSC_FALSE;
434: for (i=0;i<nr;i++) if (print_schurs_ranks[i] == (PetscInt)rank) { print_schurs = PETSC_TRUE; break; }
435: }
436: PetscOptionsEnd();
437: PetscFree(print_schurs_ranks);
438: if (print_schurs) {
439: char filename[256];
441: PetscSNPrintf(filename,sizeof(filename),"sub_schurs_Schur_r%d.m",PetscGlobalRank);
442: PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&matl_dbg_viewer);
443: PetscViewerPushFormat(matl_dbg_viewer,PETSC_VIEWER_ASCII_MATLAB);
444: }
445: }
447: /* restrict work on active processes */
448: if (sub_schurs->restrict_comm) {
449: PetscSubcomm subcomm;
450: PetscMPIInt color,rank;
452: color = 0;
453: if (!sub_schurs->n_subs) color = 1; /* this can happen if we are in a multilevel case or if the subdomain is disconnected */
454: MPI_Comm_rank(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&rank);
455: PetscSubcommCreate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&subcomm);
456: PetscSubcommSetNumber(subcomm,2);
457: PetscSubcommSetTypeGeneral(subcomm,color,rank);
458: PetscCommDuplicate(PetscSubcommChild(subcomm),&comm_n,NULL);
459: PetscSubcommDestroy(&subcomm);
460: if (!sub_schurs->n_subs) {
461: PetscCommDestroy(&comm_n);
462: return 0;
463: }
464: } else {
465: PetscCommDuplicate(PetscObjectComm((PetscObject)sub_schurs->l2gmap),&comm_n,NULL);
466: }
468: /* get Schur complement matrices */
469: if (!sub_schurs->schur_explicit) {
470: Mat tA_IB,tA_BI,tA_BB;
471: PetscBool isseqsbaij;
472: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,&tA_IB,&tA_BI,&tA_BB);
473: PetscObjectTypeCompare((PetscObject)tA_BB,MATSEQSBAIJ,&isseqsbaij);
474: if (isseqsbaij) {
475: MatConvert(tA_BB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BB);
476: MatConvert(tA_IB,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_IB);
477: MatConvert(tA_BI,MATSEQAIJ,MAT_INITIAL_MATRIX,&A_BI);
478: } else {
479: PetscObjectReference((PetscObject)tA_BB);
480: A_BB = tA_BB;
481: PetscObjectReference((PetscObject)tA_IB);
482: A_IB = tA_IB;
483: PetscObjectReference((PetscObject)tA_BI);
484: A_BI = tA_BI;
485: }
486: } else {
487: A_II = NULL;
488: A_IB = NULL;
489: A_BI = NULL;
490: A_BB = NULL;
491: }
492: S_all = NULL;
494: /* determine interior problems */
495: ISGetLocalSize(sub_schurs->is_I,&i);
496: if (nlayers >= 0 && i) { /* Interior problems can be different from the original one */
497: PetscBT touched;
498: const PetscInt* idx_B;
499: PetscInt n_I,n_B,n_local_dofs,n_prev_added,j,layer,*local_numbering;
502: /* get sizes */
503: ISGetLocalSize(sub_schurs->is_I,&n_I);
504: ISGetLocalSize(sub_schurs->is_B,&n_B);
506: PetscMalloc1(n_I+n_B,&local_numbering);
507: PetscBTCreate(n_I+n_B,&touched);
508: PetscBTMemzero(n_I+n_B,touched);
510: /* all boundary dofs must be skipped when adding layers */
511: ISGetIndices(sub_schurs->is_B,&idx_B);
512: for (j=0;j<n_B;j++) {
513: PetscBTSet(touched,idx_B[j]);
514: }
515: PetscArraycpy(local_numbering,idx_B,n_B);
516: ISRestoreIndices(sub_schurs->is_B,&idx_B);
518: /* add prescribed number of layers of dofs */
519: n_local_dofs = n_B;
520: n_prev_added = n_B;
521: for (layer=0;layer<nlayers;layer++) {
522: PetscInt n_added = 0;
523: if (n_local_dofs == n_I+n_B) break;
525: PCBDDCAdjGetNextLayer_Private(local_numbering+n_local_dofs,n_prev_added,touched,xadj,adjncy,&n_added);
526: n_prev_added = n_added;
527: n_local_dofs += n_added;
528: if (!n_added) break;
529: }
530: PetscBTDestroy(&touched);
532: /* IS for I layer dofs in original numbering */
533: ISCreateGeneral(PetscObjectComm((PetscObject)sub_schurs->is_I),n_local_dofs-n_B,local_numbering+n_B,PETSC_COPY_VALUES,&is_I_layer);
534: PetscFree(local_numbering);
535: ISSort(is_I_layer);
536: /* IS for I layer dofs in I numbering */
537: if (!sub_schurs->schur_explicit) {
538: ISLocalToGlobalMapping ItoNmap;
539: ISLocalToGlobalMappingCreateIS(sub_schurs->is_I,&ItoNmap);
540: ISGlobalToLocalMappingApplyIS(ItoNmap,IS_GTOLM_DROP,is_I_layer,&is_I);
541: ISLocalToGlobalMappingDestroy(&ItoNmap);
543: /* II block */
544: MatCreateSubMatrix(A_II,is_I,is_I,MAT_INITIAL_MATRIX,&AE_II);
545: }
546: } else {
547: PetscInt n_I;
549: /* IS for I dofs in original numbering */
550: PetscObjectReference((PetscObject)sub_schurs->is_I);
551: is_I_layer = sub_schurs->is_I;
553: /* IS for I dofs in I numbering (strided 1) */
554: if (!sub_schurs->schur_explicit) {
555: ISGetSize(sub_schurs->is_I,&n_I);
556: ISCreateStride(PetscObjectComm((PetscObject)sub_schurs->is_I),n_I,0,1,&is_I);
558: /* II block is the same */
559: PetscObjectReference((PetscObject)A_II);
560: AE_II = A_II;
561: }
562: }
564: /* Get info on subset sizes and sum of all subsets sizes */
565: max_subset_size = 0;
566: local_size = 0;
567: for (i=0;i<sub_schurs->n_subs;i++) {
568: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
569: max_subset_size = PetscMax(subset_size,max_subset_size);
570: local_size += subset_size;
571: }
573: /* Work arrays for local indices */
574: extra = 0;
575: ISGetLocalSize(sub_schurs->is_B,&n_B);
576: if (sub_schurs->schur_explicit && is_I_layer) {
577: ISGetLocalSize(is_I_layer,&extra);
578: }
579: PetscMalloc1(n_B+extra,&all_local_idx_N);
580: if (extra) {
581: const PetscInt *idxs;
582: ISGetIndices(is_I_layer,&idxs);
583: PetscArraycpy(all_local_idx_N,idxs,extra);
584: ISRestoreIndices(is_I_layer,&idxs);
585: }
586: PetscMalloc1(sub_schurs->n_subs,&auxnum1);
587: PetscMalloc1(sub_schurs->n_subs,&auxnum2);
589: /* Get local indices in local numbering */
590: local_size = 0;
591: local_stash_size = 0;
592: for (i=0;i<sub_schurs->n_subs;i++) {
593: const PetscInt *idxs;
595: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
596: ISGetIndices(sub_schurs->is_subs[i],&idxs);
597: /* start (smallest in global ordering) and multiplicity */
598: auxnum1[i] = idxs[0];
599: auxnum2[i] = subset_size*subset_size;
600: /* subset indices in local numbering */
601: PetscArraycpy(all_local_idx_N+local_size+extra,idxs,subset_size);
602: ISRestoreIndices(sub_schurs->is_subs[i],&idxs);
603: local_size += subset_size;
604: local_stash_size += subset_size*subset_size;
605: }
607: /* allocate extra workspace needed only for GETRI or SYTRF */
608: use_potr = use_sytr = PETSC_FALSE;
609: if (benign_trick || (sub_schurs->is_hermitian && sub_schurs->is_posdef)) {
610: use_potr = PETSC_TRUE;
611: } else if (sub_schurs->is_symmetric) {
612: use_sytr = PETSC_TRUE;
613: }
614: if (local_size && !use_potr) {
615: PetscScalar lwork,dummyscalar = 0.;
616: PetscBLASInt dummyint = 0;
618: B_lwork = -1;
619: PetscBLASIntCast(local_size,&B_N);
620: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
621: if (use_sytr) {
622: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
624: } else {
625: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,&dummyscalar,&B_N,&dummyint,&lwork,&B_lwork,&B_ierr));
627: }
628: PetscFPTrapPop();
629: PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);
630: PetscMalloc2(B_lwork,&Bwork,B_N,&pivots);
631: } else {
632: Bwork = NULL;
633: pivots = NULL;
634: }
636: /* prepare data for summing up properly schurs on subsets */
637: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum1,PETSC_OWN_POINTER,&all_subsets_n);
638: ISLocalToGlobalMappingApplyIS(sub_schurs->l2gmap,all_subsets_n,&all_subsets);
639: ISDestroy(&all_subsets_n);
640: ISCreateGeneral(comm_n,sub_schurs->n_subs,auxnum2,PETSC_OWN_POINTER,&all_subsets_mult);
641: ISRenumber(all_subsets,all_subsets_mult,&global_size,&all_subsets_n);
642: ISDestroy(&all_subsets);
643: ISDestroy(&all_subsets_mult);
644: ISGetLocalSize(all_subsets_n,&i);
646: VecCreateSeqWithArray(PETSC_COMM_SELF,1,local_stash_size,NULL,&lstash);
647: VecCreateMPI(comm_n,PETSC_DECIDE,global_size,&gstash);
648: VecScatterCreate(lstash,NULL,gstash,all_subsets_n,&sstash);
649: ISDestroy(&all_subsets_n);
651: /* subset indices in local boundary numbering */
652: if (!sub_schurs->is_Ej_all) {
653: PetscInt *all_local_idx_B;
655: PetscMalloc1(local_size,&all_local_idx_B);
656: ISGlobalToLocalMappingApply(sub_schurs->BtoNmap,IS_GTOLM_DROP,local_size,all_local_idx_N+extra,&subset_size,all_local_idx_B);
658: ISCreateGeneral(PETSC_COMM_SELF,local_size,all_local_idx_B,PETSC_OWN_POINTER,&sub_schurs->is_Ej_all);
659: }
661: if (change) {
662: ISLocalToGlobalMapping BtoS;
663: IS change_primal_B;
664: IS change_primal_all;
668: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change_primal_sub);
669: for (i=0;i<sub_schurs->n_subs;i++) {
670: ISLocalToGlobalMapping NtoS;
671: ISLocalToGlobalMappingCreateIS(sub_schurs->is_subs[i],&NtoS);
672: ISGlobalToLocalMappingApplyIS(NtoS,IS_GTOLM_DROP,change_primal,&sub_schurs->change_primal_sub[i]);
673: ISLocalToGlobalMappingDestroy(&NtoS);
674: }
675: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,change_primal,&change_primal_B);
676: ISLocalToGlobalMappingCreateIS(sub_schurs->is_Ej_all,&BtoS);
677: ISGlobalToLocalMappingApplyIS(BtoS,IS_GTOLM_DROP,change_primal_B,&change_primal_all);
678: ISLocalToGlobalMappingDestroy(&BtoS);
679: ISDestroy(&change_primal_B);
680: PetscMalloc1(sub_schurs->n_subs,&sub_schurs->change);
681: for (i=0;i<sub_schurs->n_subs;i++) {
682: Mat change_sub;
684: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
685: KSPCreate(PETSC_COMM_SELF,&sub_schurs->change[i]);
686: KSPSetType(sub_schurs->change[i],KSPPREONLY);
687: if (!sub_schurs->change_with_qr) {
688: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_sub);
689: } else {
690: Mat change_subt;
691: MatCreateSubMatrix(change,sub_schurs->is_subs[i],sub_schurs->is_subs[i],MAT_INITIAL_MATRIX,&change_subt);
692: MatConvert(change_subt,MATSEQDENSE,MAT_INITIAL_MATRIX,&change_sub);
693: MatDestroy(&change_subt);
694: }
695: KSPSetOperators(sub_schurs->change[i],change_sub,change_sub);
696: MatDestroy(&change_sub);
697: KSPSetOptionsPrefix(sub_schurs->change[i],sub_schurs->prefix);
698: KSPAppendOptionsPrefix(sub_schurs->change[i],"sub_schurs_change_");
699: }
700: ISDestroy(&change_primal_all);
701: }
703: /* Local matrix of all local Schur on subsets (transposed) */
704: if (!sub_schurs->S_Ej_all) {
705: Mat T;
706: PetscScalar *v;
707: PetscInt *ii,*jj;
708: PetscInt cum,i,j,k;
710: /* MatSeqAIJSetPreallocation + MatSetValues is slow for these kind of matrices (may have large blocks)
711: Allocate properly a representative matrix and duplicate */
712: PetscMalloc3(local_size+1,&ii,local_stash_size,&jj,local_stash_size,&v);
713: ii[0] = 0;
714: cum = 0;
715: for (i=0;i<sub_schurs->n_subs;i++) {
716: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
717: for (j=0;j<subset_size;j++) {
718: const PetscInt row = cum+j;
719: PetscInt col = cum;
721: ii[row+1] = ii[row] + subset_size;
722: for (k=ii[row];k<ii[row+1];k++) {
723: jj[k] = col;
724: col++;
725: }
726: }
727: cum += subset_size;
728: }
729: MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,local_size,local_size,ii,jj,v,&T);
730: MatDuplicate(T,MAT_DO_NOT_COPY_VALUES,&sub_schurs->S_Ej_all);
731: MatDestroy(&T);
732: PetscFree3(ii,jj,v);
733: }
734: /* matrices for deluxe scaling and adaptive selection */
735: if (compute_Stilda) {
736: if (!sub_schurs->sum_S_Ej_tilda_all) {
737: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_tilda_all);
738: }
739: if (!sub_schurs->sum_S_Ej_inv_all && deluxe) {
740: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_inv_all);
741: }
742: }
744: /* Compute Schur complements explicitly */
745: F = NULL;
746: if (!sub_schurs->schur_explicit) {
747: /* this code branch is used when MatFactor with Schur complement support is not present or when explicitly requested;
748: it is not efficient, unless the economic version of the scaling is used */
749: Mat S_Ej_expl;
750: PetscScalar *work;
751: PetscInt j,*dummy_idx;
752: PetscBool Sdense;
754: PetscMalloc2(max_subset_size,&dummy_idx,max_subset_size*max_subset_size,&work);
755: local_size = 0;
756: for (i=0;i<sub_schurs->n_subs;i++) {
757: IS is_subset_B;
758: Mat AE_EE,AE_IE,AE_EI,S_Ej;
760: /* subsets in original and boundary numbering */
761: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,sub_schurs->is_subs[i],&is_subset_B);
762: /* EE block */
763: MatCreateSubMatrix(A_BB,is_subset_B,is_subset_B,MAT_INITIAL_MATRIX,&AE_EE);
764: /* IE block */
765: MatCreateSubMatrix(A_IB,is_I,is_subset_B,MAT_INITIAL_MATRIX,&AE_IE);
766: /* EI block */
767: if (sub_schurs->is_symmetric) {
768: MatCreateTranspose(AE_IE,&AE_EI);
769: } else if (sub_schurs->is_hermitian) {
770: MatCreateHermitianTranspose(AE_IE,&AE_EI);
771: } else {
772: MatCreateSubMatrix(A_BI,is_subset_B,is_I,MAT_INITIAL_MATRIX,&AE_EI);
773: }
774: ISDestroy(&is_subset_B);
775: MatCreateSchurComplement(AE_II,AE_II,AE_IE,AE_EI,AE_EE,&S_Ej);
776: MatDestroy(&AE_EE);
777: MatDestroy(&AE_IE);
778: MatDestroy(&AE_EI);
779: if (AE_II == A_II) { /* we can reuse the same ksp */
780: KSP ksp;
781: MatSchurComplementGetKSP(sub_schurs->S,&ksp);
782: MatSchurComplementSetKSP(S_Ej,ksp);
783: } else { /* build new ksp object which inherits ksp and pc types from the original one */
784: KSP origksp,schurksp;
785: PC origpc,schurpc;
786: KSPType ksp_type;
787: PetscInt n_internal;
788: PetscBool ispcnone;
790: MatSchurComplementGetKSP(sub_schurs->S,&origksp);
791: MatSchurComplementGetKSP(S_Ej,&schurksp);
792: KSPGetType(origksp,&ksp_type);
793: KSPSetType(schurksp,ksp_type);
794: KSPGetPC(schurksp,&schurpc);
795: KSPGetPC(origksp,&origpc);
796: PetscObjectTypeCompare((PetscObject)origpc,PCNONE,&ispcnone);
797: if (!ispcnone) {
798: PCType pc_type;
799: PCGetType(origpc,&pc_type);
800: PCSetType(schurpc,pc_type);
801: } else {
802: PCSetType(schurpc,PCLU);
803: }
804: ISGetSize(is_I,&n_internal);
805: if (!n_internal) { /* UMFPACK gives error with 0 sized problems */
806: MatSolverType solver = NULL;
807: PCFactorGetMatSolverType(origpc,(MatSolverType*)&solver);
808: if (solver) {
809: PCFactorSetMatSolverType(schurpc,solver);
810: }
811: }
812: KSPSetUp(schurksp);
813: }
814: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
815: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&S_Ej_expl);
816: PCBDDCComputeExplicitSchur(S_Ej,sub_schurs->is_symmetric,MAT_REUSE_MATRIX,&S_Ej_expl);
817: PetscObjectTypeCompare((PetscObject)S_Ej_expl,MATSEQDENSE,&Sdense);
818: if (Sdense) {
819: for (j=0;j<subset_size;j++) {
820: dummy_idx[j]=local_size+j;
821: }
822: MatSetValues(sub_schurs->S_Ej_all,subset_size,dummy_idx,subset_size,dummy_idx,work,INSERT_VALUES);
823: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented for sparse matrices");
824: MatDestroy(&S_Ej);
825: MatDestroy(&S_Ej_expl);
826: local_size += subset_size;
827: }
828: PetscFree2(dummy_idx,work);
829: /* free */
830: ISDestroy(&is_I);
831: MatDestroy(&AE_II);
832: PetscFree(all_local_idx_N);
833: } else {
834: Mat A,cs_AIB_mat = NULL,benign_AIIm1_ones_mat = NULL;
835: Vec Dall = NULL;
836: IS is_A_all,*is_p_r = NULL;
837: MatType Stype;
838: PetscScalar *work,*S_data,*schur_factor,infty = PETSC_MAX_REAL;
839: PetscScalar *SEj_arr = NULL,*SEjinv_arr = NULL;
840: const PetscScalar *rS_data;
841: PetscInt n,n_I,size_schur,size_active_schur,cum,cum2;
842: PetscBool economic,solver_S,S_lower_triangular = PETSC_FALSE;
843: PetscBool schur_has_vertices,factor_workaround;
844: PetscBool use_cholesky;
845: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
846: PetscBool oldpin;
847: #endif
849: /* get sizes */
850: n_I = 0;
851: if (is_I_layer) {
852: ISGetLocalSize(is_I_layer,&n_I);
853: }
854: economic = PETSC_FALSE;
855: ISGetLocalSize(sub_schurs->is_I,&cum);
856: if (cum != n_I) economic = PETSC_TRUE;
857: MatGetLocalSize(sub_schurs->A,&n,NULL);
858: size_active_schur = local_size;
860: /* import scaling vector (wrong formulation if we have 3D edges) */
861: if (scaling && compute_Stilda) {
862: const PetscScalar *array;
863: PetscScalar *array2;
864: const PetscInt *idxs;
865: PetscInt i;
867: ISGetIndices(sub_schurs->is_Ej_all,&idxs);
868: VecCreateSeq(PETSC_COMM_SELF,size_active_schur,&Dall);
869: VecGetArrayRead(scaling,&array);
870: VecGetArray(Dall,&array2);
871: for (i=0;i<size_active_schur;i++) array2[i] = array[idxs[i]];
872: VecRestoreArray(Dall,&array2);
873: VecRestoreArrayRead(scaling,&array);
874: ISRestoreIndices(sub_schurs->is_Ej_all,&idxs);
875: deluxe = PETSC_FALSE;
876: }
878: /* size active schurs does not count any dirichlet or vertex dof on the interface */
879: factor_workaround = PETSC_FALSE;
880: schur_has_vertices = PETSC_FALSE;
881: cum = n_I+size_active_schur;
882: if (sub_schurs->is_dir) {
883: const PetscInt* idxs;
884: PetscInt n_dir;
886: ISGetLocalSize(sub_schurs->is_dir,&n_dir);
887: ISGetIndices(sub_schurs->is_dir,&idxs);
888: PetscArraycpy(all_local_idx_N+cum,idxs,n_dir);
889: ISRestoreIndices(sub_schurs->is_dir,&idxs);
890: cum += n_dir;
891: factor_workaround = PETSC_TRUE;
892: }
893: /* include the primal vertices in the Schur complement */
894: if (exact_schur && sub_schurs->is_vertices && (compute_Stilda || benign_n)) {
895: PetscInt n_v;
897: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
898: if (n_v) {
899: const PetscInt* idxs;
901: ISGetIndices(sub_schurs->is_vertices,&idxs);
902: PetscArraycpy(all_local_idx_N+cum,idxs,n_v);
903: ISRestoreIndices(sub_schurs->is_vertices,&idxs);
904: cum += n_v;
905: factor_workaround = PETSC_TRUE;
906: schur_has_vertices = PETSC_TRUE;
907: }
908: }
909: size_schur = cum - n_I;
910: ISCreateGeneral(PETSC_COMM_SELF,cum,all_local_idx_N,PETSC_OWN_POINTER,&is_A_all);
911: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
912: oldpin = sub_schurs->A->boundtocpu;
913: MatBindToCPU(sub_schurs->A,PETSC_TRUE);
914: #endif
915: if (cum == n) {
916: ISSetPermutation(is_A_all);
917: MatPermute(sub_schurs->A,is_A_all,is_A_all,&A);
918: } else {
919: MatCreateSubMatrix(sub_schurs->A,is_A_all,is_A_all,MAT_INITIAL_MATRIX,&A);
920: }
921: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
922: MatBindToCPU(sub_schurs->A,oldpin);
923: #endif
924: MatSetOptionsPrefix(A,sub_schurs->prefix);
925: MatAppendOptionsPrefix(A,"sub_schurs_");
927: /* if we actually change the basis for the pressures, LDL^T factors will use a lot of memory
928: this is a workaround */
929: if (benign_n) {
930: Vec v,benign_AIIm1_ones;
931: ISLocalToGlobalMapping N_to_reor;
932: IS is_p0,is_p0_p;
933: PetscScalar *cs_AIB,*AIIm1_data;
934: PetscInt sizeA;
936: ISLocalToGlobalMappingCreateIS(is_A_all,&N_to_reor);
937: ISCreateGeneral(PETSC_COMM_SELF,benign_n,benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);
938: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,is_p0,&is_p0_p);
939: ISDestroy(&is_p0);
940: MatCreateVecs(A,&v,&benign_AIIm1_ones);
941: VecGetSize(v,&sizeA);
942: MatCreateSeqDense(PETSC_COMM_SELF,sizeA,benign_n,NULL,&benign_AIIm1_ones_mat);
943: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,benign_n,NULL,&cs_AIB_mat);
944: MatDenseGetArray(cs_AIB_mat,&cs_AIB);
945: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
946: PetscMalloc1(benign_n,&is_p_r);
947: /* compute colsum of A_IB restricted to pressures */
948: for (i=0;i<benign_n;i++) {
949: const PetscScalar *array;
950: const PetscInt *idxs;
951: PetscInt j,nz;
953: ISGlobalToLocalMappingApplyIS(N_to_reor,IS_GTOLM_DROP,benign_zerodiag_subs[i],&is_p_r[i]);
954: ISGetLocalSize(is_p_r[i],&nz);
955: ISGetIndices(is_p_r[i],&idxs);
956: for (j=0;j<nz;j++) AIIm1_data[idxs[j]+sizeA*i] = 1.;
957: ISRestoreIndices(is_p_r[i],&idxs);
958: VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
959: MatMult(A,benign_AIIm1_ones,v);
960: VecResetArray(benign_AIIm1_ones);
961: VecGetArrayRead(v,&array);
962: for (j=0;j<size_schur;j++) {
963: #if defined(PETSC_USE_COMPLEX)
964: cs_AIB[i*size_schur + j] = (PetscRealPart(array[j+n_I])/nz + PETSC_i*(PetscImaginaryPart(array[j+n_I])/nz));
965: #else
966: cs_AIB[i*size_schur + j] = array[j+n_I]/nz;
967: #endif
968: }
969: VecRestoreArrayRead(v,&array);
970: }
971: MatDenseRestoreArray(cs_AIB_mat,&cs_AIB);
972: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
973: VecDestroy(&v);
974: VecDestroy(&benign_AIIm1_ones);
975: MatSetOption(A,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);
976: MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);
977: MatSetOption(A,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);
978: MatZeroRowsColumnsIS(A,is_p0_p,1.0,NULL,NULL);
979: ISDestroy(&is_p0_p);
980: ISLocalToGlobalMappingDestroy(&N_to_reor);
981: }
982: MatSetOption(A,MAT_SYMMETRIC,sub_schurs->is_symmetric);
983: MatSetOption(A,MAT_HERMITIAN,sub_schurs->is_hermitian);
984: MatSetOption(A,MAT_SPD,sub_schurs->is_posdef);
986: /* for complexes, symmetric and hermitian at the same time implies null imaginary part */
987: use_cholesky = (PetscBool)((use_potr || use_sytr) && sub_schurs->is_hermitian && sub_schurs->is_symmetric);
989: /* when using the benign subspace trick, the local Schur complements are SPD */
990: /* MKL_PARDISO does not handle well the computation of a Schur complement from a symmetric indefinite factorization
991: Use LU and adapt pivoting perturbation (still, solution is not as accurate as with using MUMPS) */
992: if (benign_trick) {
993: sub_schurs->is_posdef = PETSC_TRUE;
994: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&flg);
995: if (flg) use_cholesky = PETSC_FALSE;
996: }
998: if (n_I) {
999: IS is_schur;
1000: char stype[64];
1001: PetscBool gpu = PETSC_FALSE;
1003: if (use_cholesky) {
1004: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_CHOLESKY,&F);
1005: } else {
1006: MatGetFactor(A,sub_schurs->mat_solver_type,MAT_FACTOR_LU,&F);
1007: }
1008: MatSetErrorIfFailure(A,PETSC_TRUE);
1009: #if defined(PETSC_HAVE_MKL_PARDISO)
1010: if (benign_trick) MatMkl_PardisoSetCntl(F,10,10);
1011: #endif
1012: /* subsets ordered last */
1013: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is_schur);
1014: MatFactorSetSchurIS(F,is_schur);
1015: ISDestroy(&is_schur);
1017: /* factorization step */
1018: if (use_cholesky) {
1019: MatCholeskyFactorSymbolic(F,A,NULL,NULL);
1020: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1021: MatMumpsSetIcntl(F,19,2);
1022: #endif
1023: MatCholeskyFactorNumeric(F,A,NULL);
1024: S_lower_triangular = PETSC_TRUE;
1025: } else {
1026: MatLUFactorSymbolic(F,A,NULL,NULL,NULL);
1027: #if defined(PETSC_HAVE_MUMPS) /* be sure that icntl 19 is not set by command line */
1028: MatMumpsSetIcntl(F,19,3);
1029: #endif
1030: MatLUFactorNumeric(F,A,NULL);
1031: }
1032: MatViewFromOptions(F,(PetscObject)A,"-mat_factor_view");
1034: if (matl_dbg_viewer) {
1035: Mat S;
1036: IS is;
1038: PetscObjectSetName((PetscObject)A,"A");
1039: MatView(A,matl_dbg_viewer);
1040: MatFactorCreateSchurComplement(F,&S,NULL);
1041: PetscObjectSetName((PetscObject)S,"S");
1042: MatView(S,matl_dbg_viewer);
1043: MatDestroy(&S);
1044: ISCreateStride(PETSC_COMM_SELF,n_I,0,1,&is);
1045: PetscObjectSetName((PetscObject)is,"I");
1046: ISView(is,matl_dbg_viewer);
1047: ISDestroy(&is);
1048: ISCreateStride(PETSC_COMM_SELF,size_schur,n_I,1,&is);
1049: PetscObjectSetName((PetscObject)is,"B");
1050: ISView(is,matl_dbg_viewer);
1051: ISDestroy(&is);
1052: PetscObjectSetName((PetscObject)is_A_all,"IA");
1053: ISView(is_A_all,matl_dbg_viewer);
1054: }
1056: /* get explicit Schur Complement computed during numeric factorization */
1057: MatFactorGetSchurComplement(F,&S_all,NULL);
1058: PetscStrncpy(stype,MATSEQDENSE,sizeof(stype));
1059: #if defined(PETSC_HAVE_CUDA)
1060: PetscObjectTypeCompareAny((PetscObject)A,&gpu,MATSEQAIJVIENNACL,MATSEQAIJCUSPARSE,"");
1061: #endif
1062: if (gpu) {
1063: PetscStrncpy(stype,MATSEQDENSECUDA,sizeof(stype));
1064: }
1065: PetscOptionsGetString(NULL,sub_schurs->prefix,"-sub_schurs_schur_mat_type",stype,sizeof(stype),NULL);
1066: MatConvert(S_all,stype,MAT_INPLACE_MATRIX,&S_all);
1067: MatSetOption(S_all,MAT_SPD,sub_schurs->is_posdef);
1068: MatSetOption(S_all,MAT_HERMITIAN,sub_schurs->is_hermitian);
1069: MatGetType(S_all,&Stype);
1071: /* we can reuse the solvers if we are not using the economic version */
1072: reuse_solvers = (PetscBool)(reuse_solvers && !economic);
1073: factor_workaround = (PetscBool)(reuse_solvers && factor_workaround);
1074: if (!sub_schurs->is_posdef && factor_workaround && compute_Stilda && size_active_schur)
1075: reuse_solvers = factor_workaround = PETSC_FALSE;
1077: solver_S = PETSC_TRUE;
1079: /* update the Schur complement with the change of basis on the pressures */
1080: if (benign_n) {
1081: const PetscScalar *cs_AIB;
1082: PetscScalar *S_data,*AIIm1_data;
1083: Mat S2 = NULL,S3 = NULL; /* dbg */
1084: PetscScalar *S2_data,*S3_data; /* dbg */
1085: Vec v,benign_AIIm1_ones;
1086: PetscInt sizeA;
1088: MatDenseGetArray(S_all,&S_data);
1089: MatCreateVecs(A,&v,&benign_AIIm1_ones);
1090: VecGetSize(v,&sizeA);
1091: #if defined(PETSC_HAVE_MUMPS)
1092: MatMumpsSetIcntl(F,26,0);
1093: #endif
1094: #if defined(PETSC_HAVE_MKL_PARDISO)
1095: MatMkl_PardisoSetCntl(F,70,1);
1096: #endif
1097: MatDenseGetArrayRead(cs_AIB_mat,&cs_AIB);
1098: MatDenseGetArray(benign_AIIm1_ones_mat,&AIIm1_data);
1099: if (matl_dbg_viewer) {
1100: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S2);
1101: MatDuplicate(S_all,MAT_DO_NOT_COPY_VALUES,&S3);
1102: MatDenseGetArray(S2,&S2_data);
1103: MatDenseGetArray(S3,&S3_data);
1104: }
1105: for (i=0;i<benign_n;i++) {
1106: PetscScalar *array,sum = 0.,one = 1.,*sums;
1107: const PetscInt *idxs;
1108: PetscInt k,j,nz;
1109: PetscBLASInt B_k,B_n;
1111: PetscCalloc1(benign_n,&sums);
1112: VecPlaceArray(benign_AIIm1_ones,AIIm1_data+sizeA*i);
1113: VecCopy(benign_AIIm1_ones,v);
1114: MatSolve(F,v,benign_AIIm1_ones);
1115: MatMult(A,benign_AIIm1_ones,v);
1116: VecResetArray(benign_AIIm1_ones);
1117: /* p0 dofs (eliminated) are excluded from the sums */
1118: for (k=0;k<benign_n;k++) {
1119: ISGetLocalSize(is_p_r[k],&nz);
1120: ISGetIndices(is_p_r[k],&idxs);
1121: for (j=0;j<nz-1;j++) sums[k] -= AIIm1_data[idxs[j]+sizeA*i];
1122: ISRestoreIndices(is_p_r[k],&idxs);
1123: }
1124: VecGetArrayRead(v,(const PetscScalar**)&array);
1125: if (matl_dbg_viewer) {
1126: Vec vv;
1127: char name[16];
1129: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array+n_I,&vv);
1130: PetscSNPrintf(name,sizeof(name),"Pvs%D",i);
1131: PetscObjectSetName((PetscObject)vv,name);
1132: VecView(vv,matl_dbg_viewer);
1133: }
1134: /* perform sparse rank updates on symmetric Schur (TODO: move outside of the loop?) */
1135: /* cs_AIB already scaled by 1./nz */
1136: B_k = 1;
1137: for (k=0;k<benign_n;k++) {
1138: sum = sums[k];
1139: PetscBLASIntCast(size_schur,&B_n);
1141: if (PetscAbsScalar(sum) == 0.0) continue;
1142: if (k == i) {
1143: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1144: if (matl_dbg_viewer) {
1145: PetscStackCallBLAS("BLASsyrk",BLASsyrk_("L","N",&B_n,&B_k,&sum,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1146: }
1147: } else { /* XXX Is it correct to use symmetric rank-2 update with half of the sum? */
1148: sum /= 2.0;
1149: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1150: if (matl_dbg_viewer) {
1151: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,cs_AIB+k*size_schur,&B_n,cs_AIB+i*size_schur,&B_n,&one,S3_data,&B_n));
1152: }
1153: }
1154: }
1155: sum = 1.;
1156: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S_data,&B_n));
1157: if (matl_dbg_viewer) {
1158: PetscStackCallBLAS("BLASsyr2k",BLASsyr2k_("L","N",&B_n,&B_k,&sum,array+n_I,&B_n,cs_AIB+i*size_schur,&B_n,&one,S2_data,&B_n));
1159: }
1160: VecRestoreArrayRead(v,(const PetscScalar**)&array);
1161: /* set p0 entry of AIIm1_ones to zero */
1162: ISGetLocalSize(is_p_r[i],&nz);
1163: ISGetIndices(is_p_r[i],&idxs);
1164: for (j=0;j<benign_n;j++) AIIm1_data[idxs[nz-1]+sizeA*j] = 0.;
1165: ISRestoreIndices(is_p_r[i],&idxs);
1166: PetscFree(sums);
1167: }
1168: VecDestroy(&benign_AIIm1_ones);
1169: if (matl_dbg_viewer) {
1170: MatDenseRestoreArray(S2,&S2_data);
1171: MatDenseRestoreArray(S3,&S3_data);
1172: }
1173: if (!S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1174: PetscInt k,j;
1175: for (k=0;k<size_schur;k++) {
1176: for (j=k;j<size_schur;j++) {
1177: S_data[j*size_schur+k] = PetscConj(S_data[k*size_schur+j]);
1178: }
1179: }
1180: }
1182: /* restore defaults */
1183: #if defined(PETSC_HAVE_MUMPS)
1184: MatMumpsSetIcntl(F,26,-1);
1185: #endif
1186: #if defined(PETSC_HAVE_MKL_PARDISO)
1187: MatMkl_PardisoSetCntl(F,70,0);
1188: #endif
1189: MatDenseRestoreArrayRead(cs_AIB_mat,&cs_AIB);
1190: MatDenseRestoreArray(benign_AIIm1_ones_mat,&AIIm1_data);
1191: VecDestroy(&v);
1192: MatDenseRestoreArray(S_all,&S_data);
1193: if (matl_dbg_viewer) {
1194: Mat S;
1196: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1197: MatFactorCreateSchurComplement(F,&S,NULL);
1198: PetscObjectSetName((PetscObject)S,"Sb");
1199: MatView(S,matl_dbg_viewer);
1200: MatDestroy(&S);
1201: PetscObjectSetName((PetscObject)S2,"S2P");
1202: MatView(S2,matl_dbg_viewer);
1203: PetscObjectSetName((PetscObject)S3,"S3P");
1204: MatView(S3,matl_dbg_viewer);
1205: PetscObjectSetName((PetscObject)cs_AIB_mat,"cs");
1206: MatView(cs_AIB_mat,matl_dbg_viewer);
1207: MatFactorGetSchurComplement(F,&S_all,NULL);
1208: }
1209: MatDestroy(&S2);
1210: MatDestroy(&S3);
1211: }
1212: if (!reuse_solvers) {
1213: for (i=0;i<benign_n;i++) {
1214: ISDestroy(&is_p_r[i]);
1215: }
1216: PetscFree(is_p_r);
1217: MatDestroy(&cs_AIB_mat);
1218: MatDestroy(&benign_AIIm1_ones_mat);
1219: }
1220: } else { /* we can't use MatFactor when size_schur == size_of_the_problem */
1221: MatConvert(A,MATSEQDENSE,MAT_INITIAL_MATRIX,&S_all);
1222: MatGetType(S_all,&Stype);
1223: reuse_solvers = PETSC_FALSE; /* TODO: why we can't reuse the solvers here? */
1224: factor_workaround = PETSC_FALSE;
1225: solver_S = PETSC_FALSE;
1226: }
1228: if (reuse_solvers) {
1229: Mat A_II,Afake;
1230: Vec vec1_B;
1231: PCBDDCReuseSolvers msolv_ctx;
1232: PetscInt n_R;
1234: if (sub_schurs->reuse_solver) {
1235: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1236: } else {
1237: PetscNew(&sub_schurs->reuse_solver);
1238: }
1239: msolv_ctx = sub_schurs->reuse_solver;
1240: MatSchurComplementGetSubMatrices(sub_schurs->S,&A_II,NULL,NULL,NULL,NULL);
1241: PetscObjectReference((PetscObject)F);
1242: msolv_ctx->F = F;
1243: MatCreateVecs(F,&msolv_ctx->sol,NULL);
1244: /* currently PETSc has no support for MatSolve(F,x,x), so cheat and let rhs and sol share the same memory */
1245: {
1246: PetscScalar *array;
1247: PetscInt n;
1249: VecGetLocalSize(msolv_ctx->sol,&n);
1250: VecGetArray(msolv_ctx->sol,&array);
1251: VecCreateSeqWithArray(PetscObjectComm((PetscObject)msolv_ctx->sol),1,n,array,&msolv_ctx->rhs);
1252: VecRestoreArray(msolv_ctx->sol,&array);
1253: }
1254: msolv_ctx->has_vertices = schur_has_vertices;
1256: /* interior solver */
1257: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->interior_solver);
1258: PCSetOperators(msolv_ctx->interior_solver,A_II,A_II);
1259: PCSetType(msolv_ctx->interior_solver,PCSHELL);
1260: PCShellSetName(msolv_ctx->interior_solver,"Interior solver (w/o Schur factorization)");
1261: PCShellSetContext(msolv_ctx->interior_solver,msolv_ctx);
1262: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1263: PCShellSetApply(msolv_ctx->interior_solver,PCBDDCReuseSolvers_Interior);
1264: PCShellSetApplyTranspose(msolv_ctx->interior_solver,PCBDDCReuseSolvers_InteriorTranspose);
1266: /* correction solver */
1267: PCCreate(PetscObjectComm((PetscObject)A_II),&msolv_ctx->correction_solver);
1268: PCSetType(msolv_ctx->correction_solver,PCSHELL);
1269: PCShellSetName(msolv_ctx->correction_solver,"Correction solver (with Schur factorization)");
1270: PCShellSetContext(msolv_ctx->correction_solver,msolv_ctx);
1271: PCShellSetView(msolv_ctx->interior_solver,PCBDDCReuseSolvers_View);
1272: PCShellSetApply(msolv_ctx->correction_solver,PCBDDCReuseSolvers_Correction);
1273: PCShellSetApplyTranspose(msolv_ctx->correction_solver,PCBDDCReuseSolvers_CorrectionTranspose);
1275: /* scatter and vecs for Schur complement solver */
1276: MatCreateVecs(S_all,&msolv_ctx->sol_B,&msolv_ctx->rhs_B);
1277: MatCreateVecs(sub_schurs->S,&vec1_B,NULL);
1278: if (!schur_has_vertices) {
1279: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_A_all,&msolv_ctx->is_B);
1280: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,NULL,&msolv_ctx->correction_scatter_B);
1281: PetscObjectReference((PetscObject)is_A_all);
1282: msolv_ctx->is_R = is_A_all;
1283: } else {
1284: IS is_B_all;
1285: const PetscInt* idxs;
1286: PetscInt dual,n_v,n;
1288: ISGetLocalSize(sub_schurs->is_vertices,&n_v);
1289: dual = size_schur - n_v;
1290: ISGetLocalSize(is_A_all,&n);
1291: ISGetIndices(is_A_all,&idxs);
1292: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),dual,idxs+n_I,PETSC_COPY_VALUES,&is_B_all);
1293: ISGlobalToLocalMappingApplyIS(sub_schurs->BtoNmap,IS_GTOLM_DROP,is_B_all,&msolv_ctx->is_B);
1294: ISDestroy(&is_B_all);
1295: ISCreateStride(PetscObjectComm((PetscObject)is_A_all),dual,0,1,&is_B_all);
1296: VecScatterCreate(vec1_B,msolv_ctx->is_B,msolv_ctx->sol_B,is_B_all,&msolv_ctx->correction_scatter_B);
1297: ISDestroy(&is_B_all);
1298: ISCreateGeneral(PetscObjectComm((PetscObject)is_A_all),n-n_v,idxs,PETSC_COPY_VALUES,&msolv_ctx->is_R);
1299: ISRestoreIndices(is_A_all,&idxs);
1300: }
1301: ISGetLocalSize(msolv_ctx->is_R,&n_R);
1302: MatCreateSeqAIJ(PETSC_COMM_SELF,n_R,n_R,0,NULL,&Afake);
1303: MatAssemblyBegin(Afake,MAT_FINAL_ASSEMBLY);
1304: MatAssemblyEnd(Afake,MAT_FINAL_ASSEMBLY);
1305: PCSetOperators(msolv_ctx->correction_solver,Afake,Afake);
1306: MatDestroy(&Afake);
1307: VecDestroy(&vec1_B);
1309: /* communicate benign info to solver context */
1310: if (benign_n) {
1311: PetscScalar *array;
1313: msolv_ctx->benign_n = benign_n;
1314: msolv_ctx->benign_zerodiag_subs = is_p_r;
1315: PetscMalloc1(benign_n,&msolv_ctx->benign_save_vals);
1316: msolv_ctx->benign_csAIB = cs_AIB_mat;
1317: MatCreateVecs(cs_AIB_mat,&msolv_ctx->benign_corr_work,NULL);
1318: VecGetArray(msolv_ctx->benign_corr_work,&array);
1319: VecCreateSeqWithArray(PETSC_COMM_SELF,1,size_schur,array,&msolv_ctx->benign_dummy_schur_vec);
1320: VecRestoreArray(msolv_ctx->benign_corr_work,&array);
1321: msolv_ctx->benign_AIIm1ones = benign_AIIm1_ones_mat;
1322: }
1323: } else {
1324: if (sub_schurs->reuse_solver) {
1325: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1326: }
1327: PetscFree(sub_schurs->reuse_solver);
1328: }
1329: MatDestroy(&A);
1330: ISDestroy(&is_A_all);
1332: /* Work arrays */
1333: PetscMalloc1(max_subset_size*max_subset_size,&work);
1335: /* S_Ej_all */
1336: cum = cum2 = 0;
1337: MatDenseGetArrayRead(S_all,&rS_data);
1338: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&SEj_arr);
1339: if (compute_Stilda) {
1340: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1341: }
1342: for (i=0;i<sub_schurs->n_subs;i++) {
1343: PetscInt j;
1345: /* get S_E */
1346: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1347: if (S_lower_triangular) { /* I need to expand the upper triangular data (column oriented) */
1348: PetscInt k;
1349: for (k=0;k<subset_size;k++) {
1350: for (j=k;j<subset_size;j++) {
1351: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1352: work[j*subset_size+k] = PetscConj(rS_data[cum2+k*size_schur+j]);
1353: }
1354: }
1355: } else { /* just copy to workspace */
1356: PetscInt k;
1357: for (k=0;k<subset_size;k++) {
1358: for (j=0;j<subset_size;j++) {
1359: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1360: }
1361: }
1362: }
1363: /* insert S_E values */
1364: if (sub_schurs->change) {
1365: Mat change_sub,SEj,T;
1367: /* change basis */
1368: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1369: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1370: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1371: Mat T2;
1372: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1373: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1374: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1375: MatDestroy(&T2);
1376: } else {
1377: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1378: }
1379: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1380: MatDestroy(&T);
1381: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1.0,NULL,NULL);
1382: MatDestroy(&SEj);
1383: }
1384: if (deluxe) {
1385: PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1386: /* if adaptivity is requested, invert S_E blocks */
1387: if (compute_Stilda) {
1388: Mat M;
1389: const PetscScalar *vals;
1390: PetscBool isdense,isdensecuda;
1392: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&M);
1393: MatSetOption(M,MAT_SPD,sub_schurs->is_posdef);
1394: MatSetOption(M,MAT_HERMITIAN,sub_schurs->is_hermitian);
1395: if (!PetscBTLookup(sub_schurs->is_edge,i)) {
1396: MatSetType(M,Stype);
1397: }
1398: PetscObjectTypeCompare((PetscObject)M,MATSEQDENSE,&isdense);
1399: PetscObjectTypeCompare((PetscObject)M,MATSEQDENSECUDA,&isdensecuda);
1400: if (use_cholesky) {
1401: MatCholeskyFactor(M,NULL,NULL);
1402: } else {
1403: MatLUFactor(M,NULL,NULL,NULL);
1404: }
1405: if (isdense) {
1406: MatSeqDenseInvertFactors_Private(M);
1407: #if defined(PETSC_HAVE_CUDA)
1408: } else if (isdensecuda) {
1409: MatSeqDenseCUDAInvertFactors_Private(M);
1410: #endif
1411: } else SETERRQ(PetscObjectComm((PetscObject)M),PETSC_ERR_SUP,"Not implemented for type %s",Stype);
1412: MatDenseGetArrayRead(M,&vals);
1413: PetscArraycpy(SEjinv_arr,vals,subset_size*subset_size);
1414: MatDenseRestoreArrayRead(M,&vals);
1415: MatDestroy(&M);
1416: }
1417: } else if (compute_Stilda) { /* not using deluxe */
1418: Mat SEj;
1419: Vec D;
1420: PetscScalar *array;
1422: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1423: VecGetArray(Dall,&array);
1424: VecCreateSeqWithArray(PETSC_COMM_SELF,1,subset_size,array+cum,&D);
1425: VecRestoreArray(Dall,&array);
1426: VecShift(D,-1.);
1427: MatDiagonalScale(SEj,D,D);
1428: MatDestroy(&SEj);
1429: VecDestroy(&D);
1430: PetscArraycpy(SEj_arr,work,subset_size*subset_size);
1431: }
1432: cum += subset_size;
1433: cum2 += subset_size*(size_schur + 1);
1434: SEj_arr += subset_size*subset_size;
1435: if (SEjinv_arr) SEjinv_arr += subset_size*subset_size;
1436: }
1437: MatDenseRestoreArrayRead(S_all,&rS_data);
1438: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&SEj_arr);
1439: if (compute_Stilda) {
1440: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&SEjinv_arr);
1441: }
1442: if (solver_S) {
1443: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1444: }
1446: /* may prevent from unneeded copies, since MUMPS or MKL_Pardiso always use CPU memory
1447: however, preliminary tests indicate using GPUs is still faster in the solve phase */
1448: #if defined(PETSC_HAVE_VIENNACL) || defined(PETSC_HAVE_CUDA)
1449: if (reuse_solvers) {
1450: Mat St;
1451: MatFactorSchurStatus st;
1453: flg = PETSC_FALSE;
1454: PetscOptionsGetBool(NULL,sub_schurs->prefix,"-sub_schurs_schur_pin_to_cpu",&flg,NULL);
1455: MatFactorGetSchurComplement(F,&St,&st);
1456: MatBindToCPU(St,flg);
1457: MatFactorRestoreSchurComplement(F,&St,st);
1458: }
1459: #endif
1461: schur_factor = NULL;
1462: if (compute_Stilda && size_active_schur) {
1464: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);
1465: if (sub_schurs->n_subs == 1 && size_schur == size_active_schur && deluxe) { /* we already computed the inverse */
1466: PetscArraycpy(SEjinv_arr,work,size_schur*size_schur);
1467: } else {
1468: Mat S_all_inv=NULL;
1470: if (solver_S) {
1471: /* for adaptive selection we need S^-1; for solver reusage we need S_\Delta\Delta^-1.
1472: The latter is not the principal subminor for S^-1. However, the factors can be reused since S_\Delta\Delta is the leading principal submatrix of S */
1473: if (factor_workaround) {/* invert without calling MatFactorInvertSchurComplement, since we are hacking */
1474: PetscScalar *data;
1475: PetscInt nd = 0;
1477: if (!use_potr) {
1478: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1479: }
1480: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1481: MatDenseGetArray(S_all_inv,&data);
1482: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1483: ISGetLocalSize(sub_schurs->is_dir,&nd);
1484: }
1486: /* factor and invert activedofs and vertices (dirichlet dofs does not contribute) */
1487: if (schur_has_vertices) {
1488: Mat M;
1489: PetscScalar *tdata;
1490: PetscInt nv = 0, news;
1492: ISGetLocalSize(sub_schurs->is_vertices,&nv);
1493: news = size_active_schur + nv;
1494: PetscCalloc1(news*news,&tdata);
1495: for (i=0;i<size_active_schur;i++) {
1496: PetscArraycpy(tdata+i*(news+1),data+i*(size_schur+1),size_active_schur-i);
1497: PetscArraycpy(tdata+i*(news+1)+size_active_schur-i,data+i*size_schur+size_active_schur+nd,nv);
1498: }
1499: for (i=0;i<nv;i++) {
1500: PetscInt k = i+size_active_schur;
1501: PetscArraycpy(tdata+k*(news+1),data+(k+nd)*(size_schur+1),nv-i);
1502: }
1504: MatCreateSeqDense(PETSC_COMM_SELF,news,news,tdata,&M);
1505: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1506: MatCholeskyFactor(M,NULL,NULL);
1507: /* save the factors */
1508: cum = 0;
1509: PetscMalloc1((size_active_schur*(size_active_schur +1))/2+nd,&schur_factor);
1510: for (i=0;i<size_active_schur;i++) {
1511: PetscArraycpy(schur_factor+cum,tdata+i*(news+1),size_active_schur-i);
1512: cum += size_active_schur - i;
1513: }
1514: for (i=0;i<nd;i++) schur_factor[cum+i] = PetscSqrtReal(PetscRealPart(data[(i+size_active_schur)*(size_schur+1)]));
1515: MatSeqDenseInvertFactors_Private(M);
1516: /* move back just the active dofs to the Schur complement */
1517: for (i=0;i<size_active_schur;i++) {
1518: PetscArraycpy(data+i*size_schur,tdata+i*news,size_active_schur);
1519: }
1520: PetscFree(tdata);
1521: MatDestroy(&M);
1522: } else { /* we can factorize and invert just the activedofs */
1523: Mat M;
1524: PetscScalar *aux;
1526: PetscMalloc1(nd,&aux);
1527: for (i=0;i<nd;i++) aux[i] = 1.0/data[(i+size_active_schur)*(size_schur+1)];
1528: MatCreateSeqDense(PETSC_COMM_SELF,size_active_schur,size_active_schur,data,&M);
1529: MatDenseSetLDA(M,size_schur);
1530: MatSetOption(M,MAT_SPD,PETSC_TRUE);
1531: MatCholeskyFactor(M,NULL,NULL);
1532: MatSeqDenseInvertFactors_Private(M);
1533: MatDestroy(&M);
1534: MatCreateSeqDense(PETSC_COMM_SELF,size_schur,nd,data+size_active_schur*size_schur,&M);
1535: MatZeroEntries(M);
1536: MatDestroy(&M);
1537: MatCreateSeqDense(PETSC_COMM_SELF,nd,size_schur,data+size_active_schur,&M);
1538: MatDenseSetLDA(M,size_schur);
1539: MatZeroEntries(M);
1540: MatDestroy(&M);
1541: for (i=0;i<nd;i++) data[(i+size_active_schur)*(size_schur+1)] = aux[i];
1542: PetscFree(aux);
1543: }
1544: MatDenseRestoreArray(S_all_inv,&data);
1545: } else { /* use MatFactor calls to invert S */
1546: MatFactorInvertSchurComplement(F);
1547: MatFactorGetSchurComplement(F,&S_all_inv,NULL);
1548: }
1549: } else { /* we need to invert explicitly since we are not using MatFactor for S */
1550: PetscObjectReference((PetscObject)S_all);
1551: S_all_inv = S_all;
1552: MatDenseGetArray(S_all_inv,&S_data);
1553: PetscBLASIntCast(size_schur,&B_N);
1554: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1555: if (use_potr) {
1556: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,S_data,&B_N,&B_ierr));
1558: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,S_data,&B_N,&B_ierr));
1560: } else if (use_sytr) {
1561: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1563: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,S_data,&B_N,pivots,Bwork,&B_ierr));
1565: } else {
1566: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,S_data,&B_N,pivots,&B_ierr));
1568: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,S_data,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1570: }
1571: PetscLogFlops(1.0*size_schur*size_schur*size_schur);
1572: PetscFPTrapPop();
1573: MatDenseRestoreArray(S_all_inv,&S_data);
1574: }
1575: /* S_Ej_tilda_all */
1576: cum = cum2 = 0;
1577: MatDenseGetArrayRead(S_all_inv,&rS_data);
1578: for (i=0;i<sub_schurs->n_subs;i++) {
1579: PetscInt j;
1581: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1582: /* get (St^-1)_E */
1583: /* Unless we are changing the variables, I don't need to expand to upper triangular since St^-1
1584: will be properly accessed later during adaptive selection */
1585: if (S_lower_triangular) {
1586: PetscInt k;
1587: if (sub_schurs->change) {
1588: for (k=0;k<subset_size;k++) {
1589: for (j=k;j<subset_size;j++) {
1590: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1591: work[j*subset_size+k] = work[k*subset_size+j];
1592: }
1593: }
1594: } else {
1595: for (k=0;k<subset_size;k++) {
1596: for (j=k;j<subset_size;j++) {
1597: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1598: }
1599: }
1600: }
1601: } else {
1602: PetscInt k;
1603: for (k=0;k<subset_size;k++) {
1604: for (j=0;j<subset_size;j++) {
1605: work[k*subset_size+j] = rS_data[cum2+k*size_schur+j];
1606: }
1607: }
1608: }
1609: if (sub_schurs->change) {
1610: Mat change_sub,SEj,T;
1612: /* change basis */
1613: KSPGetOperators(sub_schurs->change[i],&change_sub,NULL);
1614: MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,work,&SEj);
1615: if (!sub_schurs->change_with_qr) { /* currently there's no support for PtAP with P SeqAIJ */
1616: Mat T2;
1617: MatTransposeMatMult(change_sub,SEj,MAT_INITIAL_MATRIX,1.0,&T2);
1618: MatMatMult(T2,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1619: MatDestroy(&T2);
1620: MatConvert(T,MATSEQDENSE,MAT_INPLACE_MATRIX,&T);
1621: } else {
1622: MatPtAP(SEj,change_sub,MAT_INITIAL_MATRIX,1.0,&T);
1623: }
1624: MatCopy(T,SEj,SAME_NONZERO_PATTERN);
1625: MatDestroy(&T);
1626: /* set diagonal entry to a very large value to pick the basis we are eliminating as the first eigenvectors with adaptive selection */
1627: MatZeroRowsColumnsIS(SEj,sub_schurs->change_primal_sub[i],1./PETSC_SMALL,NULL,NULL);
1628: MatDestroy(&SEj);
1629: }
1630: PetscArraycpy(SEjinv_arr,work,subset_size*subset_size);
1631: cum += subset_size;
1632: cum2 += subset_size*(size_schur + 1);
1633: SEjinv_arr += subset_size*subset_size;
1634: }
1635: MatDenseRestoreArrayRead(S_all_inv,&rS_data);
1636: if (solver_S) {
1637: if (schur_has_vertices) {
1638: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_FACTORED);
1639: } else {
1640: MatFactorRestoreSchurComplement(F,&S_all_inv,MAT_FACTOR_SCHUR_INVERTED);
1641: }
1642: }
1643: MatDestroy(&S_all_inv);
1644: }
1645: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&SEjinv_arr);
1647: /* move back factors if needed */
1648: if (schur_has_vertices) {
1649: Mat S_tmp;
1650: PetscInt nd = 0;
1653: MatFactorGetSchurComplement(F,&S_tmp,NULL);
1654: if (use_potr) {
1655: PetscScalar *data;
1657: MatDenseGetArray(S_tmp,&data);
1658: PetscArrayzero(data,size_schur*size_schur);
1660: if (S_lower_triangular) {
1661: cum = 0;
1662: for (i=0;i<size_active_schur;i++) {
1663: PetscArraycpy(data+i*(size_schur+1),schur_factor+cum,size_active_schur-i);
1664: cum += size_active_schur-i;
1665: }
1666: } else {
1667: PetscArraycpy(data,schur_factor,size_schur*size_schur);
1668: }
1669: if (sub_schurs->is_dir) {
1670: ISGetLocalSize(sub_schurs->is_dir,&nd);
1671: for (i=0;i<nd;i++) {
1672: data[(i+size_active_schur)*(size_schur+1)] = schur_factor[cum+i];
1673: }
1674: }
1675: /* workaround: since I cannot modify the matrices used inside the solvers for the forward and backward substitutions,
1676: set the diagonal entry of the Schur factor to a very large value */
1677: for (i=size_active_schur+nd;i<size_schur;i++) {
1678: data[i*(size_schur+1)] = infty;
1679: }
1680: MatDenseRestoreArray(S_tmp,&data);
1681: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Factor update not yet implemented for non SPD matrices");
1682: MatFactorRestoreSchurComplement(F,&S_tmp,MAT_FACTOR_SCHUR_FACTORED);
1683: }
1684: } else if (factor_workaround) { /* we need to eliminate any unneeded coupling */
1685: PetscScalar *data;
1686: PetscInt nd = 0;
1688: if (sub_schurs->is_dir) { /* dirichlet dofs could have different scalings */
1689: ISGetLocalSize(sub_schurs->is_dir,&nd);
1690: }
1691: MatFactorGetSchurComplement(F,&S_all,NULL);
1692: MatDenseGetArray(S_all,&data);
1693: for (i=0;i<size_active_schur;i++) {
1694: PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1695: }
1696: for (i=size_active_schur+nd;i<size_schur;i++) {
1697: PetscArrayzero(data+i*size_schur+size_active_schur,size_schur-size_active_schur);
1698: data[i*(size_schur+1)] = infty;
1699: }
1700: MatDenseRestoreArray(S_all,&data);
1701: MatFactorRestoreSchurComplement(F,&S_all,MAT_FACTOR_SCHUR_UNFACTORED);
1702: }
1703: PetscFree(work);
1704: PetscFree(schur_factor);
1705: VecDestroy(&Dall);
1706: }
1707: ISDestroy(&is_I_layer);
1708: MatDestroy(&S_all);
1709: MatDestroy(&A_BB);
1710: MatDestroy(&A_IB);
1711: MatDestroy(&A_BI);
1712: MatDestroy(&F);
1714: PetscMalloc1(sub_schurs->n_subs,&nnz);
1715: for (i=0;i<sub_schurs->n_subs;i++) {
1716: ISGetLocalSize(sub_schurs->is_subs[i],&nnz[i]);
1717: }
1718: ISCreateGeneral(PETSC_COMM_SELF,sub_schurs->n_subs,nnz,PETSC_OWN_POINTER,&is_I_layer);
1719: MatSetVariableBlockSizes(sub_schurs->S_Ej_all,sub_schurs->n_subs,nnz);
1720: MatAssemblyBegin(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1721: MatAssemblyEnd(sub_schurs->S_Ej_all,MAT_FINAL_ASSEMBLY);
1722: if (compute_Stilda) {
1723: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_tilda_all,sub_schurs->n_subs,nnz);
1724: MatAssemblyBegin(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1725: MatAssemblyEnd(sub_schurs->sum_S_Ej_tilda_all,MAT_FINAL_ASSEMBLY);
1726: if (deluxe) {
1727: MatSetVariableBlockSizes(sub_schurs->sum_S_Ej_inv_all,sub_schurs->n_subs,nnz);
1728: MatAssemblyBegin(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1729: MatAssemblyEnd(sub_schurs->sum_S_Ej_inv_all,MAT_FINAL_ASSEMBLY);
1730: }
1731: }
1732: ISDestroy(&is_I_layer);
1734: /* Get local part of (\sum_j S_Ej) */
1735: if (!sub_schurs->sum_S_Ej_all) {
1736: MatDuplicate(sub_schurs->S_Ej_all,MAT_DO_NOT_COPY_VALUES,&sub_schurs->sum_S_Ej_all);
1737: }
1738: VecSet(gstash,0.0);
1739: MatSeqAIJGetArray(sub_schurs->S_Ej_all,&stasharray);
1740: VecPlaceArray(lstash,stasharray);
1741: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1742: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1743: MatSeqAIJRestoreArray(sub_schurs->S_Ej_all,&stasharray);
1744: VecResetArray(lstash);
1745: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_all,&stasharray);
1746: VecPlaceArray(lstash,stasharray);
1747: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1748: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1749: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_all,&stasharray);
1750: VecResetArray(lstash);
1752: /* Get local part of (\sum_j S^-1_Ej) (\sum_j St^-1_Ej) */
1753: if (compute_Stilda) {
1754: VecSet(gstash,0.0);
1755: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1756: VecPlaceArray(lstash,stasharray);
1757: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1758: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1759: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1760: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1761: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&stasharray);
1762: VecResetArray(lstash);
1763: if (deluxe) {
1764: VecSet(gstash,0.0);
1765: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1766: VecPlaceArray(lstash,stasharray);
1767: VecScatterBegin(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1768: VecScatterEnd(sstash,lstash,gstash,ADD_VALUES,SCATTER_FORWARD);
1769: VecScatterBegin(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1770: VecScatterEnd(sstash,gstash,lstash,INSERT_VALUES,SCATTER_REVERSE);
1771: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&stasharray);
1772: VecResetArray(lstash);
1773: } else {
1774: PetscScalar *array;
1775: PetscInt cum;
1777: MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1778: cum = 0;
1779: for (i=0;i<sub_schurs->n_subs;i++) {
1780: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1781: PetscBLASIntCast(subset_size,&B_N);
1782: PetscFPTrapPush(PETSC_FP_TRAP_OFF);
1783: if (use_potr) {
1784: PetscStackCallBLAS("LAPACKpotrf",LAPACKpotrf_("L",&B_N,array+cum,&B_N,&B_ierr));
1786: PetscStackCallBLAS("LAPACKpotri",LAPACKpotri_("L",&B_N,array+cum,&B_N,&B_ierr));
1788: } else if (use_sytr) {
1789: PetscStackCallBLAS("LAPACKsytrf",LAPACKsytrf_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1791: PetscStackCallBLAS("LAPACKsytri",LAPACKsytri_("L",&B_N,array+cum,&B_N,pivots,Bwork,&B_ierr));
1793: } else {
1794: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&B_N,&B_N,array+cum,&B_N,pivots,&B_ierr));
1796: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&B_N,array+cum,&B_N,pivots,Bwork,&B_lwork,&B_ierr));
1798: }
1799: PetscLogFlops(1.0*subset_size*subset_size*subset_size);
1800: PetscFPTrapPop();
1801: cum += subset_size*subset_size;
1802: }
1803: MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&array);
1804: PetscObjectReference((PetscObject)sub_schurs->sum_S_Ej_all);
1805: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1806: sub_schurs->sum_S_Ej_inv_all = sub_schurs->sum_S_Ej_all;
1807: }
1808: }
1809: VecDestroy(&lstash);
1810: VecDestroy(&gstash);
1811: VecScatterDestroy(&sstash);
1813: if (matl_dbg_viewer) {
1814: PetscInt cum;
1816: if (sub_schurs->S_Ej_all) {
1817: PetscObjectSetName((PetscObject)sub_schurs->S_Ej_all,"SE");
1818: MatView(sub_schurs->S_Ej_all,matl_dbg_viewer);
1819: }
1820: if (sub_schurs->sum_S_Ej_all) {
1821: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_all,"SSE");
1822: MatView(sub_schurs->sum_S_Ej_all,matl_dbg_viewer);
1823: }
1824: if (sub_schurs->sum_S_Ej_inv_all) {
1825: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_inv_all,"SSEm");
1826: MatView(sub_schurs->sum_S_Ej_inv_all,matl_dbg_viewer);
1827: }
1828: if (sub_schurs->sum_S_Ej_tilda_all) {
1829: PetscObjectSetName((PetscObject)sub_schurs->sum_S_Ej_tilda_all,"SSEt");
1830: MatView(sub_schurs->sum_S_Ej_tilda_all,matl_dbg_viewer);
1831: }
1832: for (i=0,cum=0;i<sub_schurs->n_subs;i++) {
1833: IS is;
1834: char name[16];
1836: PetscSNPrintf(name,sizeof(name),"IE%D",i);
1837: ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);
1838: ISCreateStride(PETSC_COMM_SELF,subset_size,cum,1,&is);
1839: PetscObjectSetName((PetscObject)is,name);
1840: ISView(is,matl_dbg_viewer);
1841: ISDestroy(&is);
1842: cum += subset_size;
1843: }
1844: }
1846: /* free workspace */
1847: if (matl_dbg_viewer) PetscViewerFlush(matl_dbg_viewer);
1848: if (sub_schurs->debug) MPI_Barrier(comm_n);
1849: PetscViewerDestroy(&matl_dbg_viewer);
1850: PetscFree2(Bwork,pivots);
1851: PetscCommDestroy(&comm_n);
1852: return 0;
1853: }
1855: PetscErrorCode PCBDDCSubSchursInit(PCBDDCSubSchurs sub_schurs, const char* prefix, IS is_I, IS is_B, PCBDDCGraph graph, ISLocalToGlobalMapping BtoNmap, PetscBool copycc)
1856: {
1857: IS *faces,*edges,*all_cc,vertices;
1858: PetscInt i,n_faces,n_edges,n_all_cc;
1859: PetscBool is_sorted,ispardiso,ismumps;
1860: PetscErrorCode ierr;
1862: ISSorted(is_I,&is_sorted);
1864: ISSorted(is_B,&is_sorted);
1867: /* reset any previous data */
1868: PCBDDCSubSchursReset(sub_schurs);
1870: /* get index sets for faces and edges (already sorted by global ordering) */
1871: PCBDDCGraphGetCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1872: n_all_cc = n_faces+n_edges;
1873: PetscBTCreate(n_all_cc,&sub_schurs->is_edge);
1874: PetscMalloc1(n_all_cc,&all_cc);
1875: for (i=0;i<n_faces;i++) {
1876: if (copycc) {
1877: ISDuplicate(faces[i],&all_cc[i]);
1878: } else {
1879: PetscObjectReference((PetscObject)faces[i]);
1880: all_cc[i] = faces[i];
1881: }
1882: }
1883: for (i=0;i<n_edges;i++) {
1884: if (copycc) {
1885: ISDuplicate(edges[i],&all_cc[n_faces+i]);
1886: } else {
1887: PetscObjectReference((PetscObject)edges[i]);
1888: all_cc[n_faces+i] = edges[i];
1889: }
1890: PetscBTSet(sub_schurs->is_edge,n_faces+i);
1891: }
1892: PetscObjectReference((PetscObject)vertices);
1893: sub_schurs->is_vertices = vertices;
1894: PCBDDCGraphRestoreCandidatesIS(graph,&n_faces,&faces,&n_edges,&edges,&vertices);
1895: sub_schurs->is_dir = NULL;
1896: PCBDDCGraphGetDirichletDofsB(graph,&sub_schurs->is_dir);
1898: /* Determine if MatFactor can be used */
1899: PetscStrallocpy(prefix,&sub_schurs->prefix);
1900: #if defined(PETSC_HAVE_MUMPS)
1901: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMUMPS,sizeof(sub_schurs->mat_solver_type));
1902: #elif defined(PETSC_HAVE_MKL_PARDISO)
1903: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,sizeof(sub_schurs->mat_solver_type));
1904: #else
1905: PetscStrncpy(sub_schurs->mat_solver_type,MATSOLVERPETSC,sizeof(sub_schurs->mat_solver_type));
1906: #endif
1907: #if defined(PETSC_USE_COMPLEX)
1908: sub_schurs->is_hermitian = PETSC_FALSE; /* Hermitian Cholesky is not supported by PETSc and external packages */
1909: #else
1910: sub_schurs->is_hermitian = PETSC_TRUE;
1911: #endif
1912: sub_schurs->is_posdef = PETSC_TRUE;
1913: sub_schurs->is_symmetric = PETSC_TRUE;
1914: sub_schurs->debug = PETSC_FALSE;
1915: sub_schurs->restrict_comm = PETSC_FALSE;
1916: PetscOptionsBegin(PetscObjectComm((PetscObject)graph->l2gmap),sub_schurs->prefix,"BDDC sub_schurs options","PC");
1917: PetscOptionsString("-sub_schurs_mat_solver_type","Specific direct solver to use",NULL,sub_schurs->mat_solver_type,sub_schurs->mat_solver_type,sizeof(sub_schurs->mat_solver_type),NULL);
1918: PetscOptionsBool("-sub_schurs_symmetric","Symmetric problem",NULL,sub_schurs->is_symmetric,&sub_schurs->is_symmetric,NULL);
1919: PetscOptionsBool("-sub_schurs_hermitian","Hermitian problem",NULL,sub_schurs->is_hermitian,&sub_schurs->is_hermitian,NULL);
1920: PetscOptionsBool("-sub_schurs_posdef","Positive definite problem",NULL,sub_schurs->is_posdef,&sub_schurs->is_posdef,NULL);
1921: PetscOptionsBool("-sub_schurs_restrictcomm","Restrict communicator on active processes only",NULL,sub_schurs->restrict_comm,&sub_schurs->restrict_comm,NULL);
1922: PetscOptionsBool("-sub_schurs_debug","Debug output",NULL,sub_schurs->debug,&sub_schurs->debug,NULL);
1923: PetscOptionsEnd();
1924: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMUMPS,&ismumps);
1925: PetscStrcmp(sub_schurs->mat_solver_type,MATSOLVERMKL_PARDISO,&ispardiso);
1926: sub_schurs->schur_explicit = (PetscBool)(ispardiso || ismumps);
1928: /* for reals, symmetric and hermitian are synonims */
1929: #if !defined(PETSC_USE_COMPLEX)
1930: sub_schurs->is_symmetric = (PetscBool)(sub_schurs->is_symmetric && sub_schurs->is_hermitian);
1931: sub_schurs->is_hermitian = sub_schurs->is_symmetric;
1932: #endif
1934: PetscObjectReference((PetscObject)is_I);
1935: sub_schurs->is_I = is_I;
1936: PetscObjectReference((PetscObject)is_B);
1937: sub_schurs->is_B = is_B;
1938: PetscObjectReference((PetscObject)graph->l2gmap);
1939: sub_schurs->l2gmap = graph->l2gmap;
1940: PetscObjectReference((PetscObject)BtoNmap);
1941: sub_schurs->BtoNmap = BtoNmap;
1942: sub_schurs->n_subs = n_all_cc;
1943: sub_schurs->is_subs = all_cc;
1944: sub_schurs->S_Ej_all = NULL;
1945: sub_schurs->sum_S_Ej_all = NULL;
1946: sub_schurs->sum_S_Ej_inv_all = NULL;
1947: sub_schurs->sum_S_Ej_tilda_all = NULL;
1948: sub_schurs->is_Ej_all = NULL;
1949: return 0;
1950: }
1952: PetscErrorCode PCBDDCSubSchursCreate(PCBDDCSubSchurs *sub_schurs)
1953: {
1954: PCBDDCSubSchurs schurs_ctx;
1956: PetscNew(&schurs_ctx);
1957: schurs_ctx->n_subs = 0;
1958: *sub_schurs = schurs_ctx;
1959: return 0;
1960: }
1962: PetscErrorCode PCBDDCSubSchursReset(PCBDDCSubSchurs sub_schurs)
1963: {
1964: PetscInt i;
1966: if (!sub_schurs) return 0;
1967: PetscFree(sub_schurs->prefix);
1968: MatDestroy(&sub_schurs->A);
1969: MatDestroy(&sub_schurs->S);
1970: ISDestroy(&sub_schurs->is_I);
1971: ISDestroy(&sub_schurs->is_B);
1972: ISLocalToGlobalMappingDestroy(&sub_schurs->l2gmap);
1973: ISLocalToGlobalMappingDestroy(&sub_schurs->BtoNmap);
1974: MatDestroy(&sub_schurs->S_Ej_all);
1975: MatDestroy(&sub_schurs->sum_S_Ej_all);
1976: MatDestroy(&sub_schurs->sum_S_Ej_inv_all);
1977: MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);
1978: ISDestroy(&sub_schurs->is_Ej_all);
1979: ISDestroy(&sub_schurs->is_vertices);
1980: ISDestroy(&sub_schurs->is_dir);
1981: PetscBTDestroy(&sub_schurs->is_edge);
1982: for (i=0;i<sub_schurs->n_subs;i++) {
1983: ISDestroy(&sub_schurs->is_subs[i]);
1984: }
1985: if (sub_schurs->n_subs) {
1986: PetscFree(sub_schurs->is_subs);
1987: }
1988: if (sub_schurs->reuse_solver) {
1989: PCBDDCReuseSolversReset(sub_schurs->reuse_solver);
1990: }
1991: PetscFree(sub_schurs->reuse_solver);
1992: if (sub_schurs->change) {
1993: for (i=0;i<sub_schurs->n_subs;i++) {
1994: KSPDestroy(&sub_schurs->change[i]);
1995: ISDestroy(&sub_schurs->change_primal_sub[i]);
1996: }
1997: }
1998: PetscFree(sub_schurs->change);
1999: PetscFree(sub_schurs->change_primal_sub);
2000: sub_schurs->n_subs = 0;
2001: return 0;
2002: }
2004: PetscErrorCode PCBDDCSubSchursDestroy(PCBDDCSubSchurs* sub_schurs)
2005: {
2006: PCBDDCSubSchursReset(*sub_schurs);
2007: PetscFree(*sub_schurs);
2008: return 0;
2009: }
2011: static inline PetscErrorCode PCBDDCAdjGetNextLayer_Private(PetscInt* queue_tip,PetscInt n_prev,PetscBT touched,PetscInt* xadj,PetscInt* adjncy,PetscInt* n_added)
2012: {
2013: PetscInt i,j,n;
2015: n = 0;
2016: for (i=-n_prev;i<0;i++) {
2017: PetscInt start_dof = queue_tip[i];
2018: for (j=xadj[start_dof];j<xadj[start_dof+1];j++) {
2019: PetscInt dof = adjncy[j];
2020: if (!PetscBTLookup(touched,dof)) {
2021: PetscBTSet(touched,dof);
2022: queue_tip[n] = dof;
2023: n++;
2024: }
2025: }
2026: }
2027: *n_added = n;
2028: return 0;
2029: }