Actual source code: pipebcgs.c
2: /*
3: This file implements pipelined BiCGStab (pipe-BiCGStab).
4: Only allow right preconditioning.
5: */
6: #include <../src/ksp/ksp/impls/bcgs/bcgsimpl.h>
8: static PetscErrorCode KSPSetUp_PIPEBCGS(KSP ksp)
9: {
10: KSPSetWorkVecs(ksp,15);
11: return 0;
12: }
14: /* Only need a few hacks from KSPSolve_BCGS */
15: #include <petsc/private/pcimpl.h>
16: static PetscErrorCode KSPSolve_PIPEBCGS(KSP ksp)
17: {
18: PetscInt i;
19: PetscScalar rho,rhoold,alpha,beta,omega=0.0,d1,d2,d3;
20: Vec X,B,S,R,RP,Y,Q,P2,Q2,R2,S2,W,Z,W2,Z2,T,V;
21: PetscReal dp = 0.0;
22: KSP_BCGS *bcgs = (KSP_BCGS*)ksp->data;
23: PC pc;
25: X = ksp->vec_sol;
26: B = ksp->vec_rhs;
27: R = ksp->work[0];
28: RP = ksp->work[1];
29: S = ksp->work[2];
30: Y = ksp->work[3];
31: Q = ksp->work[4];
32: Q2 = ksp->work[5];
33: P2 = ksp->work[6];
34: R2 = ksp->work[7];
35: S2 = ksp->work[8];
36: W = ksp->work[9];
37: Z = ksp->work[10];
38: W2 = ksp->work[11];
39: Z2 = ksp->work[12];
40: T = ksp->work[13];
41: V = ksp->work[14];
43: /* Only supports right preconditioning */
45: if (!ksp->guess_zero) {
46: if (!bcgs->guess) {
47: VecDuplicate(X,&bcgs->guess);
48: }
49: VecCopy(X,bcgs->guess);
50: } else {
51: VecSet(X,0.0);
52: }
54: /* Compute initial residual */
55: KSPGetPC(ksp,&pc);
56: PCSetUp(pc);
57: if (!ksp->guess_zero) {
58: KSP_MatMult(ksp,pc->mat,X,Q2);
59: VecCopy(B,R);
60: VecAXPY(R,-1.0,Q2);
61: } else {
62: VecCopy(B,R);
63: }
65: /* Test for nothing to do */
66: if (ksp->normtype != KSP_NORM_NONE) {
67: VecNorm(R,NORM_2,&dp);
68: } else dp = 0.0;
69: PetscObjectSAWsTakeAccess((PetscObject)ksp);
70: ksp->its = 0;
71: ksp->rnorm = dp;
72: PetscObjectSAWsGrantAccess((PetscObject)ksp);
73: KSPLogResidualHistory(ksp,dp);
74: KSPMonitor(ksp,0,dp);
75: (*ksp->converged)(ksp,0,dp,&ksp->reason,ksp->cnvP);
76: if (ksp->reason) return 0;
78: /* Initialize */
79: VecCopy(R,RP); /* rp <- r */
81: VecDotBegin(R,RP,&rho); /* rho <- (r,rp) */
82: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
83: KSP_PCApply(ksp,R,R2); /* r2 <- K r */
84: KSP_MatMult(ksp,pc->mat,R2,W); /* w <- A r2 */
85: VecDotEnd(R,RP,&rho);
87: VecDotBegin(W,RP,&d2); /* d2 <- (w,rp) */
88: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)W));
89: KSP_PCApply(ksp,W,W2); /* w2 <- K w */
90: KSP_MatMult(ksp,pc->mat,W2,T); /* t <- A w2 */
91: VecDotEnd(W,RP,&d2);
93: alpha = rho/d2;
94: beta = 0.0;
96: /* Main loop */
97: i=0;
98: do {
99: if (i == 0) {
100: VecCopy(R2,P2); /* p2 <- r2 */
101: VecCopy(W,S); /* s <- w */
102: VecCopy(W2,S2); /* s2 <- w2 */
103: VecCopy(T,Z); /* z <- t */
104: } else {
105: VecAXPBYPCZ(P2,1.0,-beta*omega,beta,R2,S2); /* p2 <- beta * p2 + r2 - beta * omega * s2 */
106: VecAXPBYPCZ(S,1.0,-beta*omega,beta,W,Z); /* s <- beta * s + w - beta * omega * z */
107: VecAXPBYPCZ(S2,1.0,-beta*omega,beta,W2,Z2); /* s2 <- beta * s2 + w2 - beta * omega * z2 */
108: VecAXPBYPCZ(Z,1.0,-beta*omega,beta,T,V); /* z <- beta * z + t - beta * omega * v */
109: }
110: VecWAXPY(Q,-alpha,S,R); /* q <- r - alpha s */
111: VecWAXPY(Q2,-alpha,S2,R2); /* q2 <- r2 - alpha s2 */
112: VecWAXPY(Y,-alpha,Z,W); /* y <- w - alpha z */
114: VecDotBegin(Q,Y,&d1); /* d1 <- (q,y) */
115: VecDotBegin(Y,Y,&d2); /* d2 <- (y,y) */
117: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)Q));
118: KSP_PCApply(ksp,Z,Z2); /* z2 <- K z */
119: KSP_MatMult(ksp,pc->mat,Z2,V); /* v <- A z2 */
121: VecDotEnd(Q,Y,&d1);
122: VecDotEnd(Y,Y,&d2);
124: if (d2 == 0.0) {
125: /* y is 0. if q is 0, then alpha s == r, and hence alpha p may be our solution. Give it a try? */
126: VecDot(Q,Q,&d1);
127: if (d1 != 0.0) {
128: ksp->reason = KSP_DIVERGED_BREAKDOWN;
129: break;
130: }
131: VecAXPY(X,alpha,P2); /* x <- x + alpha p2 */
132: PetscObjectSAWsTakeAccess((PetscObject)ksp);
133: ksp->its++;
134: ksp->rnorm = 0.0;
135: ksp->reason = KSP_CONVERGED_RTOL;
136: PetscObjectSAWsGrantAccess((PetscObject)ksp);
137: KSPLogResidualHistory(ksp,dp);
138: KSPMonitor(ksp,i+1,0.0);
139: break;
140: }
141: omega = d1/d2; /* omega <- (y'q) / (y'y) */
142: VecAXPBYPCZ(X,alpha,omega,1.0,P2,Q2); /* x <- alpha * p2 + omega * q2 + x */
143: VecWAXPY(R,-omega,Y,Q); /* r <- q - omega y */
144: VecWAXPY(R2,-alpha,Z2,W2); /* r2 <- w2 - alpha z2 */
145: VecAYPX(R2,-omega,Q2); /* r2 <- q2 - omega r2 */
146: VecWAXPY(W,-alpha,V,T); /* w <- t - alpha v */
147: VecAYPX(W,-omega,Y); /* w <- y - omega w */
148: rhoold = rho;
150: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i+2) {
151: VecNormBegin(R,NORM_2,&dp); /* dp <- norm(r) */
152: }
153: VecDotBegin(R,RP,&rho); /* rho <- (r,rp) */
154: VecDotBegin(S,RP,&d1); /* d1 <- (s,rp) */
155: VecDotBegin(W,RP,&d2); /* d2 <- (w,rp) */
156: VecDotBegin(Z,RP,&d3); /* d3 <- (z,rp) */
158: PetscCommSplitReductionBegin(PetscObjectComm((PetscObject)R));
159: KSP_PCApply(ksp,W,W2); /* w2 <- K w */
160: KSP_MatMult(ksp,pc->mat,W2,T); /* t <- A w2 */
162: if (ksp->normtype != KSP_NORM_NONE && ksp->chknorm < i+2) {
163: VecNormEnd(R,NORM_2,&dp);
164: }
165: VecDotEnd(R,RP,&rho);
166: VecDotEnd(S,RP,&d1);
167: VecDotEnd(W,RP,&d2);
168: VecDotEnd(Z,RP,&d3);
172: beta = (rho/rhoold) * (alpha/omega);
173: alpha = rho/(d2 + beta * d1 - beta * omega * d3); /* alpha <- rho / (d2 + beta * d1 - beta * omega * d3) */
175: PetscObjectSAWsTakeAccess((PetscObject)ksp);
176: ksp->its++;
178: /* Residual replacement step */
179: if (i > 0 && i%100 == 0 && i < 1001) {
180: KSP_MatMult(ksp,pc->mat,X,R);
181: VecAYPX(R,-1.0,B); /* r <- b - Ax */
182: KSP_PCApply(ksp,R,R2); /* r2 <- K r */
183: KSP_MatMult(ksp,pc->mat,R2,W); /* w <- A r2 */
184: KSP_PCApply(ksp,W,W2); /* w2 <- K w */
185: KSP_MatMult(ksp,pc->mat,W2,T); /* t <- A w2 */
186: KSP_MatMult(ksp,pc->mat,P2,S); /* s <- A p2 */
187: KSP_PCApply(ksp,S,S2); /* s2 <- K s */
188: KSP_MatMult(ksp,pc->mat,S2,Z); /* z <- A s2 */
189: KSP_PCApply(ksp,Z,Z2); /* z2 <- K z */
190: KSP_MatMult(ksp,pc->mat,Z2,V); /* v <- A z2 */
191: }
193: ksp->rnorm = dp;
194: PetscObjectSAWsGrantAccess((PetscObject)ksp);
195: KSPLogResidualHistory(ksp,dp);
196: KSPMonitor(ksp,i+1,dp);
197: (*ksp->converged)(ksp,i+1,dp,&ksp->reason,ksp->cnvP);
198: if (ksp->reason) break;
199: if (rho == 0.0) {
200: ksp->reason = KSP_DIVERGED_BREAKDOWN;
201: break;
202: }
203: i++;
204: } while (i<ksp->max_it);
206: if (i >= ksp->max_it) ksp->reason = KSP_DIVERGED_ITS;
207: return 0;
208: }
210: /*MC
211: KSPPIPEBCGS - Implements the pipelined BiCGStab method.
213: This method has only two non-blocking reductions per iteration, compared to 3 blocking for standard FBCGS. The
214: non-blocking reductions are overlapped by matrix-vector products and preconditioner applications.
216: Periodic residual replacement may be used to increase robustness and maximal attainable accuracy.
218: Options Database Keys:
219: see KSPSolve()
221: Level: intermediate
223: Notes:
224: Like KSPFBCGS, the KSPPIPEBCGS implementation only allows for right preconditioning.
225: MPI configuration may be necessary for reductions to make asynchronous progress, which is important for
226: performance of pipelined methods. See the FAQ on the PETSc website for details.
228: Contributed by:
229: Siegfried Cools, Universiteit Antwerpen,
230: EXA2CT European Project on EXascale Algorithms and Advanced Computational Techniques, 2016.
232: Reference:
233: S. Cools and W. Vanroose,
234: "The communication-hiding pipelined BiCGStab method for the parallel solution of large unsymmetric linear systems",
235: Parallel Computing, 65:1-20, 2017.
237: .seealso: KSPCreate(), KSPSetType(), KSPType (for list of available types), KSP, KSPBICG, KSPFBCGS, KSPFBCGSL, KSPSetPCSide()
238: M*/
239: PETSC_EXTERN PetscErrorCode KSPCreate_PIPEBCGS(KSP ksp)
240: {
241: KSP_BCGS *bcgs;
243: PetscNewLog(ksp,&bcgs);
245: ksp->data = bcgs;
246: ksp->ops->setup = KSPSetUp_PIPEBCGS;
247: ksp->ops->solve = KSPSolve_PIPEBCGS;
248: ksp->ops->destroy = KSPDestroy_BCGS;
249: ksp->ops->reset = KSPReset_BCGS;
250: ksp->ops->buildresidual = KSPBuildResidualDefault;
251: ksp->ops->setfromoptions = KSPSetFromOptions_BCGS;
253: KSPSetSupportedNorm(ksp,KSP_NORM_UNPRECONDITIONED,PC_RIGHT,2);
254: KSPSetSupportedNorm(ksp,KSP_NORM_NONE,PC_RIGHT,1);
255: return 0;
256: }