Actual source code: elliptic.c

  1: #include <petsc/private/taoimpl.h>

  3: /*T
  4:    Concepts: TAO^Solving a system of nonlinear equations, nonlinear least squares
  5:    Routines: TaoCreate();
  6:    Routines: TaoSetType();
  7:    Routines: TaoSetSolution();
  8:    Routines: TaoSetObjective();
  9:    Routines: TaoSetGradient();
 10:    Routines: TaoSetConstraintsRoutine();
 11:    Routines: TaoSetJacobianStateRoutine();
 12:    Routines: TaoSetJacobianDesignRoutine();
 13:    Routines: TaoSetStateDesignIS();
 14:    Routines: TaoSetFromOptions();
 15:    Routines: TaoSolve();
 16:    Routines: TaoDestroy();
 17:    Processors: n
 18: T*/

 20: typedef struct {
 21:   PetscInt n; /* Number of total variables */
 22:   PetscInt m; /* Number of constraints */
 23:   PetscInt nstate;
 24:   PetscInt ndesign;
 25:   PetscInt mx; /* grid points in each direction */
 26:   PetscInt ns; /* Number of data samples (1<=ns<=8)
 27:                   Currently only ns=1 is supported */
 28:   PetscInt ndata; /* Number of data points per sample */
 29:   IS       s_is;
 30:   IS       d_is;

 32:   VecScatter state_scatter;
 33:   VecScatter design_scatter;
 34:   VecScatter *yi_scatter, *di_scatter;
 35:   Vec        suby,subq,subd;
 36:   Mat        Js,Jd,JsPrec,JsInv,JsBlock;

 38:   PetscReal alpha; /* Regularization parameter */
 39:   PetscReal beta; /* Weight attributed to ||u||^2 in regularization functional */
 40:   PetscReal noise; /* Amount of noise to add to data */
 41:   PetscReal *ones;
 42:   Mat       Q;
 43:   Mat       MQ;
 44:   Mat       L;

 46:   Mat Grad;
 47:   Mat Av,Avwork;
 48:   Mat Div, Divwork;
 49:   Mat DSG;
 50:   Mat Diag,Ones;

 52:   Vec q;
 53:   Vec ur; /* reference */

 55:   Vec d;
 56:   Vec dwork;

 58:   Vec x; /* super vec of y,u */

 60:   Vec y; /* state variables */
 61:   Vec ywork;

 63:   Vec ytrue;

 65:   Vec u; /* design variables */
 66:   Vec uwork;

 68:   Vec utrue;

 70:   Vec js_diag;

 72:   Vec c; /* constraint vector */
 73:   Vec cwork;

 75:   Vec lwork;
 76:   Vec S;
 77:   Vec Swork,Twork,Sdiag,Ywork;
 78:   Vec Av_u;

 80:   KSP solver;
 81:   PC  prec;

 83:   PetscReal tola,tolb,tolc,told;
 84:   PetscInt  ksp_its;
 85:   PetscInt  ksp_its_initial;
 86:   PetscLogStage stages[10];
 87:   PetscBool use_ptap;
 88:   PetscBool use_lrc;
 89: } AppCtx;

 91: PetscErrorCode FormFunction(Tao, Vec, PetscReal*, void*);
 92: PetscErrorCode FormGradient(Tao, Vec, Vec, void*);
 93: PetscErrorCode FormFunctionGradient(Tao, Vec, PetscReal*, Vec, void*);
 94: PetscErrorCode FormJacobianState(Tao, Vec, Mat, Mat, Mat, void*);
 95: PetscErrorCode FormJacobianDesign(Tao, Vec, Mat,void*);
 96: PetscErrorCode FormConstraints(Tao, Vec, Vec, void*);
 97: PetscErrorCode FormHessian(Tao, Vec, Mat, Mat, void*);
 98: PetscErrorCode Gather(Vec, Vec, VecScatter, Vec, VecScatter);
 99: PetscErrorCode Scatter(Vec, Vec, VecScatter, Vec, VecScatter);
100: PetscErrorCode EllipticInitialize(AppCtx*);
101: PetscErrorCode EllipticDestroy(AppCtx*);
102: PetscErrorCode EllipticMonitor(Tao, void*);

104: PetscErrorCode StateBlockMatMult(Mat,Vec,Vec);
105: PetscErrorCode StateMatMult(Mat,Vec,Vec);

107: PetscErrorCode StateInvMatMult(Mat,Vec,Vec);
108: PetscErrorCode DesignMatMult(Mat,Vec,Vec);
109: PetscErrorCode DesignMatMultTranspose(Mat,Vec,Vec);

111: PetscErrorCode QMatMult(Mat,Vec,Vec);
112: PetscErrorCode QMatMultTranspose(Mat,Vec,Vec);

114: static  char help[]="";

116: int main(int argc, char **argv)
117: {
118:   PetscErrorCode     ierr;
119:   Vec                x0;
120:   Tao                tao;
121:   AppCtx             user;
122:   PetscInt           ntests = 1;
123:   PetscInt           i;

125:   PetscInitialize(&argc, &argv, (char*)0,help);
126:   user.mx = 8;
127:   PetscOptionsBegin(PETSC_COMM_WORLD,NULL,"elliptic example",NULL);
128:   PetscOptionsInt("-mx","Number of grid points in each direction","",user.mx,&user.mx,NULL);
129:   user.ns = 6;
130:   PetscOptionsInt("-ns","Number of data samples (1<=ns<=8)","",user.ns,&user.ns,NULL);
131:   user.ndata = 64;
132:   PetscOptionsInt("-ndata","Numbers of data points per sample","",user.ndata,&user.ndata,NULL);
133:   user.alpha = 0.1;
134:   PetscOptionsReal("-alpha","Regularization parameter","",user.alpha,&user.alpha,NULL);
135:   user.beta = 0.00001;
136:   PetscOptionsReal("-beta","Weight attributed to ||u||^2 in regularization functional","",user.beta,&user.beta,NULL);
137:   user.noise = 0.01;
138:   PetscOptionsReal("-noise","Amount of noise to add to data","",user.noise,&user.noise,NULL);

140:   user.use_ptap = PETSC_FALSE;
141:   PetscOptionsBool("-use_ptap","Use ptap matrix for DSG","",user.use_ptap,&user.use_ptap,NULL);
142:   user.use_lrc = PETSC_FALSE;
143:   PetscOptionsBool("-use_lrc","Use lrc matrix for Js","",user.use_lrc,&user.use_lrc,NULL);
144:   PetscOptionsInt("-ntests","Number of times to repeat TaoSolve","",ntests,&ntests,NULL);
145:   PetscOptionsEnd();

147:   user.m = user.ns*user.mx*user.mx*user.mx; /* number of constraints */
148:   user.nstate =  user.m;
149:   user.ndesign = user.mx*user.mx*user.mx;
150:   user.n = user.nstate + user.ndesign; /* number of variables */

152:   /* Create TAO solver and set desired solution method */
153:   TaoCreate(PETSC_COMM_WORLD,&tao);
154:   TaoSetType(tao,TAOLCL);

156:   /* Set up initial vectors and matrices */
157:   EllipticInitialize(&user);

159:   Gather(user.x,user.y,user.state_scatter,user.u,user.design_scatter);
160:   VecDuplicate(user.x,&x0);
161:   VecCopy(user.x,x0);

163:   /* Set solution vector with an initial guess */
164:   TaoSetSolution(tao,user.x);
165:   TaoSetObjective(tao, FormFunction, &user);
166:   TaoSetGradient(tao, NULL, FormGradient, &user);
167:   TaoSetConstraintsRoutine(tao, user.c, FormConstraints, &user);

169:   TaoSetJacobianStateRoutine(tao, user.Js, NULL, user.JsInv, FormJacobianState, &user);
170:   TaoSetJacobianDesignRoutine(tao, user.Jd, FormJacobianDesign, &user);

172:   TaoSetStateDesignIS(tao,user.s_is,user.d_is);
173:   TaoSetFromOptions(tao);

175:   /* SOLVE THE APPLICATION */
176:   PetscLogStageRegister("Trials",&user.stages[1]);
177:   PetscLogStagePush(user.stages[1]);
178:   for (i=0; i<ntests; i++) {
179:     TaoSolve(tao);
180:     PetscPrintf(PETSC_COMM_WORLD,"KSP Iterations = %D\n",user.ksp_its);
181:     VecCopy(x0,user.x);
182:   }
183:   PetscLogStagePop();
184:   PetscBarrier((PetscObject)user.x);
185:   PetscPrintf(PETSC_COMM_WORLD,"KSP iterations within initialization: ");
186:   PetscPrintf(PETSC_COMM_WORLD,"%D\n",user.ksp_its_initial);

188:   TaoDestroy(&tao);
189:   VecDestroy(&x0);
190:   EllipticDestroy(&user);
191:   PetscFinalize();
192:   return 0;
193: }
194: /* ------------------------------------------------------------------- */
195: /*
196:    dwork = Qy - d
197:    lwork = L*(u-ur)
198:    f = 1/2 * (dwork.dwork + alpha*lwork.lwork)
199: */
200: PetscErrorCode FormFunction(Tao tao,Vec X,PetscReal *f,void *ptr)
201: {
202:   PetscReal      d1=0,d2=0;
203:   AppCtx         *user = (AppCtx*)ptr;

205:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
206:   MatMult(user->MQ,user->y,user->dwork);
207:   VecAXPY(user->dwork,-1.0,user->d);
208:   VecDot(user->dwork,user->dwork,&d1);
209:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
210:   MatMult(user->L,user->uwork,user->lwork);
211:   VecDot(user->lwork,user->lwork,&d2);
212:   *f = 0.5 * (d1 + user->alpha*d2);
213:   return 0;
214: }

216: /* ------------------------------------------------------------------- */
217: /*
218:     state: g_s = Q' *(Qy - d)
219:     design: g_d = alpha*L'*L*(u-ur)
220: */
221: PetscErrorCode FormGradient(Tao tao,Vec X,Vec G,void *ptr)
222: {
223:   AppCtx         *user = (AppCtx*)ptr;

225:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
226:   MatMult(user->MQ,user->y,user->dwork);
227:   VecAXPY(user->dwork,-1.0,user->d);
228:   MatMultTranspose(user->MQ,user->dwork,user->ywork);
229:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
230:   MatMult(user->L,user->uwork,user->lwork);
231:   MatMultTranspose(user->L,user->lwork,user->uwork);
232:   VecScale(user->uwork, user->alpha);
233:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
234:   return 0;
235: }

237: PetscErrorCode FormFunctionGradient(Tao tao, Vec X, PetscReal *f, Vec G, void *ptr)
238: {
239:   PetscReal      d1,d2;
240:   AppCtx         *user = (AppCtx*)ptr;

242:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
243:   MatMult(user->MQ,user->y,user->dwork);
244:   VecAXPY(user->dwork,-1.0,user->d);
245:   VecDot(user->dwork,user->dwork,&d1);
246:   MatMultTranspose(user->MQ,user->dwork,user->ywork);

248:   VecWAXPY(user->uwork,-1.0,user->ur,user->u);
249:   MatMult(user->L,user->uwork,user->lwork);
250:   VecDot(user->lwork,user->lwork,&d2);
251:   MatMultTranspose(user->L,user->lwork,user->uwork);
252:   VecScale(user->uwork, user->alpha);
253:   *f = 0.5 * (d1 + user->alpha*d2);
254:   Gather(G,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
255:   return 0;
256: }

258: /* ------------------------------------------------------------------- */
259: /* A
260: MatShell object
261: */
262: PetscErrorCode FormJacobianState(Tao tao, Vec X, Mat J, Mat JPre, Mat JInv, void *ptr)
263: {
264:   AppCtx         *user = (AppCtx*)ptr;

266:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
267:   /* DSG = Div * (1/Av_u) * Grad */
268:   VecSet(user->uwork,0);
269:   VecAXPY(user->uwork,-1.0,user->u);
270:   VecExp(user->uwork);
271:   MatMult(user->Av,user->uwork,user->Av_u);
272:   VecCopy(user->Av_u,user->Swork);
273:   VecReciprocal(user->Swork);
274:   if (user->use_ptap) {
275:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
276:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
277:   } else {
278:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
279:     MatDiagonalScale(user->Divwork,NULL,user->Swork);
280:     MatProductNumeric(user->DSG);
281:   }
282:   return 0;
283: }
284: /* ------------------------------------------------------------------- */
285: /* B */
286: PetscErrorCode FormJacobianDesign(Tao tao, Vec X, Mat J, void *ptr)
287: {
288:   AppCtx         *user = (AppCtx*)ptr;

290:   Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
291:   return 0;
292: }

294: PetscErrorCode StateBlockMatMult(Mat J_shell, Vec X, Vec Y)
295: {
296:   PetscReal      sum;
297:   AppCtx         *user;

299:   MatShellGetContext(J_shell,&user);
300:   MatMult(user->DSG,X,Y);
301:   VecSum(X,&sum);
302:   sum /= user->ndesign;
303:   VecShift(Y,sum);
304:   return 0;
305: }

307: PetscErrorCode StateMatMult(Mat J_shell, Vec X, Vec Y)
308: {
309:   PetscInt       i;
310:   AppCtx         *user;

312:   MatShellGetContext(J_shell,&user);
313:   if (user->ns == 1) {
314:     MatMult(user->JsBlock,X,Y);
315:   } else {
316:     for (i=0;i<user->ns;i++) {
317:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
318:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
319:       MatMult(user->JsBlock,user->subq,user->suby);
320:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
321:     }
322:   }
323:   return 0;
324: }

326: PetscErrorCode StateInvMatMult(Mat J_shell, Vec X, Vec Y)
327: {
328:   PetscInt       its,i;
329:   AppCtx         *user;

331:   MatShellGetContext(J_shell,&user);
332:   KSPSetOperators(user->solver,user->JsBlock,user->DSG);
333:   if (Y == user->ytrue) {
334:     /* First solve is done using true solution to set up problem */
335:     KSPSetTolerances(user->solver,1e-8,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
336:   } else {
337:     KSPSetTolerances(user->solver,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT);
338:   }
339:   if (user->ns == 1) {
340:     KSPSolve(user->solver,X,Y);
341:     KSPGetIterationNumber(user->solver,&its);
342:     user->ksp_its+=its;
343:   } else {
344:     for (i=0;i<user->ns;i++) {
345:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
346:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
347:       KSPSolve(user->solver,user->subq,user->suby);
348:       KSPGetIterationNumber(user->solver,&its);
349:       user->ksp_its+=its;
350:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
351:     }
352:   }
353:   return 0;
354: }
355: PetscErrorCode QMatMult(Mat J_shell, Vec X, Vec Y)
356: {
357:   AppCtx         *user;
358:   PetscInt       i;

360:   MatShellGetContext(J_shell,&user);
361:   if (user->ns == 1) {
362:     MatMult(user->Q,X,Y);
363:   } else {
364:     for (i=0;i<user->ns;i++) {
365:       Scatter(X,user->subq,user->yi_scatter[i],0,0);
366:       Scatter(Y,user->subd,user->di_scatter[i],0,0);
367:       MatMult(user->Q,user->subq,user->subd);
368:       Gather(Y,user->subd,user->di_scatter[i],0,0);
369:     }
370:   }
371:   return 0;
372: }

374: PetscErrorCode QMatMultTranspose(Mat J_shell, Vec X, Vec Y)
375: {
376:   AppCtx         *user;
377:   PetscInt       i;

379:   MatShellGetContext(J_shell,&user);
380:   if (user->ns == 1) {
381:     MatMultTranspose(user->Q,X,Y);
382:   } else {
383:     for (i=0;i<user->ns;i++) {
384:       Scatter(X,user->subd,user->di_scatter[i],0,0);
385:       Scatter(Y,user->suby,user->yi_scatter[i],0,0);
386:       MatMultTranspose(user->Q,user->subd,user->suby);
387:       Gather(Y,user->suby,user->yi_scatter[i],0,0);
388:     }
389:   }
390:   return 0;
391: }

393: PetscErrorCode DesignMatMult(Mat J_shell, Vec X, Vec Y)
394: {
395:   PetscInt       i;
396:   AppCtx         *user;

398:   MatShellGetContext(J_shell,&user);

400:   /* sdiag(1./v) */
401:   VecSet(user->uwork,0);
402:   VecAXPY(user->uwork,-1.0,user->u);
403:   VecExp(user->uwork);

405:   /* sdiag(1./((Av*(1./v)).^2)) */
406:   MatMult(user->Av,user->uwork,user->Swork);
407:   VecPointwiseMult(user->Swork,user->Swork,user->Swork);
408:   VecReciprocal(user->Swork);

410:   /* (Av * (sdiag(1./v) * b)) */
411:   VecPointwiseMult(user->uwork,user->uwork,X);
412:   MatMult(user->Av,user->uwork,user->Twork);

414:   /* (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b))) */
415:   VecPointwiseMult(user->Swork,user->Twork,user->Swork);

417:   if (user->ns == 1) {
418:     /* (sdiag(Grad*y(:,i)) */
419:     MatMult(user->Grad,user->y,user->Twork);

421:     /* Div * (sdiag(Grad*y(:,i)) * (sdiag(1./((Av*(1./v)).^2)) * (Av * (sdiag(1./v) * b)))) */
422:     VecPointwiseMult(user->Swork,user->Twork,user->Swork);
423:     MatMultTranspose(user->Grad,user->Swork,Y);
424:   } else {
425:     for (i=0;i<user->ns;i++) {
426:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0);
427:       Scatter(Y,user->subq,user->yi_scatter[i],0,0);

429:       MatMult(user->Grad,user->suby,user->Twork);
430:       VecPointwiseMult(user->Twork,user->Twork,user->Swork);
431:       MatMultTranspose(user->Grad,user->Twork,user->subq);
432:       Gather(user->y,user->suby,user->yi_scatter[i],0,0);
433:       Gather(Y,user->subq,user->yi_scatter[i],0,0);
434:     }
435:   }
436:   return 0;
437: }

439: PetscErrorCode DesignMatMultTranspose(Mat J_shell, Vec X, Vec Y)
440: {
441:   PetscInt       i;
442:   AppCtx         *user;

444:   MatShellGetContext(J_shell,&user);
445:   VecZeroEntries(Y);

447:   /* Sdiag = 1./((Av*(1./v)).^2) */
448:   VecSet(user->uwork,0);
449:   VecAXPY(user->uwork,-1.0,user->u);
450:   VecExp(user->uwork);
451:   MatMult(user->Av,user->uwork,user->Swork);
452:   VecPointwiseMult(user->Sdiag,user->Swork,user->Swork);
453:   VecReciprocal(user->Sdiag);

455:   for (i=0;i<user->ns;i++) {
456:     Scatter(X,user->subq,user->yi_scatter[i],0,0);
457:     Scatter(user->y,user->suby,user->yi_scatter[i],0,0);

459:     /* Swork = (Div' * b(:,i)) */
460:     MatMult(user->Grad,user->subq,user->Swork);

462:     /* Twork = Grad*y(:,i) */
463:     MatMult(user->Grad,user->suby,user->Twork);

465:     /* Twork = sdiag(Twork) * Swork */
466:     VecPointwiseMult(user->Twork,user->Swork,user->Twork);

468:     /* Swork = pointwisemult(Sdiag,Twork) */
469:     VecPointwiseMult(user->Swork,user->Twork,user->Sdiag);

471:     /* Ywork = Av' * Swork */
472:     MatMultTranspose(user->Av,user->Swork,user->Ywork);

474:     /* Ywork = pointwisemult(uwork,Ywork) */
475:     VecPointwiseMult(user->Ywork,user->uwork,user->Ywork);
476:     VecAXPY(Y,1.0,user->Ywork);
477:     Gather(user->y,user->suby,user->yi_scatter[i],0,0);
478:   }
479:   return 0;
480: }

482: PetscErrorCode FormConstraints(Tao tao, Vec X, Vec C, void *ptr)
483: {
484:    /* C=Ay - q      A = Div * Sigma * Grad + hx*hx*hx*ones(n,n) */
485:    PetscReal      sum;
486:    PetscInt       i;
487:    AppCtx         *user = (AppCtx*)ptr;

489:    Scatter(X,user->y,user->state_scatter,user->u,user->design_scatter);
490:    if (user->ns == 1) {
491:      MatMult(user->Grad,user->y,user->Swork);
492:      VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);
493:      MatMultTranspose(user->Grad,user->Swork,C);
494:      VecSum(user->y,&sum);
495:      sum /= user->ndesign;
496:      VecShift(C,sum);
497:    } else {
498:      for (i=0;i<user->ns;i++) {
499:       Scatter(user->y,user->suby,user->yi_scatter[i],0,0);
500:       Scatter(C,user->subq,user->yi_scatter[i],0,0);
501:       MatMult(user->Grad,user->suby,user->Swork);
502:       VecPointwiseDivide(user->Swork,user->Swork,user->Av_u);
503:       MatMultTranspose(user->Grad,user->Swork,user->subq);

505:       VecSum(user->suby,&sum);
506:       sum /= user->ndesign;
507:       VecShift(user->subq,sum);

509:       Gather(user->y,user->suby,user->yi_scatter[i],0,0);
510:       Gather(C,user->subq,user->yi_scatter[i],0,0);
511:      }
512:    }
513:    VecAXPY(C,-1.0,user->q);
514:    return 0;
515: }

517: PetscErrorCode Scatter(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
518: {
519:   VecScatterBegin(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);
520:   VecScatterEnd(scat1,x,sub1,INSERT_VALUES,SCATTER_FORWARD);
521:   if (sub2) {
522:     VecScatterBegin(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);
523:     VecScatterEnd(scat2,x,sub2,INSERT_VALUES,SCATTER_FORWARD);
524:   }
525:   return 0;
526: }

528: PetscErrorCode Gather(Vec x, Vec sub1, VecScatter scat1, Vec sub2, VecScatter scat2)
529: {
530:   VecScatterBegin(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);
531:   VecScatterEnd(scat1,sub1,x,INSERT_VALUES,SCATTER_REVERSE);
532:   if (sub2) {
533:     VecScatterBegin(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);
534:     VecScatterEnd(scat2,sub2,x,INSERT_VALUES,SCATTER_REVERSE);
535:   }
536:   return 0;
537: }

539: PetscErrorCode EllipticInitialize(AppCtx *user)
540: {
541:   PetscInt       m,n,i,j,k,l,linear_index,is,js,ks,ls,istart,iend,iblock;
542:   Vec            XX,YY,ZZ,XXwork,YYwork,ZZwork,UTwork;
543:   PetscReal      *x,*y,*z;
544:   PetscReal      h,meanut;
545:   PetscScalar    hinv,neg_hinv,half = 0.5,sqrt_beta;
546:   PetscInt       im,indx1,indx2,indy1,indy2,indz1,indz2,nx,ny,nz;
547:   IS             is_alldesign,is_allstate;
548:   IS             is_from_d;
549:   IS             is_from_y;
550:   PetscInt       lo,hi,hi2,lo2,ysubnlocal,dsubnlocal;
551:   const PetscInt *ranges, *subranges;
552:   PetscMPIInt    size;
553:   PetscReal      xri,yri,zri,xim,yim,zim,dx1,dx2,dy1,dy2,dz1,dz2,Dx,Dy,Dz;
554:   PetscScalar    v,vx,vy,vz;
555:   PetscInt       offset,subindex,subvec,nrank,kk;

557:   PetscScalar xr[64] = {0.4970,     0.8498,     0.7814,     0.6268,     0.7782,     0.6402,     0.3617,     0.3160,
558:                         0.3610,     0.5298,     0.6987,     0.3331,     0.7962,     0.5596,     0.3866,     0.6774,
559:                         0.5407,     0.4518,     0.6702,     0.6061,     0.7580,     0.8997,     0.5198,     0.8326,
560:                         0.2138,     0.9198,     0.3000,     0.2833,     0.8288,     0.7076,     0.1820,     0.0728,
561:                         0.8447,     0.2367,     0.3239,     0.6413,     0.3114,     0.4731,     0.1192,     0.9273,
562:                         0.5724,     0.4331,     0.5136,     0.3547,     0.4413,     0.2602,     0.5698,     0.7278,
563:                         0.5261,     0.6230,     0.2454,     0.3948,     0.7479,     0.6582,     0.4660,     0.5594,
564:                         0.7574,     0.1143,     0.5900,     0.1065,     0.4260,     0.3294,     0.8276,     0.0756};

566:   PetscScalar yr[64] = {0.7345,     0.9120,     0.9288,     0.7528,     0.4463,     0.4985,     0.2497,     0.6256,
567:                         0.3425,     0.9026,     0.6983,     0.4230,     0.7140,     0.2970,     0.4474,     0.8792,
568:                         0.6604,     0.2485,     0.7968,     0.6127,     0.1796,     0.2437,     0.5938,     0.6137,
569:                         0.3867,     0.5658,     0.4575,     0.1009,     0.0863,     0.3361,     0.0738,     0.3985,
570:                         0.6602,     0.1437,     0.0934,     0.5983,     0.5950,     0.0763,     0.0768,     0.2288,
571:                         0.5761,     0.1129,     0.3841,     0.6150,     0.6904,     0.6686,     0.1361,     0.4601,
572:                         0.4491,     0.3716,     0.1969,     0.6537,     0.6743,     0.6991,     0.4811,     0.5480,
573:                         0.1684,     0.4569,     0.6889,     0.8437,     0.3015,     0.2854,     0.8199,     0.2658};

575:   PetscScalar zr[64] = {0.7668,     0.8573,     0.2654,     0.2719,     0.1060,     0.1311,     0.6232,     0.2295,
576:                         0.8009,     0.2147,     0.2119,     0.9325,     0.4473,     0.3600,     0.3374,     0.3819,
577:                         0.4066,     0.5801,     0.1673,     0.0959,     0.4638,     0.8236,     0.8800,     0.2939,
578:                         0.2028,     0.8262,     0.2706,     0.6276,     0.9085,     0.6443,     0.8241,     0.0712,
579:                         0.1824,     0.7789,     0.4389,     0.8415,     0.7055,     0.6639,     0.3653,     0.2078,
580:                         0.1987,     0.2297,     0.4321,     0.8115,     0.4915,     0.7764,     0.4657,     0.4627,
581:                         0.4569,     0.4232,     0.8514,     0.0674,     0.3227,     0.1055,     0.6690,     0.6313,
582:                         0.9226,     0.5461,     0.4126,     0.2364,     0.6096,     0.7042,     0.3914,     0.0711};

584:   MPI_Comm_size(PETSC_COMM_WORLD,&size);
585:   PetscLogStageRegister("Elliptic Setup",&user->stages[0]);
586:   PetscLogStagePush(user->stages[0]);

588:   /* Create u,y,c,x */
589:   VecCreate(PETSC_COMM_WORLD,&user->u);
590:   VecCreate(PETSC_COMM_WORLD,&user->y);
591:   VecCreate(PETSC_COMM_WORLD,&user->c);
592:   VecSetSizes(user->u,PETSC_DECIDE,user->ndesign);
593:   VecSetFromOptions(user->u);
594:   VecGetLocalSize(user->u,&ysubnlocal);
595:   VecSetSizes(user->y,ysubnlocal*user->ns,user->nstate);
596:   VecSetSizes(user->c,ysubnlocal*user->ns,user->m);
597:   VecSetFromOptions(user->y);
598:   VecSetFromOptions(user->c);

600:   /*
601:      *******************************
602:      Create scatters for x <-> y,u
603:      *******************************

605:      If the state vector y and design vector u are partitioned as
606:      [y_1; y_2; ...; y_np] and [u_1; u_2; ...; u_np] (with np = # of processors),
607:      then the solution vector x is organized as
608:      [y_1; u_1; y_2; u_2; ...; y_np; u_np].
609:      The index sets user->s_is and user->d_is correspond to the indices of the
610:      state and design variables owned by the current processor.
611:   */
612:   VecCreate(PETSC_COMM_WORLD,&user->x);

614:   VecGetOwnershipRange(user->y,&lo,&hi);
615:   VecGetOwnershipRange(user->u,&lo2,&hi2);

617:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo,1,&is_allstate);
618:   ISCreateStride(PETSC_COMM_SELF,hi-lo,lo+lo2,1,&user->s_is);
619:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,lo2,1,&is_alldesign);
620:   ISCreateStride(PETSC_COMM_SELF,hi2-lo2,hi+lo2,1,&user->d_is);

622:   VecSetSizes(user->x,hi-lo+hi2-lo2,user->n);
623:   VecSetFromOptions(user->x);

625:   VecScatterCreate(user->x,user->s_is,user->y,is_allstate,&user->state_scatter);
626:   VecScatterCreate(user->x,user->d_is,user->u,is_alldesign,&user->design_scatter);
627:   ISDestroy(&is_alldesign);
628:   ISDestroy(&is_allstate);
629:   /*
630:      *******************************
631:      Create scatter from y to y_1,y_2,...,y_ns
632:      *******************************
633:   */
634:   PetscMalloc1(user->ns,&user->yi_scatter);
635:   VecDuplicate(user->u,&user->suby);
636:   VecDuplicate(user->u,&user->subq);

638:   VecGetOwnershipRange(user->y,&lo2,&hi2);
639:   istart = 0;
640:   for (i=0; i<user->ns; i++) {
641:     VecGetOwnershipRange(user->suby,&lo,&hi);
642:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_y);
643:     VecScatterCreate(user->y,is_from_y,user->suby,NULL,&user->yi_scatter[i]);
644:     istart = istart + hi-lo;
645:     ISDestroy(&is_from_y);
646:   }
647:   /*
648:      *******************************
649:      Create scatter from d to d_1,d_2,...,d_ns
650:      *******************************
651:   */
652:   VecCreate(PETSC_COMM_WORLD,&user->subd);
653:   VecSetSizes(user->subd,PETSC_DECIDE,user->ndata);
654:   VecSetFromOptions(user->subd);
655:   VecCreate(PETSC_COMM_WORLD,&user->d);
656:   VecGetLocalSize(user->subd,&dsubnlocal);
657:   VecSetSizes(user->d,dsubnlocal*user->ns,user->ndata*user->ns);
658:   VecSetFromOptions(user->d);
659:   PetscMalloc1(user->ns,&user->di_scatter);

661:   VecGetOwnershipRange(user->d,&lo2,&hi2);
662:   istart = 0;
663:   for (i=0; i<user->ns; i++) {
664:     VecGetOwnershipRange(user->subd,&lo,&hi);
665:     ISCreateStride(PETSC_COMM_SELF,hi-lo,lo2+istart,1,&is_from_d);
666:     VecScatterCreate(user->d,is_from_d,user->subd,NULL,&user->di_scatter[i]);
667:     istart = istart + hi-lo;
668:     ISDestroy(&is_from_d);
669:   }

671:   PetscMalloc1(user->mx,&x);
672:   PetscMalloc1(user->mx,&y);
673:   PetscMalloc1(user->mx,&z);

675:   user->ksp_its = 0;
676:   user->ksp_its_initial = 0;

678:   n = user->mx * user->mx * user->mx;
679:   m = 3 * user->mx * user->mx * (user->mx-1);
680:   sqrt_beta = PetscSqrtScalar(user->beta);

682:   VecCreate(PETSC_COMM_WORLD,&XX);
683:   VecCreate(PETSC_COMM_WORLD,&user->q);
684:   VecSetSizes(XX,ysubnlocal,n);
685:   VecSetSizes(user->q,ysubnlocal*user->ns,user->m);
686:   VecSetFromOptions(XX);
687:   VecSetFromOptions(user->q);

689:   VecDuplicate(XX,&YY);
690:   VecDuplicate(XX,&ZZ);
691:   VecDuplicate(XX,&XXwork);
692:   VecDuplicate(XX,&YYwork);
693:   VecDuplicate(XX,&ZZwork);
694:   VecDuplicate(XX,&UTwork);
695:   VecDuplicate(XX,&user->utrue);

697:   /* map for striding q */
698:   VecGetOwnershipRanges(user->q,&ranges);
699:   VecGetOwnershipRanges(user->u,&subranges);

701:   VecGetOwnershipRange(user->q,&lo2,&hi2);
702:   VecGetOwnershipRange(user->u,&lo,&hi);
703:   /* Generate 3D grid, and collect ns (1<=ns<=8) right-hand-side vectors into user->q */
704:   h = 1.0/user->mx;
705:   hinv = user->mx;
706:   neg_hinv = -hinv;

708:   VecGetOwnershipRange(XX,&istart,&iend);
709:   for (linear_index=istart; linear_index<iend; linear_index++) {
710:     i = linear_index % user->mx;
711:     j = ((linear_index-i)/user->mx) % user->mx;
712:     k = ((linear_index-i)/user->mx-j) / user->mx;
713:     vx = h*(i+0.5);
714:     vy = h*(j+0.5);
715:     vz = h*(k+0.5);
716:     VecSetValues(XX,1,&linear_index,&vx,INSERT_VALUES);
717:     VecSetValues(YY,1,&linear_index,&vy,INSERT_VALUES);
718:     VecSetValues(ZZ,1,&linear_index,&vz,INSERT_VALUES);
719:     for (is=0; is<2; is++) {
720:       for (js=0; js<2; js++) {
721:         for (ks=0; ks<2; ks++) {
722:           ls = is*4 + js*2 + ks;
723:           if (ls<user->ns) {
724:             l =ls*n + linear_index;
725:             /* remap */
726:             subindex = l%n;
727:             subvec = l/n;
728:             nrank=0;
729:             while (subindex >= subranges[nrank+1]) nrank++;
730:             offset = subindex - subranges[nrank];
731:             istart=0;
732:             for (kk=0;kk<nrank;kk++) istart+=user->ns*(subranges[kk+1]-subranges[kk]);
733:             istart += (subranges[nrank+1]-subranges[nrank])*subvec;
734:             l = istart+offset;
735:             v = 100*PetscSinScalar(2*PETSC_PI*(vx+0.25*is))*PetscSinScalar(2*PETSC_PI*(vy+0.25*js))*PetscSinScalar(2*PETSC_PI*(vz+0.25*ks));
736:             VecSetValues(user->q,1,&l,&v,INSERT_VALUES);
737:           }
738:         }
739:       }
740:     }
741:   }

743:   VecAssemblyBegin(XX);
744:   VecAssemblyEnd(XX);
745:   VecAssemblyBegin(YY);
746:   VecAssemblyEnd(YY);
747:   VecAssemblyBegin(ZZ);
748:   VecAssemblyEnd(ZZ);
749:   VecAssemblyBegin(user->q);
750:   VecAssemblyEnd(user->q);

752:   /* Compute true parameter function
753:      ut = exp(-((x-0.25)^2+(y-0.25)^2+(z-0.25)^2)/0.05) - exp((x-0.75)^2-(y-0.75)^2-(z-0.75))^2/0.05) */
754:   VecCopy(XX,XXwork);
755:   VecCopy(YY,YYwork);
756:   VecCopy(ZZ,ZZwork);

758:   VecShift(XXwork,-0.25);
759:   VecShift(YYwork,-0.25);
760:   VecShift(ZZwork,-0.25);

762:   VecPointwiseMult(XXwork,XXwork,XXwork);
763:   VecPointwiseMult(YYwork,YYwork,YYwork);
764:   VecPointwiseMult(ZZwork,ZZwork,ZZwork);

766:   VecCopy(XXwork,UTwork);
767:   VecAXPY(UTwork,1.0,YYwork);
768:   VecAXPY(UTwork,1.0,ZZwork);
769:   VecScale(UTwork,-20.0);
770:   VecExp(UTwork);
771:   VecCopy(UTwork,user->utrue);

773:   VecCopy(XX,XXwork);
774:   VecCopy(YY,YYwork);
775:   VecCopy(ZZ,ZZwork);

777:   VecShift(XXwork,-0.75);
778:   VecShift(YYwork,-0.75);
779:   VecShift(ZZwork,-0.75);

781:   VecPointwiseMult(XXwork,XXwork,XXwork);
782:   VecPointwiseMult(YYwork,YYwork,YYwork);
783:   VecPointwiseMult(ZZwork,ZZwork,ZZwork);

785:   VecCopy(XXwork,UTwork);
786:   VecAXPY(UTwork,1.0,YYwork);
787:   VecAXPY(UTwork,1.0,ZZwork);
788:   VecScale(UTwork,-20.0);
789:   VecExp(UTwork);

791:   VecAXPY(user->utrue,-1.0,UTwork);

793:   VecDestroy(&XX);
794:   VecDestroy(&YY);
795:   VecDestroy(&ZZ);
796:   VecDestroy(&XXwork);
797:   VecDestroy(&YYwork);
798:   VecDestroy(&ZZwork);
799:   VecDestroy(&UTwork);

801:   /* Initial guess and reference model */
802:   VecDuplicate(user->utrue,&user->ur);
803:   VecSum(user->utrue,&meanut);
804:   meanut = meanut / n;
805:   VecSet(user->ur,meanut);
806:   VecCopy(user->ur,user->u);

808:   /* Generate Grad matrix */
809:   MatCreate(PETSC_COMM_WORLD,&user->Grad);
810:   MatSetSizes(user->Grad,PETSC_DECIDE,ysubnlocal,m,n);
811:   MatSetFromOptions(user->Grad);
812:   MatMPIAIJSetPreallocation(user->Grad,2,NULL,2,NULL);
813:   MatSeqAIJSetPreallocation(user->Grad,2,NULL);
814:   MatGetOwnershipRange(user->Grad,&istart,&iend);

816:   for (i=istart; i<iend; i++) {
817:     if (i<m/3) {
818:       iblock = i / (user->mx-1);
819:       j = iblock*user->mx + (i % (user->mx-1));
820:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
821:       j = j+1;
822:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
823:     }
824:     if (i>=m/3 && i<2*m/3) {
825:       iblock = (i-m/3) / (user->mx*(user->mx-1));
826:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
827:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
828:       j = j + user->mx;
829:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
830:     }
831:     if (i>=2*m/3) {
832:       j = i-2*m/3;
833:       MatSetValues(user->Grad,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
834:       j = j + user->mx*user->mx;
835:       MatSetValues(user->Grad,1,&i,1,&j,&hinv,INSERT_VALUES);
836:     }
837:   }

839:   MatAssemblyBegin(user->Grad,MAT_FINAL_ASSEMBLY);
840:   MatAssemblyEnd(user->Grad,MAT_FINAL_ASSEMBLY);

842:   /* Generate arithmetic averaging matrix Av */
843:   MatCreate(PETSC_COMM_WORLD,&user->Av);
844:   MatSetSizes(user->Av,PETSC_DECIDE,ysubnlocal,m,n);
845:   MatSetFromOptions(user->Av);
846:   MatMPIAIJSetPreallocation(user->Av,2,NULL,2,NULL);
847:   MatSeqAIJSetPreallocation(user->Av,2,NULL);
848:   MatGetOwnershipRange(user->Av,&istart,&iend);

850:   for (i=istart; i<iend; i++) {
851:     if (i<m/3) {
852:       iblock = i / (user->mx-1);
853:       j = iblock*user->mx + (i % (user->mx-1));
854:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
855:       j = j+1;
856:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
857:     }
858:     if (i>=m/3 && i<2*m/3) {
859:       iblock = (i-m/3) / (user->mx*(user->mx-1));
860:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
861:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
862:       j = j + user->mx;
863:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
864:     }
865:     if (i>=2*m/3) {
866:       j = i-2*m/3;
867:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
868:       j = j + user->mx*user->mx;
869:       MatSetValues(user->Av,1,&i,1,&j,&half,INSERT_VALUES);
870:     }
871:   }

873:   MatAssemblyBegin(user->Av,MAT_FINAL_ASSEMBLY);
874:   MatAssemblyEnd(user->Av,MAT_FINAL_ASSEMBLY);

876:   MatCreate(PETSC_COMM_WORLD,&user->L);
877:   MatSetSizes(user->L,PETSC_DECIDE,ysubnlocal,m+n,n);
878:   MatSetFromOptions(user->L);
879:   MatMPIAIJSetPreallocation(user->L,2,NULL,2,NULL);
880:   MatSeqAIJSetPreallocation(user->L,2,NULL);
881:   MatGetOwnershipRange(user->L,&istart,&iend);

883:   for (i=istart; i<iend; i++) {
884:     if (i<m/3) {
885:       iblock = i / (user->mx-1);
886:       j = iblock*user->mx + (i % (user->mx-1));
887:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
888:       j = j+1;
889:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
890:     }
891:     if (i>=m/3 && i<2*m/3) {
892:       iblock = (i-m/3) / (user->mx*(user->mx-1));
893:       j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
894:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
895:       j = j + user->mx;
896:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
897:     }
898:     if (i>=2*m/3 && i<m) {
899:       j = i-2*m/3;
900:       MatSetValues(user->L,1,&i,1,&j,&neg_hinv,INSERT_VALUES);
901:       j = j + user->mx*user->mx;
902:       MatSetValues(user->L,1,&i,1,&j,&hinv,INSERT_VALUES);
903:     }
904:     if (i>=m) {
905:       j = i - m;
906:       MatSetValues(user->L,1,&i,1,&j,&sqrt_beta,INSERT_VALUES);
907:     }
908:   }
909:   MatAssemblyBegin(user->L,MAT_FINAL_ASSEMBLY);
910:   MatAssemblyEnd(user->L,MAT_FINAL_ASSEMBLY);
911:   MatScale(user->L,PetscPowScalar(h,1.5));

913:   /* Generate Div matrix */
914:   if (!user->use_ptap) {
915:     /* Generate Div matrix */
916:     MatCreate(PETSC_COMM_WORLD,&user->Div);
917:     MatSetSizes(user->Div,ysubnlocal,PETSC_DECIDE,n,m);
918:     MatSetFromOptions(user->Div);
919:     MatMPIAIJSetPreallocation(user->Div,4,NULL,4,NULL);
920:     MatSeqAIJSetPreallocation(user->Div,6,NULL);
921:     MatGetOwnershipRange(user->Grad,&istart,&iend);

923:     for (i=istart; i<iend; i++) {
924:       if (i<m/3) {
925:         iblock = i / (user->mx-1);
926:         j = iblock*user->mx + (i % (user->mx-1));
927:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
928:         j = j+1;
929:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
930:       }
931:       if (i>=m/3 && i<2*m/3) {
932:         iblock = (i-m/3) / (user->mx*(user->mx-1));
933:         j = iblock*user->mx*user->mx + ((i-m/3) % (user->mx*(user->mx-1)));
934:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
935:         j = j + user->mx;
936:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
937:       }
938:       if (i>=2*m/3) {
939:         j = i-2*m/3;
940:         MatSetValues(user->Div,1,&j,1,&i,&neg_hinv,INSERT_VALUES);
941:         j = j + user->mx*user->mx;
942:         MatSetValues(user->Div,1,&j,1,&i,&hinv,INSERT_VALUES);
943:       }
944:     }

946:     MatAssemblyBegin(user->Div,MAT_FINAL_ASSEMBLY);
947:     MatAssemblyEnd(user->Div,MAT_FINAL_ASSEMBLY);
948:     MatDuplicate(user->Div,MAT_SHARE_NONZERO_PATTERN,&user->Divwork);
949:   } else {
950:     MatCreate(PETSC_COMM_WORLD,&user->Diag);
951:     MatSetSizes(user->Diag,PETSC_DECIDE,PETSC_DECIDE,m,m);
952:     MatSetFromOptions(user->Diag);
953:     MatMPIAIJSetPreallocation(user->Diag,1,NULL,0,NULL);
954:     MatSeqAIJSetPreallocation(user->Diag,1,NULL);
955:   }

957:   /* Build work vectors and matrices */
958:   VecCreate(PETSC_COMM_WORLD,&user->S);
959:   VecSetSizes(user->S, PETSC_DECIDE, m);
960:   VecSetFromOptions(user->S);

962:   VecCreate(PETSC_COMM_WORLD,&user->lwork);
963:   VecSetSizes(user->lwork,PETSC_DECIDE,m+user->mx*user->mx*user->mx);
964:   VecSetFromOptions(user->lwork);

966:   MatDuplicate(user->Av,MAT_SHARE_NONZERO_PATTERN,&user->Avwork);

968:   VecDuplicate(user->S,&user->Swork);
969:   VecDuplicate(user->S,&user->Sdiag);
970:   VecDuplicate(user->S,&user->Av_u);
971:   VecDuplicate(user->S,&user->Twork);
972:   VecDuplicate(user->y,&user->ywork);
973:   VecDuplicate(user->u,&user->Ywork);
974:   VecDuplicate(user->u,&user->uwork);
975:   VecDuplicate(user->u,&user->js_diag);
976:   VecDuplicate(user->c,&user->cwork);
977:   VecDuplicate(user->d,&user->dwork);

979:   /* Create a matrix-free shell user->Jd for computing B*x */
980:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal,user->nstate,user->ndesign,user,&user->Jd);
981:   MatShellSetOperation(user->Jd,MATOP_MULT,(void(*)(void))DesignMatMult);
982:   MatShellSetOperation(user->Jd,MATOP_MULT_TRANSPOSE,(void(*)(void))DesignMatMultTranspose);

984:   /* Compute true state function ytrue given utrue */
985:   VecDuplicate(user->y,&user->ytrue);

987:   /* First compute Av_u = Av*exp(-u) */
988:   VecSet(user->uwork, 0);
989:   VecAXPY(user->uwork,-1.0,user->utrue); /* Note: user->utrue */
990:   VecExp(user->uwork);
991:   MatMult(user->Av,user->uwork,user->Av_u);

993:   /* Next form DSG = Div*S*Grad */
994:   VecCopy(user->Av_u,user->Swork);
995:   VecReciprocal(user->Swork);
996:   if (user->use_ptap) {
997:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
998:     MatPtAP(user->Diag,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);
999:   } else {
1000:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
1001:     MatDiagonalScale(user->Divwork,NULL,user->Swork);

1003:     MatMatMult(user->Divwork,user->Grad,MAT_INITIAL_MATRIX,1.0,&user->DSG);
1004:   }

1006:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);
1007:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1009:   if (user->use_lrc == PETSC_TRUE) {
1010:     v=PetscSqrtReal(1.0 /user->ndesign);
1011:     PetscMalloc1(user->ndesign,&user->ones);

1013:     for (i=0;i<user->ndesign;i++) {
1014:       user->ones[i]=v;
1015:     }
1016:     MatCreateDense(PETSC_COMM_WORLD,ysubnlocal,PETSC_DECIDE,user->ndesign,1,user->ones,&user->Ones);
1017:     MatAssemblyBegin(user->Ones, MAT_FINAL_ASSEMBLY);
1018:     MatAssemblyEnd(user->Ones, MAT_FINAL_ASSEMBLY);
1019:     MatCreateLRC(user->DSG,user->Ones,NULL,user->Ones,&user->JsBlock);
1020:     MatSetUp(user->JsBlock);
1021:   } else {
1022:     /* Create matrix-free shell user->Js for computing (A + h^3*e*e^T)*x */
1023:     MatCreateShell(PETSC_COMM_WORLD,ysubnlocal,ysubnlocal,user->ndesign,user->ndesign,user,&user->JsBlock);
1024:     MatShellSetOperation(user->JsBlock,MATOP_MULT,(void(*)(void))StateBlockMatMult);
1025:     MatShellSetOperation(user->JsBlock,MATOP_MULT_TRANSPOSE,(void(*)(void))StateBlockMatMult);
1026:   }
1027:   MatSetOption(user->JsBlock,MAT_SYMMETRIC,PETSC_TRUE);
1028:   MatSetOption(user->JsBlock,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
1029:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->Js);
1030:   MatShellSetOperation(user->Js,MATOP_MULT,(void(*)(void))StateMatMult);
1031:   MatShellSetOperation(user->Js,MATOP_MULT_TRANSPOSE,(void(*)(void))StateMatMult);
1032:   MatSetOption(user->Js,MAT_SYMMETRIC,PETSC_TRUE);
1033:   MatSetOption(user->Js,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1035:   MatCreateShell(PETSC_COMM_WORLD,ysubnlocal*user->ns,ysubnlocal*user->ns,user->nstate,user->nstate,user,&user->JsInv);
1036:   MatShellSetOperation(user->JsInv,MATOP_MULT,(void(*)(void))StateInvMatMult);
1037:   MatShellSetOperation(user->JsInv,MATOP_MULT_TRANSPOSE,(void(*)(void))StateInvMatMult);
1038:   MatSetOption(user->JsInv,MAT_SYMMETRIC,PETSC_TRUE);
1039:   MatSetOption(user->JsInv,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);

1041:   MatSetOption(user->DSG,MAT_SYMMETRIC,PETSC_TRUE);
1042:   MatSetOption(user->DSG,MAT_SYMMETRY_ETERNAL,PETSC_TRUE);
1043:   /* Now solve for ytrue */
1044:   KSPCreate(PETSC_COMM_WORLD,&user->solver);
1045:   KSPSetFromOptions(user->solver);

1047:   KSPSetOperators(user->solver,user->JsBlock,user->DSG);

1049:   MatMult(user->JsInv,user->q,user->ytrue);
1050:   /* First compute Av_u = Av*exp(-u) */
1051:   VecSet(user->uwork,0);
1052:   VecAXPY(user->uwork,-1.0,user->u); /* Note: user->u */
1053:   VecExp(user->uwork);
1054:   MatMult(user->Av,user->uwork,user->Av_u);

1056:   /* Next update DSG = Div*S*Grad  with user->u */
1057:   VecCopy(user->Av_u,user->Swork);
1058:   VecReciprocal(user->Swork);
1059:   if (user->use_ptap) {
1060:     MatDiagonalSet(user->Diag,user->Swork,INSERT_VALUES);
1061:     MatPtAP(user->Diag,user->Grad,MAT_REUSE_MATRIX,1.0,&user->DSG);
1062:   } else {
1063:     MatCopy(user->Div,user->Divwork,SAME_NONZERO_PATTERN);
1064:     MatDiagonalScale(user->Divwork,NULL,user->Av_u);
1065:     MatProductNumeric(user->DSG);
1066:   }

1068:   /* Now solve for y */

1070:   MatMult(user->JsInv,user->q,user->y);

1072:   user->ksp_its_initial = user->ksp_its;
1073:   user->ksp_its = 0;
1074:   /* Construct projection matrix Q (blocks) */
1075:   MatCreate(PETSC_COMM_WORLD,&user->Q);
1076:   MatSetSizes(user->Q,dsubnlocal,ysubnlocal,user->ndata,user->ndesign);
1077:   MatSetFromOptions(user->Q);
1078:   MatMPIAIJSetPreallocation(user->Q,8,NULL,8,NULL);
1079:   MatSeqAIJSetPreallocation(user->Q,8,NULL);

1081:   for (i=0; i<user->mx; i++) {
1082:     x[i] = h*(i+0.5);
1083:     y[i] = h*(i+0.5);
1084:     z[i] = h*(i+0.5);
1085:   }
1086:   MatGetOwnershipRange(user->Q,&istart,&iend);

1088:   nx = user->mx; ny = user->mx; nz = user->mx;
1089:   for (i=istart; i<iend; i++) {

1091:     xri = xr[i];
1092:     im = 0;
1093:     xim = x[im];
1094:     while (xri>xim && im<nx) {
1095:       im = im+1;
1096:       xim = x[im];
1097:     }
1098:     indx1 = im-1;
1099:     indx2 = im;
1100:     dx1 = xri - x[indx1];
1101:     dx2 = x[indx2] - xri;

1103:     yri = yr[i];
1104:     im = 0;
1105:     yim = y[im];
1106:     while (yri>yim && im<ny) {
1107:       im = im+1;
1108:       yim = y[im];
1109:     }
1110:     indy1 = im-1;
1111:     indy2 = im;
1112:     dy1 = yri - y[indy1];
1113:     dy2 = y[indy2] - yri;

1115:     zri = zr[i];
1116:     im = 0;
1117:     zim = z[im];
1118:     while (zri>zim && im<nz) {
1119:       im = im+1;
1120:       zim = z[im];
1121:     }
1122:     indz1 = im-1;
1123:     indz2 = im;
1124:     dz1 = zri - z[indz1];
1125:     dz2 = z[indz2] - zri;

1127:     Dx = x[indx2] - x[indx1];
1128:     Dy = y[indy2] - y[indy1];
1129:     Dz = z[indz2] - z[indz1];

1131:     j = indx1 + indy1*nx + indz1*nx*ny;
1132:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1133:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1135:     j = indx1 + indy1*nx + indz2*nx*ny;
1136:     v = (1-dx1/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1137:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1139:     j = indx1 + indy2*nx + indz1*nx*ny;
1140:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1141:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1143:     j = indx1 + indy2*nx + indz2*nx*ny;
1144:     v = (1-dx1/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1145:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1147:     j = indx2 + indy1*nx + indz1*nx*ny;
1148:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz1/Dz);
1149:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1151:     j = indx2 + indy1*nx + indz2*nx*ny;
1152:     v = (1-dx2/Dx)*(1-dy1/Dy)*(1-dz2/Dz);
1153:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1155:     j = indx2 + indy2*nx + indz1*nx*ny;
1156:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz1/Dz);
1157:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);

1159:     j = indx2 + indy2*nx + indz2*nx*ny;
1160:     v = (1-dx2/Dx)*(1-dy2/Dy)*(1-dz2/Dz);
1161:     MatSetValues(user->Q,1,&i,1,&j,&v,INSERT_VALUES);
1162:   }

1164:   MatAssemblyBegin(user->Q,MAT_FINAL_ASSEMBLY);
1165:   MatAssemblyEnd(user->Q,MAT_FINAL_ASSEMBLY);
1166:   /* Create MQ (composed of blocks of Q */
1167:   MatCreateShell(PETSC_COMM_WORLD,dsubnlocal*user->ns,PETSC_DECIDE,user->ndata*user->ns,user->nstate,user,&user->MQ);
1168:   MatShellSetOperation(user->MQ,MATOP_MULT,(void(*)(void))QMatMult);
1169:   MatShellSetOperation(user->MQ,MATOP_MULT_TRANSPOSE,(void(*)(void))QMatMultTranspose);

1171:   /* Add noise to the measurement data */
1172:   VecSet(user->ywork,1.0);
1173:   VecAYPX(user->ywork,user->noise,user->ytrue);
1174:   MatMult(user->MQ,user->ywork,user->d);

1176:   /* Now that initial conditions have been set, let the user pass tolerance options to the KSP solver */
1177:   PetscFree(x);
1178:   PetscFree(y);
1179:   PetscFree(z);
1180:   PetscLogStagePop();
1181:   return 0;
1182: }

1184: PetscErrorCode EllipticDestroy(AppCtx *user)
1185: {
1186:   PetscInt       i;

1188:   MatDestroy(&user->DSG);
1189:   KSPDestroy(&user->solver);
1190:   MatDestroy(&user->Q);
1191:   MatDestroy(&user->MQ);
1192:   if (!user->use_ptap) {
1193:     MatDestroy(&user->Div);
1194:     MatDestroy(&user->Divwork);
1195:   } else {
1196:     MatDestroy(&user->Diag);
1197:   }
1198:   if (user->use_lrc) {
1199:     MatDestroy(&user->Ones);
1200:   }

1202:   MatDestroy(&user->Grad);
1203:   MatDestroy(&user->Av);
1204:   MatDestroy(&user->Avwork);
1205:   MatDestroy(&user->L);
1206:   MatDestroy(&user->Js);
1207:   MatDestroy(&user->Jd);
1208:   MatDestroy(&user->JsBlock);
1209:   MatDestroy(&user->JsInv);

1211:   VecDestroy(&user->x);
1212:   VecDestroy(&user->u);
1213:   VecDestroy(&user->uwork);
1214:   VecDestroy(&user->utrue);
1215:   VecDestroy(&user->y);
1216:   VecDestroy(&user->ywork);
1217:   VecDestroy(&user->ytrue);
1218:   VecDestroy(&user->c);
1219:   VecDestroy(&user->cwork);
1220:   VecDestroy(&user->ur);
1221:   VecDestroy(&user->q);
1222:   VecDestroy(&user->d);
1223:   VecDestroy(&user->dwork);
1224:   VecDestroy(&user->lwork);
1225:   VecDestroy(&user->S);
1226:   VecDestroy(&user->Swork);
1227:   VecDestroy(&user->Sdiag);
1228:   VecDestroy(&user->Ywork);
1229:   VecDestroy(&user->Twork);
1230:   VecDestroy(&user->Av_u);
1231:   VecDestroy(&user->js_diag);
1232:   ISDestroy(&user->s_is);
1233:   ISDestroy(&user->d_is);
1234:   VecDestroy(&user->suby);
1235:   VecDestroy(&user->subd);
1236:   VecDestroy(&user->subq);
1237:   VecScatterDestroy(&user->state_scatter);
1238:   VecScatterDestroy(&user->design_scatter);
1239:   for (i=0;i<user->ns;i++) {
1240:     VecScatterDestroy(&user->yi_scatter[i]);
1241:     VecScatterDestroy(&user->di_scatter[i]);
1242:   }
1243:   PetscFree(user->yi_scatter);
1244:   PetscFree(user->di_scatter);
1245:   if (user->use_lrc) {
1246:     PetscFree(user->ones);
1247:     MatDestroy(&user->Ones);
1248:   }
1249:   return 0;
1250: }

1252: PetscErrorCode EllipticMonitor(Tao tao, void *ptr)
1253: {
1254:   Vec            X;
1255:   PetscReal      unorm,ynorm;
1256:   AppCtx         *user = (AppCtx*)ptr;

1258:   TaoGetSolution(tao,&X);
1259:   Scatter(X,user->ywork,user->state_scatter,user->uwork,user->design_scatter);
1260:   VecAXPY(user->ywork,-1.0,user->ytrue);
1261:   VecAXPY(user->uwork,-1.0,user->utrue);
1262:   VecNorm(user->uwork,NORM_2,&unorm);
1263:   VecNorm(user->ywork,NORM_2,&ynorm);
1264:   PetscPrintf(MPI_COMM_WORLD, "||u-ut||=%g ||y-yt||=%g\n",(double)unorm,(double)ynorm);
1265:   return 0;
1266: }

1268: /*TEST

1270:    build:
1271:       requires: !complex

1273:    test:
1274:       args: -tao_cmonitor -ns 1 -tao_type lcl -tao_gatol 1.e-3 -tao_max_it 11
1275:       requires: !single

1277:    test:
1278:       suffix: 2
1279:       args: -tao_cmonitor -tao_type lcl -tao_max_it 11 -use_ptap -use_lrc -ns 1 -tao_gatol 1.e-3
1280:       requires: !single

1282: TEST*/