Actual source code: flow.c
2: static char help[] = "FUN3D - 3-D, Unstructured Incompressible Euler Solver.\n\
3: originally written by W. K. Anderson of NASA Langley, \n\
4: and ported into PETSc by D. K. Kaushik, ODU and ICASE.\n\n";
6: #include <petscsnes.h>
7: #include <petsctime.h>
8: #include <petscao.h>
9: #include "user.h"
10: #if defined(_OPENMP)
11: #include "omp.h"
12: #if !defined(HAVE_REDUNDANT_WORK)
13: #include "metis.h"
14: #endif
15: #endif
17: #define ICALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
18: #define FCALLOC(size,y) PetscMalloc1(PetscMax(size,1),y);
20: typedef struct {
21: Vec qnew,qold,func;
22: double fnorm_ini,dt_ini,cfl_ini;
23: double ptime;
24: double cfl_max,max_time;
25: double fnorm,dt,cfl;
26: double fnorm_ratio;
27: int ires,iramp,itstep;
28: int max_steps,print_freq;
29: int LocalTimeStepping;
30: } TstepCtx;
32: typedef struct { /*============================*/
33: GRID *grid; /* Pointer to Grid info */
34: TstepCtx *tsCtx; /* Pointer to Time Stepping Context */
35: PetscBool PreLoading;
36: } AppCtx; /*============================*/
38: extern int FormJacobian(SNES,Vec,Mat,Mat,void*),
39: FormFunction(SNES,Vec,Vec,void*),
40: FormInitialGuess(SNES,GRID*),
41: Update(SNES,void*),
42: ComputeTimeStep(SNES,int,void*),
43: GetLocalOrdering(GRID*),
44: SetPetscDS(GRID *,TstepCtx*);
45: static PetscErrorCode WritePVTU(AppCtx*,const char*,PetscBool);
46: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
47: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncolor,int *ncount);
48: #endif
49: /* Global Variables */
51: /*============================*/
52: CINFO *c_info; /* Pointer to COMMON INFO */
53: CRUNGE *c_runge; /* Pointer to COMMON RUNGE */
54: CGMCOM *c_gmcom; /* Pointer to COMMON GMCOM */
55: /*============================*/
56: int rank,size,rstart;
57: REAL memSize = 0.0,grad_time = 0.0;
58: #if defined(_OPENMP)
59: int max_threads = 2,tot_threads,my_thread_id;
60: #endif
62: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
63: int event0,event1;
64: Scalar time_counters;
65: long long counter0,counter1;
66: #endif
67: int ntran[max_nbtran]; /* transition stuff put here to make global */
68: REAL dxtran[max_nbtran];
70: /* ======================== MAIN ROUTINE =================================== */
71: /* */
72: /* Finite volume flux split solver for general polygons */
73: /* */
74: /*===========================================================================*/
76: int main(int argc,char **args)
77: {
78: AppCtx user;
79: GRID f_pntr;
80: TstepCtx tsCtx;
81: SNES snes; /* nonlinear solver context */
82: Mat Jpc; /* Jacobian and Preconditioner matrices */
83: PetscScalar *qnode;
84: int ierr;
85: PetscBool flg,write_pvtu,pvtu_base64;
86: MPI_Comm comm;
87: PetscInt maxfails = 10000;
88: char pvtu_fname[PETSC_MAX_PATH_LEN] = "incomp";
90: PetscInitialize(&argc,&args,NULL,help);
91: PetscInitializeFortran();
92: PetscOptionsInsertFile(PETSC_COMM_WORLD,NULL,"petsc.opt",PETSC_FALSE);
94: comm = PETSC_COMM_WORLD;
95: f77FORLINK(); /* Link FORTRAN and C COMMONS */
97: MPI_Comm_rank(comm,&rank);
98: MPI_Comm_size(comm,&size);
100: flg = PETSC_FALSE;
101: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
102: if (flg) PetscMemorySetGetMaximumUsage();
104: /*======================================================================*/
105: /* Initialize stuff related to time stepping */
106: /*======================================================================*/
107: tsCtx.fnorm_ini = 0.0; tsCtx.cfl_ini = 50.0; tsCtx.cfl_max = 1.0e+05;
108: tsCtx.max_steps = 50; tsCtx.max_time = 1.0e+12; tsCtx.iramp = -50;
109: tsCtx.dt = -5.0; tsCtx.fnorm_ratio = 1.0e+10;
110: tsCtx.LocalTimeStepping = 1;
111: PetscOptionsGetInt(NULL,NULL,"-max_st",&tsCtx.max_steps,NULL);
112: PetscOptionsGetReal(NULL,NULL,"-ts_rtol",&tsCtx.fnorm_ratio,NULL);
113: PetscOptionsGetReal(NULL,NULL,"-cfl_ini",&tsCtx.cfl_ini,NULL);
114: PetscOptionsGetReal(NULL,NULL,"-cfl_max",&tsCtx.cfl_max,NULL);
115: tsCtx.print_freq = tsCtx.max_steps;
116: PetscOptionsGetInt(NULL,NULL,"-print_freq",&tsCtx.print_freq,&flg);
117: PetscOptionsGetString(NULL,NULL,"-pvtu",pvtu_fname,sizeof(pvtu_fname),&write_pvtu);
118: pvtu_base64 = PETSC_FALSE;
119: PetscOptionsGetBool(NULL,NULL,"-pvtu_base64",&pvtu_base64,NULL);
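  /* Default flow parameters stored in the Fortran COMMON blocks; alpha and beta
     can be overridden below with the -alpha and -beta options */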
121: c_info->alpha = 3.0;
122: c_info->beta = 15.0;
123: c_info->ivisc = 0;
125: c_gmcom->ilu0 = 1;
126: c_gmcom->nsrch = 10;
128: c_runge->nitfo = 0;
130: PetscMemzero(&f_pntr,sizeof(f_pntr));
131: f_pntr.jvisc = c_info->ivisc;
132: f_pntr.ileast = 4;
133: PetscOptionsGetReal(NULL,NULL,"-alpha",&c_info->alpha,NULL);
134: PetscOptionsGetReal(NULL,NULL,"-beta",&c_info->beta,NULL);
136: /*======================================================================*/
138: /*Set the maximum number of threads for OpenMP */
139: #if defined(_OPENMP)
140: PetscOptionsGetInt(NULL,NULL,"-max_threads",&max_threads,&flg);
141: omp_set_num_threads(max_threads);
142: PetscPrintf(comm,"Using %d threads for each MPI process\n",max_threads);
143: #endif
145: /* Get the grid information into local ordering */
146: GetLocalOrdering(&f_pntr);
148: /* Allocate Memory for Some Other Grid Arrays */
149: set_up_grid(&f_pntr);
151: /* If using least squares for the gradients,calculate the r's */
152: if (f_pntr.ileast == 4) f77SUMGS(&f_pntr.nnodesLoc,&f_pntr.nedgeLoc,f_pntr.eptr,f_pntr.xyz,f_pntr.rxy,&rank,&f_pntr.nvertices);
154: user.grid = &f_pntr;
155: user.tsCtx = &tsCtx;
157: /* SAWs Stuff */
159: /*
160: Preload the executable to get accurate timings. This runs the following chunk of
161: code twice, first to get the executable pages into memory and the second time for
162: accurate timings.
163: */
164: PetscPreLoadBegin(PETSC_TRUE,"Time integration");
165: user.PreLoading = PetscPreLoading;
167: /* Create nonlinear solver */
168: SetPetscDS(&f_pntr,&tsCtx);
169: SNESCreate(comm,&snes);
170: SNESSetType(snes,"newtonls");
172: /* Set various routines and options */
173: SNESSetFunction(snes,user.grid->res,FormFunction,&user);
174: flg = PETSC_FALSE;
175: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
176: if (flg) {
177: /* Use matrix-free to define Newton system; use explicit (approx) Jacobian for preconditioner */
178: MatCreateSNESMF(snes,&Jpc);
179: SNESSetJacobian(snes,Jpc,user.grid->A,FormJacobian,&user);
180: } else {
181: /* Use explicit (approx) Jacobian to define Newton system and preconditioner */
182: SNESSetJacobian(snes,user.grid->A,user.grid->A,FormJacobian,&user);
183: }
185: SNESSetMaxLinearSolveFailures(snes,maxfails);
186: SNESSetFromOptions(snes);
188: /* Initialize the flowfield */
189: FormInitialGuess(snes,user.grid);
191: /* Solve nonlinear system */
192: Update(snes,&user);
194: /* Write restart file */
195: VecGetArray(user.grid->qnode,&qnode);
196: /*f77WREST(&user.grid->nnodes,qnode,user.grid->turbre,user.grid->amut);*/
198: /* Write Tecplot solution file */
199: #if 0
200: if (rank == 0)
201: f77TECFLO(&user.grid->nnodes,
202: &user.grid->nnbound,&user.grid->nvbound,&user.grid->nfbound,
203: &user.grid->nnfacet,&user.grid->nvfacet,&user.grid->nffacet,
204: &user.grid->nsnode, &user.grid->nvnode, &user.grid->nfnode,
205: c_info->title,
206: user.grid->x, user.grid->y, user.grid->z,
207: qnode,
208: user.grid->nnpts, user.grid->nntet, user.grid->nvpts,
209: user.grid->nvtet, user.grid->nfpts, user.grid->nftet,
210: user.grid->f2ntn, user.grid->f2ntv, user.grid->f2ntf,
211: user.grid->isnode, user.grid->ivnode, user.grid->ifnode,
212: &rank);
213: #endif
214: if (write_pvtu) WritePVTU(&user,pvtu_fname,pvtu_base64);
216: /* Write residual,lift,drag,and moment history file */
217: /*
218: if (rank == 0) f77PLLAN(&user.grid->nnodes,&rank);
219: */
221: VecRestoreArray(user.grid->qnode,&qnode);
222: flg = PETSC_FALSE;
223: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
224: if (flg) {
225: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage before destroying\n");
226: }
228: VecDestroy(&user.grid->qnode);
229: VecDestroy(&user.grid->qnodeLoc);
230: VecDestroy(&user.tsCtx->qold);
231: VecDestroy(&user.tsCtx->func);
232: VecDestroy(&user.grid->res);
233: VecDestroy(&user.grid->grad);
234: VecDestroy(&user.grid->gradLoc);
235: MatDestroy(&user.grid->A);
236: flg = PETSC_FALSE;
237: PetscOptionsGetBool(NULL,NULL,"-matrix_free",&flg,NULL);
238: if (flg) MatDestroy(&Jpc);
239: SNESDestroy(&snes);
240: VecScatterDestroy(&user.grid->scatter);
241: VecScatterDestroy(&user.grid->gradScatter);
242: flg = PETSC_FALSE;
243: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
244: if (flg) {
245: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after destroying\n");
246: }
247: PetscPreLoadEnd();
249: /* allocated in set_up_grid() */
250: PetscFree(user.grid->isface);
251: PetscFree(user.grid->ivface);
252: PetscFree(user.grid->ifface);
253: PetscFree(user.grid->us);
254: PetscFree(user.grid->vs);
255: PetscFree(user.grid->as);
257: /* Allocated in GetLocalOrdering() */
258: PetscFree(user.grid->eptr);
259: PetscFree(user.grid->ia);
260: PetscFree(user.grid->ja);
261: PetscFree(user.grid->loc2glo);
262: PetscFree(user.grid->loc2pet);
263: PetscFree(user.grid->xyzn);
264: #if defined(_OPENMP)
265: # if defined(HAVE_REDUNDANT_WORK)
266: PetscFree(user.grid->resd);
267: # else
268: PetscFree(user.grid->part_thr);
269: PetscFree(user.grid->nedge_thr);
270: PetscFree(user.grid->edge_thr);
271: PetscFree(user.grid->xyzn_thr);
272: # endif
273: #endif
274: PetscFree(user.grid->xyz);
275: PetscFree(user.grid->area);
277: PetscFree(user.grid->nntet);
278: PetscFree(user.grid->nnpts);
279: PetscFree(user.grid->f2ntn);
280: PetscFree(user.grid->isnode);
281: PetscFree(user.grid->sxn);
282: PetscFree(user.grid->syn);
283: PetscFree(user.grid->szn);
284: PetscFree(user.grid->sa);
285: PetscFree(user.grid->sface_bit);
287: PetscFree(user.grid->nvtet);
288: PetscFree(user.grid->nvpts);
289: PetscFree(user.grid->f2ntv);
290: PetscFree(user.grid->ivnode);
291: PetscFree(user.grid->vxn);
292: PetscFree(user.grid->vyn);
293: PetscFree(user.grid->vzn);
294: PetscFree(user.grid->va);
295: PetscFree(user.grid->vface_bit);
297: PetscFree(user.grid->nftet);
298: PetscFree(user.grid->nfpts);
299: PetscFree(user.grid->f2ntf);
300: PetscFree(user.grid->ifnode);
301: PetscFree(user.grid->fxn);
302: PetscFree(user.grid->fyn);
303: PetscFree(user.grid->fzn);
304: PetscFree(user.grid->fa);
305: PetscFree(user.grid->cdt);
306: PetscFree(user.grid->phi);
307: PetscFree(user.grid->rxy);
309: PetscPrintf(comm,"Time taken in gradient calculation %g sec.\n",grad_time);
311: PetscFinalize();
312: return 0;
313: }
315: /*---------------------------------------------------------------------*/
316: /* --------------------- Form initial approximation ----------------- */
317: int FormInitialGuess(SNES snes,GRID *grid)
318: /*---------------------------------------------------------------------*/
319: {
320: int ierr;
321: PetscScalar *qnode;
323: VecGetArray(grid->qnode,&qnode);
324: f77INIT(&grid->nnodesLoc,qnode,grid->turbre,grid->amut,&grid->nvnodeLoc,grid->ivnode,&rank);
325: VecRestoreArray(grid->qnode,&qnode);
326: return 0;
327: }
329: /*---------------------------------------------------------------------*/
330: /* --------------------- Evaluate Function F(x) --------------------- */
331: int FormFunction(SNES snes,Vec x,Vec f,void *dummy)
332: /*---------------------------------------------------------------------*/
333: {
334: AppCtx *user = (AppCtx*) dummy;
335: GRID *grid = user->grid;
336: TstepCtx *tsCtx = user->tsCtx;
337: PetscScalar *qnode,*res,*qold;
338: PetscScalar *grad;
339: PetscScalar temp;
340: VecScatter scatter = grid->scatter;
341: VecScatter gradScatter = grid->gradScatter;
342: Vec localX = grid->qnodeLoc;
343: Vec localGrad = grid->gradLoc;
344: int i,j,in,ierr;
345: int nbface,ires;
346: PetscScalar time_ini,time_fin;
348: /* Get X into the local work vector */
349: VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
350: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
351: /* VecCopy(x,localX); */
352: /* access the local work f,grad,and input */
353: VecGetArray(f,&res);
354: VecGetArray(grid->grad,&grad);
355: VecGetArray(localX,&qnode);
356: ires = tsCtx->ires;
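  /* Compute nodal gradients with the weighted least-squares routine f77LSTGS and
     accumulate the time spent in the global grad_time counter */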
358: PetscTime(&time_ini);
359: f77LSTGS(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,qnode,grad,grid->xyz,grid->rxy,
360: &rank,&grid->nvertices);
361: PetscTime(&time_fin);
362: grad_time += time_fin - time_ini;
363: VecRestoreArray(grid->grad,&grad);
365: VecScatterBegin(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
366: VecScatterEnd(gradScatter,grid->grad,localGrad,INSERT_VALUES,SCATTER_FORWARD);
367: /*VecCopy(grid->grad,localGrad);*/
369: VecGetArray(localGrad,&grad);
370: nbface = grid->nsface + grid->nvface + grid->nfface;
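  /* Evaluate the flux-split residual over the local edges and all boundary faces
     (solid, viscous and free) in the Fortran routine f77GETRES */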
371: f77GETRES(&grid->nnodesLoc,&grid->ncell, &grid->nedgeLoc, &grid->nsface,
372: &grid->nvface,&grid->nfface, &nbface,
373: &grid->nsnodeLoc,&grid->nvnodeLoc, &grid->nfnodeLoc,
374: grid->isface, grid->ivface, grid->ifface, &grid->ileast,
375: grid->isnode, grid->ivnode, grid->ifnode,
376: &grid->nnfacetLoc,grid->f2ntn, &grid->nnbound,
377: &grid->nvfacetLoc,grid->f2ntv, &grid->nvbound,
378: &grid->nffacetLoc,grid->f2ntf, &grid->nfbound,
379: grid->eptr,
380: grid->sxn, grid->syn, grid->szn,
381: grid->vxn, grid->vyn, grid->vzn,
382: grid->fxn, grid->fyn, grid->fzn,
383: grid->xyzn,
384: qnode, grid->cdt,
385: grid->xyz, grid->area,
386: grad, res,
387: grid->turbre,
388: grid->slen, grid->c2n,
389: grid->c2e,
390: grid->us, grid->vs, grid->as,
391: grid->phi,
392: grid->amut, &ires,
393: #if defined(_OPENMP)
394: &max_threads,
395: #if defined(HAVE_EDGE_COLORING)
396: &grid->ncolor, grid->ncount,
397: #elif defined(HAVE_REDUNDANT_WORK)
398: grid->resd,
399: #else
400: &grid->nedgeAllThr,
401: grid->part_thr,grid->nedge_thr,grid->edge_thr,grid->xyzn_thr,
402: #endif
403: #endif
404: &tsCtx->LocalTimeStepping,&rank,&grid->nvertices);
406: /* Add the contribution due to time stepping */
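  /* Pseudo-transient continuation: for each of the 4 unknowns of node i, add
     area[i]/(cfl*cdt[i]) * (qnode - qold) to the residual */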
407: if (ires == 1) {
408: VecGetArray(tsCtx->qold,&qold);
409: #if defined(INTERLACING)
410: for (i = 0; i < grid->nnodesLoc; i++) {
411: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
412: for (j = 0; j < 4; j++) {
413: in = 4*i + j;
414: res[in] += temp*(qnode[in] - qold[in]);
415: }
416: }
417: #else
418: for (j = 0; j < 4; j++) {
419: for (i = 0; i < grid->nnodesLoc; i++) {
420: temp = grid->area[i]/(tsCtx->cfl*grid->cdt[i]);
421: in = grid->nnodesLoc*j + i;
422: res[in] += temp*(qnode[in] - qold[in]);
423: }
424: }
425: #endif
426: VecRestoreArray(tsCtx->qold,&qold);
427: }
428: VecRestoreArray(localX,&qnode);
429: VecRestoreArray(f,&res);
430: VecRestoreArray(localGrad,&grad);
431: return 0;
432: }
434: /*---------------------------------------------------------------------*/
435: /* -------------------- Evaluate Jacobian F'(x) -------------------- */
437: int FormJacobian(SNES snes,Vec x,Mat Jac,Mat pc_mat,void *dummy)
438: /*---------------------------------------------------------------------*/
439: {
440: AppCtx *user = (AppCtx*) dummy;
441: GRID *grid = user->grid;
442: TstepCtx *tsCtx = user->tsCtx;
443: Vec localX = grid->qnodeLoc;
444: PetscScalar *qnode;
445: int ierr;
447: /* VecScatterBegin(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD);
448: VecScatterEnd(scatter,x,localX,INSERT_VALUES,SCATTER_FORWARD); */
449: /* VecCopy(x,localX); */
450: MatSetUnfactored(pc_mat);
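  /* Fill the approximate (preconditioner) Jacobian with the Fortran routine f77FILLA
     and assemble it; with -matrix_free the Newton system itself uses a matrix-free Jacobian */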
452: VecGetArray(localX,&qnode);
453: f77FILLA(&grid->nnodesLoc,&grid->nedgeLoc,grid->eptr,
454: &grid->nsface,
455: grid->isface,grid->fxn,grid->fyn,grid->fzn,
456: grid->sxn,grid->syn,grid->szn,
457: &grid->nsnodeLoc,&grid->nvnodeLoc,&grid->nfnodeLoc,grid->isnode,
458: grid->ivnode,grid->ifnode,qnode,&pc_mat,grid->cdt,
459: grid->area,grid->xyzn,&tsCtx->cfl,
460: &rank,&grid->nvertices);
461: VecRestoreArray(localX,&qnode);
462: MatAssemblyBegin(Jac,MAT_FINAL_ASSEMBLY);
463: MatAssemblyEnd(Jac,MAT_FINAL_ASSEMBLY);
464: #if defined(MATRIX_VIEW)
465: if ((tsCtx->itstep != 0) &&(tsCtx->itstep % tsCtx->print_freq) == 0) {
466: PetscViewer viewer;
467: char mat_file[PETSC_MAX_PATH_LEN];
468: sprintf(mat_file,"mat_bin.%d",tsCtx->itstep);
469: PetscViewerBinaryOpen(MPI_COMM_WORLD,mat_file,FILE_MODE_WRITE,&viewer);
470: MatView(pc_mat,viewer);
471: PetscViewerDestroy(&viewer);
472: }
473: #endif
474: return 0;
475: }
477: /*---------------------------------------------------------------------*/
478: int Update(SNES snes,void *ctx)
479: /*---------------------------------------------------------------------*/
480: {
481: AppCtx *user = (AppCtx*) ctx;
482: GRID *grid = user->grid;
483: TstepCtx *tsCtx = user->tsCtx;
484: VecScatter scatter = grid->scatter;
485: Vec localX = grid->qnodeLoc;
486: PetscScalar *qnode,*res;
487: PetscScalar clift,cdrag,cmom;
488: int ierr,its;
489: PetscScalar fratio;
490: PetscScalar time1,time2,cpuloc,cpuglo;
491: int max_steps;
492: PetscBool print_flag = PETSC_FALSE;
493: FILE *fptr = 0;
494: int nfailsCum = 0,nfails = 0;
495: /*Scalar cpu_ini,cpu_fin,cpu_time;*/
496: /*int event0 = 14,event1 = 25,gen_start,gen_read;
497: PetscScalar time_start_counters,time_read_counters;
498: long long counter0,counter1;*/
500: PetscOptionsGetBool(NULL,NULL,"-print",&print_flag,NULL);
501: if (print_flag) {
502: PetscFOpen(PETSC_COMM_WORLD,"history.out","w",&fptr);
503: PetscFPrintf(PETSC_COMM_WORLD,fptr,"VARIABLES = iter,cfl,fnorm,clift,cdrag,cmom,cpu\n");
504: }
505: if (user->PreLoading) max_steps = 1;
506: else max_steps = tsCtx->max_steps;
507: fratio = 1.0;
508: /*tsCtx->ptime = 0.0;*/
509: VecCopy(grid->qnode,tsCtx->qold);
510: PetscTime(&time1);
511: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
512: /* if (!user->PreLoading) {
513: PetscBool flg = PETSC_FALSE;
514: PetscOptionsGetInt(NULL,NULL,"-e0",&event0,&flg);
515: PetscOptionsGetInt(NULL,NULL,"-e1",&event1,&flg);
516: PetscTime(&time_start_counters);
517: if ((gen_start = start_counters(event0,event1)) < 0)
518: SETERRQ(PETSC_COMM_SELF,1,"Error in start_counters");
519: }*/
520: #endif
521: /*cpu_ini = PetscGetCPUTime();*/
522: for (tsCtx->itstep = 0; (tsCtx->itstep < max_steps) &&
523: (fratio <= tsCtx->fnorm_ratio); tsCtx->itstep++) {
524: ComputeTimeStep(snes,tsCtx->itstep,user);
525: /*tsCtx->ptime += tsCtx->dt;*/
527: SNESSolve(snes,NULL,grid->qnode);
528: SNESGetIterationNumber(snes,&its);
530: SNESGetNonlinearStepFailures(snes,&nfails);
531: nfailsCum += nfails; nfails = 0;
533: if (print_flag) {
534: PetscPrintf(PETSC_COMM_WORLD,"At Time Step %d cfl = %g and fnorm = %g\n",
535: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm);
536: }
537: VecCopy(grid->qnode,tsCtx->qold);
539: c_info->ntt = tsCtx->itstep+1;
540: PetscTime(&time2);
541: cpuloc = time2-time1;
542: cpuglo = 0.0;
543: MPI_Allreduce(&cpuloc,&cpuglo,1,MPIU_REAL,MPIU_MAX,PETSC_COMM_WORLD);
544: c_info->tot = cpuglo; /* Total CPU time used up to this time step */
546: VecScatterBegin(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
547: VecScatterEnd(scatter,grid->qnode,localX,INSERT_VALUES,SCATTER_FORWARD);
548: /* VecCopy(grid->qnode,localX); */
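  /* Compute the lift, drag and moment coefficients for this time step with f77FORCE */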
550: VecGetArray(grid->res,&res);
551: VecGetArray(localX,&qnode);
553: f77FORCE(&grid->nnodesLoc,&grid->nedgeLoc,
554: grid->isnode, grid->ivnode,
555: &grid->nnfacetLoc,grid->f2ntn,&grid->nnbound,
556: &grid->nvfacetLoc,grid->f2ntv,&grid->nvbound,
557: grid->eptr, qnode,
558: grid->xyz,
559: grid->sface_bit,grid->vface_bit,
560: &clift,&cdrag,&cmom,&rank,&grid->nvertices);
561: if (print_flag) {
562: PetscPrintf(PETSC_COMM_WORLD,"%d\t%g\t%g\t%g\t%g\t%g\n",tsCtx->itstep,
563: tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom);
564: PetscPrintf(PETSC_COMM_WORLD,"Wall clock time needed %g seconds for %d time steps\n",
565: cpuglo,tsCtx->itstep);
566: PetscFPrintf(PETSC_COMM_WORLD,fptr,"%d\t%g\t%g\t%g\t%g\t%g\t%g\n",
567: tsCtx->itstep,tsCtx->cfl,tsCtx->fnorm,clift,cdrag,cmom,cpuglo);
568: }
569: VecRestoreArray(localX,&qnode);
570: VecRestoreArray(grid->res,&res);
571: fratio = tsCtx->fnorm_ini/tsCtx->fnorm;
572: MPI_Barrier(PETSC_COMM_WORLD);
574: } /* End of time step loop */
576: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
577: if (!user->PreLoading) {
578: int eve0,eve1;
579: FILE *cfp0,*cfp1;
580: char str[256];
581: /* if ((gen_read = read_counters(event0,&counter0,event1,&counter1)) < 0)
582: SETERRQ(PETSC_COMM_SELF,1,"Error in read_counter");
583: PetscTime(&time_read_counters);
584: if (gen_read != gen_start) {
585: SETERRQ(PETSC_COMM_SELF,1,"Lost Counters!! Aborting ...");
586: }*/
587: /*sprintf(str,"counters%d_and_%d",event0,event1);
588: cfp0 = fopen(str,"a");*/
589: /*print_counters(event0,counter0,event1,counter1);*/
590: /*fprintf(cfp0,"%lld %lld %g\n",counter0,counter1,
591: time_counters);
592: fclose(cfp0);*/
593: }
594: #endif
595: PetscPrintf(PETSC_COMM_WORLD,"Total wall clock time needed %g seconds for %d time steps\n",
596: cpuglo,tsCtx->itstep);
597: PetscPrintf(PETSC_COMM_WORLD,"cfl = %g fnorm = %g\n",tsCtx->cfl,tsCtx->fnorm);
598: PetscPrintf(PETSC_COMM_WORLD,"clift = %g cdrag = %g cmom = %g\n",clift,cdrag,cmom);
600: if (rank == 0 && print_flag) fclose(fptr);
601: if (user->PreLoading) {
602: tsCtx->fnorm_ini = 0.0;
603: PetscPrintf(PETSC_COMM_WORLD,"Preloading done ...\n");
604: }
605: return 0;
606: }
608: /*---------------------------------------------------------------------*/
609: int ComputeTimeStep(SNES snes,int iter,void *ctx)
610: /*---------------------------------------------------------------------*/
611: {
612: AppCtx *user = (AppCtx*) ctx;
613: TstepCtx *tsCtx = user->tsCtx;
614: Vec func = tsCtx->func;
615: PetscScalar inc = 1.1;
616: PetscScalar newcfl;
617: int ierr;
618: /*int iramp = tsCtx->iramp;*/
620: tsCtx->ires = 0;
621: FormFunction(snes,tsCtx->qold,func,user);
622: tsCtx->ires = 1;
623: VecNorm(func,NORM_2,&tsCtx->fnorm);
624: /* first time through so compute initial function norm */
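  /* on later steps the CFL grows in inverse proportion to the current residual norm
     (an SER-style ramp), capped at cfl_max */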
625: if (tsCtx->fnorm_ini == 0.0) {
626: tsCtx->fnorm_ini = tsCtx->fnorm;
627: tsCtx->cfl = tsCtx->cfl_ini;
628: } else {
629: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
630: tsCtx->cfl = PetscMin(newcfl,tsCtx->cfl_max);
631: }
633: /* if (iramp < 0) {
634: newcfl = inc*tsCtx->cfl_ini*tsCtx->fnorm_ini/tsCtx->fnorm;
635: } else {
636: if (tsCtx->dt < 0 && iramp > 0)
637: if (iter > iramp) newcfl = tsCtx->cfl_max;
638: else newcfl = tsCtx->cfl_ini + (tsCtx->cfl_max - tsCtx->cfl_ini)*
639: (double) iter/(double) iramp;
640: }
641: tsCtx->cfl = MIN(newcfl,tsCtx->cfl_max);*/
642: /*printf("In ComputeTime Step - fnorm is %f\n",tsCtx->fnorm);*/
643: /*VecDestroy(&func);*/
644: return 0;
645: }
647: /*---------------------------------------------------------------------*/
648: int GetLocalOrdering(GRID *grid)
649: /*---------------------------------------------------------------------*/
650: {
651: int ierr,i,j,k,inode,isurf,nte,nb,node1,node2,node3;
652: int nnodes,nedge,nnz,jstart,jend;
653: int nnodesLoc,nvertices,nedgeLoc,nnodesLocEst;
654: int nedgeLocEst,remEdges,readEdges,remNodes,readNodes;
655: int nnfacet,nvfacet,nffacet;
656: int nnfacetLoc,nvfacetLoc,nffacetLoc;
657: int nsnode,nvnode,nfnode;
658: int nsnodeLoc,nvnodeLoc,nfnodeLoc;
659: int nnbound,nvbound,nfbound;
660: int bs = 4;
661: int fdes = 0;
662: off_t currentPos = 0,newPos = 0;
663: int grid_param = 13;
664: int cross_edges = 0;
665: int *edge_bit,*pordering;
666: int *l2p,*l2a,*p2l,*a2l,*v2p,*eperm;
667: int *tmp,*tmp1,*tmp2;
668: PetscScalar time_ini,time_fin;
669: PetscScalar *ftmp,*ftmp1;
670: char mesh_file[PETSC_MAX_PATH_LEN] = "";
671: AO ao;
672: FILE *fptr,*fptr1;
673: PetscBool flg;
674: MPI_Comm comm = PETSC_COMM_WORLD;
676: /* Read the integer grid parameters */
677: ICALLOC(grid_param,&tmp);
678: if (rank == 0) {
679: PetscBool exists;
680: PetscOptionsGetString(NULL,NULL,"-mesh",mesh_file,sizeof(mesh_file),&flg);
681: PetscTestFile(mesh_file,'r',&exists);
682: if (!exists) { /* try uns3d.msh as the file name */
683: PetscStrcpy(mesh_file,"uns3d.msh");
684: }
685: PetscBinaryOpen(mesh_file,FILE_MODE_READ,&fdes);
686: }
687: PetscBinarySynchronizedRead(comm,fdes,tmp,grid_param,PETSC_INT);
688: grid->ncell = tmp[0];
689: grid->nnodes = tmp[1];
690: grid->nedge = tmp[2];
691: grid->nnbound = tmp[3];
692: grid->nvbound = tmp[4];
693: grid->nfbound = tmp[5];
694: grid->nnfacet = tmp[6];
695: grid->nvfacet = tmp[7];
696: grid->nffacet = tmp[8];
697: grid->nsnode = tmp[9];
698: grid->nvnode = tmp[10];
699: grid->nfnode = tmp[11];
700: grid->ntte = tmp[12];
701: grid->nsface = 0;
702: grid->nvface = 0;
703: grid->nfface = 0;
704: PetscFree(tmp);
705: PetscPrintf(comm,"nnodes = %d,nedge = %d,nnfacet = %d,nsnode = %d,nfnode = %d\n",
706: grid->nnodes,grid->nedge,grid->nnfacet,grid->nsnode,grid->nfnode);
708: nnodes = grid->nnodes;
709: nedge = grid->nedge;
710: nnfacet = grid->nnfacet;
711: nvfacet = grid->nvfacet;
712: nffacet = grid->nffacet;
713: nnbound = grid->nnbound;
714: nvbound = grid->nvbound;
715: nfbound = grid->nfbound;
716: nsnode = grid->nsnode;
717: nvnode = grid->nvnode;
718: nfnode = grid->nfnode;
720: /* Read the partitioning vector generated by MeTiS */
721: ICALLOC(nnodes,&l2a);
722: ICALLOC(nnodes,&v2p);
723: ICALLOC(nnodes,&a2l);
724: nnodesLoc = 0;
726: for (i = 0; i < nnodes; i++) a2l[i] = -1;
727: PetscTime(&time_ini);
729: if (rank == 0) {
730: if (size == 1) {
731: PetscMemzero(v2p,nnodes*sizeof(int));
732: } else {
733: char spart_file[PETSC_MAX_PATH_LEN],part_file[PETSC_MAX_PATH_LEN];
734: PetscBool exists;
736: PetscOptionsGetString(NULL,NULL,"-partition",spart_file,sizeof(spart_file),&flg);
737: PetscTestFile(spart_file,'r',&exists);
738: if (!exists) { /* try appending the number of processors */
739: sprintf(part_file,"part_vec.part.%d",size);
740: PetscStrcpy(spart_file,part_file);
741: }
742: fptr = fopen(spart_file,"r");
744: for (inode = 0; inode < nnodes; inode++) {
745: fscanf(fptr,"%d\n",&node1);
746: v2p[inode] = node1;
747: }
748: fclose(fptr);
749: }
750: }
751: MPI_Bcast(v2p,nnodes,MPI_INT,0,comm);
752: for (inode = 0; inode < nnodes; inode++) {
753: if (v2p[inode] == rank) {
754: l2a[nnodesLoc] = inode;
755: a2l[inode] = nnodesLoc;
756: nnodesLoc++;
757: }
758: }
760: PetscTime(&time_fin);
761: time_fin -= time_ini;
762: PetscPrintf(comm,"Partition Vector read successfully\n");
763: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
765: MPI_Scan(&nnodesLoc,&rstart,1,MPI_INT,MPI_SUM,comm);
766: rstart -= nnodesLoc;
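  /* rstart is the first PETSc index owned by this process; build an application
     ordering (AO) mapping the global (file) numbers of the local nodes to the
     contiguous PETSc numbering */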
767: ICALLOC(nnodesLoc,&pordering);
768: for (i=0; i < nnodesLoc; i++) pordering[i] = rstart + i;
769: AOCreateBasic(comm,nnodesLoc,l2a,pordering,&ao);
770: PetscFree(pordering);
772: /* Now count the local number of edges - edges touching ghost nodes are
773: included, but edges between two ghost nodes are NOT counted */
774: nedgeLoc = 0;
775: nvertices = nnodesLoc;
776: /* Choose an estimated number of local edges. The choice
777: nedgeLocEst = 1000000 looks reasonable as it will read
778: the edge and edge normal arrays in 8 MB chunks */
779: /*nedgeLocEst = nedge/size;*/
780: nedgeLocEst = PetscMin(nedge,1000000);
781: remEdges = nedge;
782: ICALLOC(2*nedgeLocEst,&tmp);
783: PetscBinarySynchronizedSeek(comm,fdes,0,PETSC_BINARY_SEEK_CUR,&currentPos);
784: PetscTime(&time_ini);
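  /* The file stores the two edge end-point arrays back to back; read a chunk of
     node1 values, seek ahead to the matching chunk of node2 values, read it, then
     seek back so the next chunk starts where this one left off */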
785: while (remEdges > 0) {
786: readEdges = PetscMin(remEdges,nedgeLocEst);
787: /*time_ini = PetscTime();*/
788: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
789: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
790: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
791: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
792: /*time_fin += PetscTime()-time_ini;*/
793: for (j = 0; j < readEdges; j++) {
794: node1 = tmp[j]-1;
795: node2 = tmp[j+readEdges]-1;
796: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
797: nedgeLoc++;
798: if (a2l[node1] == -1) {
799: l2a[nvertices] = node1;
800: a2l[node1] = nvertices;
801: nvertices++;
802: }
803: if (a2l[node2] == -1) {
804: l2a[nvertices] = node2;
805: a2l[node2] = nvertices;
806: nvertices++;
807: }
808: }
809: }
810: remEdges = remEdges - readEdges;
811: MPI_Barrier(comm);
812: }
813: PetscTime(&time_fin);
814: time_fin -= time_ini;
815: PetscPrintf(comm,"Local edges counted with MPI_Bcast %d\n",nedgeLoc);
816: PetscPrintf(comm,"Local vertices counted %d\n",nvertices);
817: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
819: /* Now store the local edges */
820: ICALLOC(2*nedgeLoc,&grid->eptr);
821: ICALLOC(nedgeLoc,&edge_bit);
822: ICALLOC(nedgeLoc,&eperm);
823: i = 0; j = 0; k = 0;
824: remEdges = nedge;
825: PetscBinarySynchronizedSeek(comm,fdes,currentPos,PETSC_BINARY_SEEK_SET,&newPos);
826: currentPos = newPos;
828: PetscTime(&time_ini);
829: while (remEdges > 0) {
830: readEdges = PetscMin(remEdges,nedgeLocEst);
831: PetscBinarySynchronizedRead(comm,fdes,tmp,readEdges,PETSC_INT);
832: PetscBinarySynchronizedSeek(comm,fdes,(nedge-readEdges)*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
833: PetscBinarySynchronizedRead(comm,fdes,tmp+readEdges,readEdges,PETSC_INT);
834: PetscBinarySynchronizedSeek(comm,fdes,-nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_CUR,&newPos);
835: for (j = 0; j < readEdges; j++) {
836: node1 = tmp[j]-1;
837: node2 = tmp[j+readEdges]-1;
838: if ((v2p[node1] == rank) || (v2p[node2] == rank)) {
839: grid->eptr[k] = a2l[node1];
840: grid->eptr[k+nedgeLoc] = a2l[node2];
841: edge_bit[k] = i; /* Record global file index of the edge */
842: eperm[k] = k;
843: k++;
844: }
845: i++;
846: }
847: remEdges = remEdges - readEdges;
848: MPI_Barrier(comm);
849: }
850: PetscBinarySynchronizedSeek(comm,fdes,currentPos+2*nedge*PETSC_BINARY_INT_SIZE,PETSC_BINARY_SEEK_SET,&newPos);
851: PetscTime(&time_fin);
852: time_fin -= time_ini;
853: PetscPrintf(comm,"Local edges stored\n");
854: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
856: PetscFree(tmp);
857: ICALLOC(2*nedgeLoc,&tmp);
858: PetscMemcpy(tmp,grid->eptr,2*nedgeLoc*sizeof(int));
859: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
860: EdgeColoring(nvertices,nedgeLoc,grid->eptr,eperm,&grid->ncolor,grid->ncount);
861: #else
862: /* Now reorder the edges for better cache locality */
863: /*
864: tmp[0]=7;tmp[1]=6;tmp[2]=3;tmp[3]=9;tmp[4]=2;tmp[5]=0;
865: PetscSortIntWithPermutation(6,tmp,eperm);
866: for (i=0; i<6; i++)
867: printf("%d %d %d\n",i,tmp[i],eperm[i]);
868: */
869: flg = PETSC_FALSE;
870: PetscOptionsGetBool(NULL,NULL,"-no_edge_reordering",&flg,NULL);
871: if (!flg) {
872: PetscSortIntWithPermutation(nedgeLoc,tmp,eperm);
873: }
874: #endif
875: PetscMallocValidate(__LINE__,PETSC_FUNCTION_NAME,__FILE__);
876: k = 0;
877: for (i = 0; i < nedgeLoc; i++) {
878: int cross_node=nnodesLoc/2;
879: node1 = tmp[eperm[i]] + 1;
880: node2 = tmp[nedgeLoc+eperm[i]] + 1;
881: #if defined(INTERLACING)
882: grid->eptr[k++] = node1;
883: grid->eptr[k++] = node2;
884: #else
885: grid->eptr[i] = node1;
886: grid->eptr[nedgeLoc+i] = node2;
887: #endif
888: /* if (node1 > node2)
889: printf("On processor %d, for edge %d node1 = %d, node2 = %d\n",
890: rank,i,node1,node2);*/
891: if ((node1 <= cross_node) && (node2 > cross_node)) cross_edges++;
892: }
893: PetscPrintf(comm,"Number of cross edges %d\n", cross_edges);
894: PetscFree(tmp);
895: #if defined(_OPENMP) && !defined(HAVE_REDUNDANT_WORK) && !defined(HAVE_EDGE_COLORING)
896: /* Now make the local 'ia' and 'ja' arrays */
897: ICALLOC(nvertices+1,&grid->ia);
898: /* Use tmp for a work array */
899: ICALLOC(nvertices,&tmp);
900: f77GETIA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
901: nnz = grid->ia[nvertices] - 1;
902: ICALLOC(nnz,&grid->ja);
903: f77GETJA(&nvertices,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
904: PetscFree(tmp);
905: #else
906: /* Now make the local 'ia' and 'ja' arrays */
907: ICALLOC(nnodesLoc+1,&grid->ia);
908: /* Use tmp for a work array */
909: ICALLOC(nnodesLoc,&tmp);
910: f77GETIA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,tmp,&rank);
911: nnz = grid->ia[nnodesLoc] - 1;
912: #if defined(BLOCKING)
913: PetscPrintf(comm,"The Jacobian has %d non-zero blocks with block size = %d\n",nnz,bs);
914: #else
915: PetscPrintf(comm,"The Jacobian has %d non-zeros\n",nnz);
916: #endif
917: ICALLOC(nnz,&grid->ja);
918: f77GETJA(&nnodesLoc,&nedgeLoc,grid->eptr,grid->ia,grid->ja,tmp,&rank);
919: PetscFree(tmp);
920: #endif
921: ICALLOC(nvertices,&grid->loc2glo);
922: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
923: PetscFree(l2a);
924: l2a = grid->loc2glo;
925: ICALLOC(nvertices,&grid->loc2pet);
926: l2p = grid->loc2pet;
927: PetscMemcpy(l2p,l2a,nvertices*sizeof(int));
928: AOApplicationToPetsc(ao,nvertices,l2p);
930: /* Renumber unit normals of dual face (from node1 to node2)
931: and the area of the dual mesh face */
932: FCALLOC(nedgeLocEst,&ftmp);
933: FCALLOC(nedgeLoc,&ftmp1);
934: FCALLOC(4*nedgeLoc,&grid->xyzn);
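  /* The four dual-face quantities (x, y, z components of the normal and the face area)
     are stored in the file as separate arrays of length nedge; for each one, keep only
     the entries whose global index appears in edge_bit[] and then apply the
     cache-reordering permutation eperm */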
935: /* Do the x-component */
936: i = 0; k = 0;
937: remEdges = nedge;
938: PetscTime(&time_ini);
939: while (remEdges > 0) {
940: readEdges = PetscMin(remEdges,nedgeLocEst);
941: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
942: for (j = 0; j < readEdges; j++)
943: if (edge_bit[k] == (i+j)) {
944: ftmp1[k] = ftmp[j];
945: k++;
946: }
947: i += readEdges;
948: remEdges = remEdges - readEdges;
949: MPI_Barrier(comm);
950: }
951: for (i = 0; i < nedgeLoc; i++)
952: #if defined(INTERLACING)
953: grid->xyzn[4*i] = ftmp1[eperm[i]];
954: #else
955: grid->xyzn[i] = ftmp1[eperm[i]];
956: #endif
957: /* Do the y-component */
958: i = 0; k = 0;
959: remEdges = nedge;
960: while (remEdges > 0) {
961: readEdges = PetscMin(remEdges,nedgeLocEst);
962: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
963: for (j = 0; j < readEdges; j++)
964: if (edge_bit[k] == (i+j)) {
965: ftmp1[k] = ftmp[j];
966: k++;
967: }
968: i += readEdges;
969: remEdges = remEdges - readEdges;
970: MPI_Barrier(comm);
971: }
972: for (i = 0; i < nedgeLoc; i++)
973: #if defined(INTERLACING)
974: grid->xyzn[4*i+1] = ftmp1[eperm[i]];
975: #else
976: grid->xyzn[nedgeLoc+i] = ftmp1[eperm[i]];
977: #endif
978: /* Do the z-component */
979: i = 0; k = 0;
980: remEdges = nedge;
981: while (remEdges > 0) {
982: readEdges = PetscMin(remEdges,nedgeLocEst);
983: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
984: for (j = 0; j < readEdges; j++)
985: if (edge_bit[k] == (i+j)) {
986: ftmp1[k] = ftmp[j];
987: k++;
988: }
989: i += readEdges;
990: remEdges = remEdges - readEdges;
991: MPI_Barrier(comm);
992: }
993: for (i = 0; i < nedgeLoc; i++)
994: #if defined(INTERLACING)
995: grid->xyzn[4*i+2] = ftmp1[eperm[i]];
996: #else
997: grid->xyzn[2*nedgeLoc+i] = ftmp1[eperm[i]];
998: #endif
999: /* Do the area */
1000: i = 0; k = 0;
1001: remEdges = nedge;
1002: while (remEdges > 0) {
1003: readEdges = PetscMin(remEdges,nedgeLocEst);
1004: PetscBinarySynchronizedRead(comm,fdes,ftmp,readEdges,PETSC_SCALAR);
1005: for (j = 0; j < readEdges; j++)
1006: if (edge_bit[k] == (i+j)) {
1007: ftmp1[k] = ftmp[j];
1008: k++;
1009: }
1010: i += readEdges;
1011: remEdges = remEdges - readEdges;
1012: MPI_Barrier(comm);
1013: }
1014: for (i = 0; i < nedgeLoc; i++)
1015: #if defined(INTERLACING)
1016: grid->xyzn[4*i+3] = ftmp1[eperm[i]];
1017: #else
1018: grid->xyzn[3*nedgeLoc+i] = ftmp1[eperm[i]];
1019: #endif
1021: PetscFree(edge_bit);
1022: PetscFree(eperm);
1023: PetscFree(ftmp);
1024: PetscFree(ftmp1);
1025: PetscTime(&time_fin);
1026: time_fin -= time_ini;
1027: PetscPrintf(comm,"Edge normals partitioned\n");
1028: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1029: #if defined(_OPENMP)
1030: /*Arrange for the division of work among threads*/
1031: #if defined(HAVE_EDGE_COLORING)
1032: #elif defined(HAVE_REDUNDANT_WORK)
1033: FCALLOC(4*nnodesLoc, &grid->resd);
1034: #else
1035: {
1036: /* Get the local adjacency structure of the graph for partitioning the local
1037: graph into max_threads pieces */
1038: int *ia,*ja,*vwtg=0,*adjwgt=0,options[5];
1039: int numflag = 0, wgtflag = 0, edgecut;
1040: int thr1,thr2,nedgeAllThreads,ned1,ned2;
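    /* Build a CSR adjacency structure with the diagonal (self) entries stripped,
       in the 0-based form expected by METIS */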
1041: ICALLOC((nvertices+1),&ia);
1042: ICALLOC((2*nedgeLoc),&ja);
1043: ia[0] = 0;
1044: for (i = 1; i <= nvertices; i++) ia[i] = grid->ia[i]-i-1;
1045: for (i = 0; i < nvertices; i++) {
1046: int jstart,jend;
1047: jstart = grid->ia[i]-1;
1048: jend = grid->ia[i+1]-1;
1049: k = ia[i];
1050: for (j=jstart; j < jend; j++) {
1051: inode = grid->ja[j]-1;
1052: if (inode != i) ja[k++] = inode;
1053: }
1054: }
1055: ICALLOC(nvertices,&grid->part_thr);
1056: PetscMemzero(grid->part_thr,nvertices*sizeof(int));
1057: options[0] = 0;
1058: /* Call the pmetis library routine */
1059: if (max_threads > 1)
1060: METIS_PartGraphRecursive(&nvertices,ia,ja,vwtg,adjwgt,
1061: &wgtflag,&numflag,&max_threads,options,&edgecut,grid->part_thr);
1062: PetscPrintf(MPI_COMM_WORLD,"The number of cut edges is %d\n", edgecut);
1063: /* Write the partition vector to disk */
1064: flg = PETSC_FALSE;
1065: PetscOptionsGetBool(NULL,NULL,"-omp_partitioning",&flg,NULL);
1066: if (flg) {
1067: int *partv_loc, *partv_glo;
1068: int *disp,*counts,*loc2glo_glo;
1069: char part_file[PETSC_MAX_PATH_LEN];
1070: FILE *fp;
1072: ICALLOC(nnodes, &partv_glo);
1073: ICALLOC(nnodesLoc, &partv_loc);
1074: for (i = 0; i < nnodesLoc; i++)
1075: /*partv_loc[i] = grid->part_thr[i]*size + rank;*/
1076: partv_loc[i] = grid->part_thr[i] + max_threads*rank;
1077: ICALLOC(size,&disp);
1078: ICALLOC(size,&counts);
1079: MPI_Allgather(&nnodesLoc,1,MPI_INT,counts,1,MPI_INT,MPI_COMM_WORLD);
1080: disp[0] = 0;
1081: for (i = 1; i < size; i++) disp[i] = counts[i-1] + disp[i-1];
1082: ICALLOC(nnodes, &loc2glo_glo);
1083: MPI_Gatherv(grid->loc2glo,nnodesLoc,MPI_INT,loc2glo_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1084: MPI_Gatherv(partv_loc,nnodesLoc,MPI_INT,partv_glo,counts,disp,MPI_INT,0,MPI_COMM_WORLD);
1085: if (rank == 0) {
1086: PetscSortIntWithArray(nnodes,loc2glo_glo,partv_glo);
1087: sprintf(part_file,"hyb_part_vec.%d",2*size);
1088: fp = fopen(part_file,"w");
1089: for (i = 0; i < nnodes; i++) fprintf(fp,"%d\n",partv_glo[i]);
1090: fclose(fp);
1091: }
1092: PetscFree(partv_loc);
1093: PetscFree(partv_glo);
1094: PetscFree(disp);
1095: PetscFree(counts);
1096: PetscFree(loc2glo_glo);
1097: }
1099: /* Divide the work among threads */
1100: k = 0;
1101: ICALLOC((max_threads+1),&grid->nedge_thr);
1102: PetscMemzero(grid->nedge_thr,(max_threads+1)*sizeof(int));
1103: cross_edges = 0;
1104: for (i = 0; i < nedgeLoc; i++) {
1105: node1 = grid->eptr[k++]-1;
1106: node2 = grid->eptr[k++]-1;
1107: thr1 = grid->part_thr[node1];
1108: thr2 = grid->part_thr[node2];
1109: grid->nedge_thr[thr1]+=1;
1110: if (thr1 != thr2) {
1111: grid->nedge_thr[thr2]+=1;
1112: cross_edges++;
1113: }
1114: }
1115: PetscPrintf(MPI_COMM_WORLD,"The number of cross edges after Metis partitioning is %d\n",cross_edges);
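    /* Turn the per-thread edge counts into 1-based starting offsets via a running sum */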
1116: ned1 = grid->nedge_thr[0];
1117: grid->nedge_thr[0] = 1;
1118: for (i = 1; i <= max_threads; i++) {
1119: ned2 = grid->nedge_thr[i];
1120: grid->nedge_thr[i] = grid->nedge_thr[i-1]+ned1;
1121: ned1 = ned2;
1122: }
1123: /* Allocate a shared edge array. Note that a cut edge is evaluated
1124: by both threads, but updates are done only for the locally
1125: owned node */
1126: grid->nedgeAllThr = nedgeAllThreads = grid->nedge_thr[max_threads]-1;
1127: ICALLOC(2*nedgeAllThreads, &grid->edge_thr);
1128: ICALLOC(max_threads,&tmp);
1129: FCALLOC(4*nedgeAllThreads,&grid->xyzn_thr);
1130: for (i = 0; i < max_threads; i++) tmp[i] = grid->nedge_thr[i]-1;
1131: k = 0;
1132: for (i = 0; i < nedgeLoc; i++) {
1133: int ie1,ie2,ie3;
1134: node1 = grid->eptr[k++];
1135: node2 = grid->eptr[k++];
1136: thr1 = grid->part_thr[node1-1];
1137: thr2 = grid->part_thr[node2-1];
1138: ie1 = 2*tmp[thr1];
1139: ie2 = 4*tmp[thr1];
1140: ie3 = 4*i;
1142: grid->edge_thr[ie1] = node1;
1143: grid->edge_thr[ie1+1] = node2;
1144: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1145: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1146: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1147: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1149: tmp[thr1]+=1;
1150: if (thr1 != thr2) {
1151: ie1 = 2*tmp[thr2];
1152: ie2 = 4*tmp[thr2];
1154: grid->edge_thr[ie1] = node1;
1155: grid->edge_thr[ie1+1] = node2;
1156: grid->xyzn_thr[ie2] = grid->xyzn[ie3];
1157: grid->xyzn_thr[ie2+1] = grid->xyzn[ie3+1];
1158: grid->xyzn_thr[ie2+2] = grid->xyzn[ie3+2];
1159: grid->xyzn_thr[ie2+3] = grid->xyzn[ie3+3];
1161: tmp[thr2]+=1;
1162: }
1163: }
1164: }
1165: #endif
1166: #endif
1168: /* Remap coordinates */
1169: /*nnodesLocEst = nnodes/size;*/
1170: nnodesLocEst = PetscMin(nnodes,500000);
1171: FCALLOC(nnodesLocEst,&ftmp);
1172: FCALLOC(3*nvertices,&grid->xyz);
1173: remNodes = nnodes;
1174: i = 0;
1175: PetscTime(&time_ini);
1176: while (remNodes > 0) {
1177: readNodes = PetscMin(remNodes,nnodesLocEst);
1178: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1179: for (j = 0; j < readNodes; j++) {
1180: if (a2l[i+j] >= 0) {
1181: #if defined(INTERLACING)
1182: grid->xyz[3*a2l[i+j]] = ftmp[j];
1183: #else
1184: grid->xyz[a2l[i+j]] = ftmp[j];
1185: #endif
1186: }
1187: }
1188: i += nnodesLocEst;
1189: remNodes -= nnodesLocEst;
1190: MPI_Barrier(comm);
1191: }
1193: remNodes = nnodes;
1194: i = 0;
1195: while (remNodes > 0) {
1196: readNodes = PetscMin(remNodes,nnodesLocEst);
1197: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1198: for (j = 0; j < readNodes; j++) {
1199: if (a2l[i+j] >= 0) {
1200: #if defined(INTERLACING)
1201: grid->xyz[3*a2l[i+j]+1] = ftmp[j];
1202: #else
1203: grid->xyz[nnodesLoc+a2l[i+j]] = ftmp[j];
1204: #endif
1205: }
1206: }
1207: i += nnodesLocEst;
1208: remNodes -= nnodesLocEst;
1209: MPI_Barrier(comm);
1210: }
1212: remNodes = nnodes;
1213: i = 0;
1214: while (remNodes > 0) {
1215: readNodes = PetscMin(remNodes,nnodesLocEst);
1216: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1217: for (j = 0; j < readNodes; j++) {
1218: if (a2l[i+j] >= 0) {
1219: #if defined(INTERLACING)
1220: grid->xyz[3*a2l[i+j]+2] = ftmp[j];
1221: #else
1222: grid->xyz[2*nnodesLoc+a2l[i+j]] = ftmp[j];
1223: #endif
1224: }
1225: }
1226: i += nnodesLocEst;
1227: remNodes -= nnodesLocEst;
1228: MPI_Barrier(comm);
1229: }
1231: /* Renumber dual volume "area" */
1232: FCALLOC(nvertices,&grid->area);
1233: remNodes = nnodes;
1234: i = 0;
1235: while (remNodes > 0) {
1236: readNodes = PetscMin(remNodes,nnodesLocEst);
1237: PetscBinarySynchronizedRead(comm,fdes,ftmp,readNodes,PETSC_SCALAR);
1238: for (j = 0; j < readNodes; j++)
1239: if (a2l[i+j] >= 0)
1240: grid->area[a2l[i+j]] = ftmp[j];
1241: i += nnodesLocEst;
1242: remNodes -= nnodesLocEst;
1243: MPI_Barrier(comm);
1244: }
1246: PetscFree(ftmp);
1247: PetscTime(&time_fin);
1248: time_fin -= time_ini;
1249: PetscPrintf(comm,"Coordinates remapped\n");
1250: PetscPrintf(comm,"Time taken in this phase was %g\n",time_fin);
1252: /* Now,handle all the solid boundaries - things to be done :
1253: * 1. Identify the nodes belonging to the solid
1254: * boundaries and count them.
1255: * 2. Put proper indices into f2ntn array,after making it
1256: * of suitable size.
1257: * 3. Remap the normals and areas of solid faces (sxn,syn,szn,
1258: * and sa arrays).
1259: */
1260: ICALLOC(nnbound, &grid->nntet);
1261: ICALLOC(nnbound, &grid->nnpts);
1262: ICALLOC(4*nnfacet,&grid->f2ntn);
1263: ICALLOC(nsnode,&grid->isnode);
1264: FCALLOC(nsnode,&grid->sxn);
1265: FCALLOC(nsnode,&grid->syn);
1266: FCALLOC(nsnode,&grid->szn);
1267: FCALLOC(nsnode,&grid->sa);
1268: PetscBinarySynchronizedRead(comm,fdes,grid->nntet,nnbound,PETSC_INT);
1269: PetscBinarySynchronizedRead(comm,fdes,grid->nnpts,nnbound,PETSC_INT);
1270: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntn,4*nnfacet,PETSC_INT);
1271: PetscBinarySynchronizedRead(comm,fdes,grid->isnode,nsnode,PETSC_INT);
1272: PetscBinarySynchronizedRead(comm,fdes,grid->sxn,nsnode,PETSC_SCALAR);
1273: PetscBinarySynchronizedRead(comm,fdes,grid->syn,nsnode,PETSC_SCALAR);
1274: PetscBinarySynchronizedRead(comm,fdes,grid->szn,nsnode,PETSC_SCALAR);
1276: isurf = 0;
1277: nsnodeLoc = 0;
1278: nnfacetLoc = 0;
1279: nb = 0;
1280: nte = 0;
1281: ICALLOC(3*nnfacet,&tmp);
1282: ICALLOC(nsnode,&tmp1);
1283: ICALLOC(nnodes,&tmp2);
1284: FCALLOC(4*nsnode,&ftmp);
1285: PetscMemzero(tmp,3*nnfacet*sizeof(int));
1286: PetscMemzero(tmp1,nsnode*sizeof(int));
1287: PetscMemzero(tmp2,nnodes*sizeof(int));
1289: j = 0;
1290: for (i = 0; i < nsnode; i++) {
1291: node1 = a2l[grid->isnode[i] - 1];
1292: if (node1 >= 0) {
1293: tmp1[nsnodeLoc] = node1;
1294: tmp2[node1] = nsnodeLoc;
1295: ftmp[j++] = grid->sxn[i];
1296: ftmp[j++] = grid->syn[i];
1297: ftmp[j++] = grid->szn[i];
1298: ftmp[j++] = grid->sa[i];
1299: nsnodeLoc++;
1300: }
1301: }
1302: for (i = 0; i < nnbound; i++) {
1303: for (j = isurf; j < isurf + grid->nntet[i]; j++) {
1304: node1 = a2l[grid->isnode[grid->f2ntn[j] - 1] - 1];
1305: node2 = a2l[grid->isnode[grid->f2ntn[nnfacet + j] - 1] - 1];
1306: node3 = a2l[grid->isnode[grid->f2ntn[2*nnfacet + j] - 1] - 1];
1308: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1309: nnfacetLoc++;
1310: nte++;
1311: tmp[nb++] = tmp2[node1];
1312: tmp[nb++] = tmp2[node2];
1313: tmp[nb++] = tmp2[node3];
1314: }
1315: }
1316: isurf += grid->nntet[i];
1317: /*printf("grid->nntet[%d] before reordering is %d\n",i,grid->nntet[i]);*/
1318: grid->nntet[i] = nte;
1319: /*printf("grid->nntet[%d] after reordering is %d\n",i,grid->nntet[i]);*/
1320: nte = 0;
1321: }
1322: PetscFree(grid->f2ntn);
1323: PetscFree(grid->isnode);
1324: PetscFree(grid->sxn);
1325: PetscFree(grid->syn);
1326: PetscFree(grid->szn);
1327: PetscFree(grid->sa);
1328: ICALLOC(4*nnfacetLoc,&grid->f2ntn);
1329: ICALLOC(nsnodeLoc,&grid->isnode);
1330: FCALLOC(nsnodeLoc,&grid->sxn);
1331: FCALLOC(nsnodeLoc,&grid->syn);
1332: FCALLOC(nsnodeLoc,&grid->szn);
1333: FCALLOC(nsnodeLoc,&grid->sa);
1334: j = 0;
1335: for (i = 0; i < nsnodeLoc; i++) {
1336: grid->isnode[i] = tmp1[i] + 1;
1337: grid->sxn[i] = ftmp[j++];
1338: grid->syn[i] = ftmp[j++];
1339: grid->szn[i] = ftmp[j++];
1340: grid->sa[i] = ftmp[j++];
1341: }
1342: j = 0;
1343: for (i = 0; i < nnfacetLoc; i++) {
1344: grid->f2ntn[i] = tmp[j++] + 1;
1345: grid->f2ntn[nnfacetLoc+i] = tmp[j++] + 1;
1346: grid->f2ntn[2*nnfacetLoc+i] = tmp[j++] + 1;
1347: }
1348: PetscFree(tmp);
1349: PetscFree(tmp1);
1350: PetscFree(tmp2);
1351: PetscFree(ftmp);
1353: /* Now identify the triangles on which the current processor
1354: will perform the force calculation */
1355: ICALLOC(nnfacetLoc,&grid->sface_bit);
1356: PetscMemzero(grid->sface_bit,nnfacetLoc*sizeof(int));
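  /* Flag a boundary triangle only on the lowest-ranked process that owns at least one
     of its nodes, so each face enters the force calculation exactly once */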
1357: for (i = 0; i < nnfacetLoc; i++) {
1358: node1 = l2a[grid->isnode[grid->f2ntn[i] - 1] - 1];
1359: node2 = l2a[grid->isnode[grid->f2ntn[nnfacetLoc + i] - 1] - 1];
1360: node3 = l2a[grid->isnode[grid->f2ntn[2*nnfacetLoc + i] - 1] - 1];
1361: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1362: && (v2p[node3] >= rank)) &&
1363: ((v2p[node1] == rank) || (v2p[node2] == rank)
1364: || (v2p[node3] == rank)))
1365: grid->sface_bit[i] = 1;
1366: }
1367: /*printf("On processor %d total solid triangles = %d,locally owned = %d alpha = %d\n",rank,totTr,myTr,alpha);*/
1368: PetscPrintf(comm,"Solid boundaries partitioned\n");
1370: /* Now,handle all the viscous boundaries - things to be done :
1371: * 1. Identify the nodes belonging to the viscous
1372: * boundaries and count them.
1373: * 2. Put proper indices into f2ntv array,after making it
1374: * of suitable size
1375: * 3. Remap the normals and areas of viscous faces (vxn,vyn,vzn,
1376: * and va arrays).
1377: */
1378: ICALLOC(nvbound, &grid->nvtet);
1379: ICALLOC(nvbound, &grid->nvpts);
1380: ICALLOC(4*nvfacet,&grid->f2ntv);
1381: ICALLOC(nvnode,&grid->ivnode);
1382: FCALLOC(nvnode,&grid->vxn);
1383: FCALLOC(nvnode,&grid->vyn);
1384: FCALLOC(nvnode,&grid->vzn);
1385: FCALLOC(nvnode,&grid->va);
1386: PetscBinarySynchronizedRead(comm,fdes,grid->nvtet,nvbound,PETSC_INT);
1387: PetscBinarySynchronizedRead(comm,fdes,grid->nvpts,nvbound,PETSC_INT);
1388: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntv,4*nvfacet,PETSC_INT);
1389: PetscBinarySynchronizedRead(comm,fdes,grid->ivnode,nvnode,PETSC_INT);
1390: PetscBinarySynchronizedRead(comm,fdes,grid->vxn,nvnode,PETSC_SCALAR);
1391: PetscBinarySynchronizedRead(comm,fdes,grid->vyn,nvnode,PETSC_SCALAR);
1392: PetscBinarySynchronizedRead(comm,fdes,grid->vzn,nvnode,PETSC_SCALAR);
1394: isurf = 0;
1395: nvnodeLoc = 0;
1396: nvfacetLoc = 0;
1397: nb = 0;
1398: nte = 0;
1399: ICALLOC(3*nvfacet,&tmp);
1400: ICALLOC(nvnode,&tmp1);
1401: ICALLOC(nnodes,&tmp2);
1402: FCALLOC(4*nvnode,&ftmp);
1403: PetscMemzero(tmp,3*nvfacet*sizeof(int));
1404: PetscMemzero(tmp1,nvnode*sizeof(int));
1405: PetscMemzero(tmp2,nnodes*sizeof(int));
1407: j = 0;
1408: for (i = 0; i < nvnode; i++) {
1409: node1 = a2l[grid->ivnode[i] - 1];
1410: if (node1 >= 0) {
1411: tmp1[nvnodeLoc] = node1;
1412: tmp2[node1] = nvnodeLoc;
1413: ftmp[j++] = grid->vxn[i];
1414: ftmp[j++] = grid->vyn[i];
1415: ftmp[j++] = grid->vzn[i];
1416: ftmp[j++] = grid->va[i];
1417: nvnodeLoc++;
1418: }
1419: }
1420: for (i = 0; i < nvbound; i++) {
1421: for (j = isurf; j < isurf + grid->nvtet[i]; j++) {
1422: node1 = a2l[grid->ivnode[grid->f2ntv[j] - 1] - 1];
1423: node2 = a2l[grid->ivnode[grid->f2ntv[nvfacet + j] - 1] - 1];
1424: node3 = a2l[grid->ivnode[grid->f2ntv[2*nvfacet + j] - 1] - 1];
1425: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1426: nvfacetLoc++;
1427: nte++;
1428: tmp[nb++] = tmp2[node1];
1429: tmp[nb++] = tmp2[node2];
1430: tmp[nb++] = tmp2[node3];
1431: }
1432: }
1433: isurf += grid->nvtet[i];
1434: grid->nvtet[i] = nte;
1435: nte = 0;
1436: }
1437: PetscFree(grid->f2ntv);
1438: PetscFree(grid->ivnode);
1439: PetscFree(grid->vxn);
1440: PetscFree(grid->vyn);
1441: PetscFree(grid->vzn);
1442: PetscFree(grid->va);
1443: ICALLOC(4*nvfacetLoc,&grid->f2ntv);
1444: ICALLOC(nvnodeLoc,&grid->ivnode);
1445: FCALLOC(nvnodeLoc,&grid->vxn);
1446: FCALLOC(nvnodeLoc,&grid->vyn);
1447: FCALLOC(nvnodeLoc,&grid->vzn);
1448: FCALLOC(nvnodeLoc,&grid->va);
1449: j = 0;
1450: for (i = 0; i < nvnodeLoc; i++) {
1451: grid->ivnode[i] = tmp1[i] + 1;
1452: grid->vxn[i] = ftmp[j++];
1453: grid->vyn[i] = ftmp[j++];
1454: grid->vzn[i] = ftmp[j++];
1455: grid->va[i] = ftmp[j++];
1456: }
1457: j = 0;
1458: for (i = 0; i < nvfacetLoc; i++) {
1459: grid->f2ntv[i] = tmp[j++] + 1;
1460: grid->f2ntv[nvfacetLoc+i] = tmp[j++] + 1;
1461: grid->f2ntv[2*nvfacetLoc+i] = tmp[j++] + 1;
1462: }
1463: PetscFree(tmp);
1464: PetscFree(tmp1);
1465: PetscFree(tmp2);
1466: PetscFree(ftmp);
1468: /* Now identify the triangles on which the current processor
1469: will perform the force calculation */
1470: ICALLOC(nvfacetLoc,&grid->vface_bit);
1471: PetscMemzero(grid->vface_bit,nvfacetLoc*sizeof(int));
1472: for (i = 0; i < nvfacetLoc; i++) {
1473: node1 = l2a[grid->ivnode[grid->f2ntv[i] - 1] - 1];
1474: node2 = l2a[grid->ivnode[grid->f2ntv[nvfacetLoc + i] - 1] - 1];
1475: node3 = l2a[grid->ivnode[grid->f2ntv[2*nvfacetLoc + i] - 1] - 1];
1476: if (((v2p[node1] >= rank) && (v2p[node2] >= rank)
1477: && (v2p[node3] >= rank)) &&
1478: ((v2p[node1] == rank) || (v2p[node2] == rank)
1479: || (v2p[node3] == rank))) {
1480: grid->vface_bit[i] = 1;
1481: }
1482: }
1483: PetscFree(v2p);
1484: PetscPrintf(comm,"Viscous boundaries partitioned\n");
1486: /* Now,handle all the free boundaries - things to be done :
1487: * 1. Identify the nodes belonging to the free
1488: * boundaries and count them.
1489: * 2. Put proper indices into f2ntf array,after making it
1490: * of suitable size
1491: * 3. Remap the normals and areas of free bound. faces (fxn,fyn,fzn,
1492: * and fa arrays).
1493: */
1495: ICALLOC(nfbound, &grid->nftet);
1496: ICALLOC(nfbound, &grid->nfpts);
1497: ICALLOC(4*nffacet,&grid->f2ntf);
1498: ICALLOC(nfnode,&grid->ifnode);
1499: FCALLOC(nfnode,&grid->fxn);
1500: FCALLOC(nfnode,&grid->fyn);
1501: FCALLOC(nfnode,&grid->fzn);
1502: FCALLOC(nfnode,&grid->fa);
1503: PetscBinarySynchronizedRead(comm,fdes,grid->nftet,nfbound,PETSC_INT);
1504: PetscBinarySynchronizedRead(comm,fdes,grid->nfpts,nfbound,PETSC_INT);
1505: PetscBinarySynchronizedRead(comm,fdes,grid->f2ntf,4*nffacet,PETSC_INT);
1506: PetscBinarySynchronizedRead(comm,fdes,grid->ifnode,nfnode,PETSC_INT);
1507: PetscBinarySynchronizedRead(comm,fdes,grid->fxn,nfnode,PETSC_SCALAR);
1508: PetscBinarySynchronizedRead(comm,fdes,grid->fyn,nfnode,PETSC_SCALAR);
1509: PetscBinarySynchronizedRead(comm,fdes,grid->fzn,nfnode,PETSC_SCALAR);
1511: isurf = 0;
1512: nfnodeLoc = 0;
1513: nffacetLoc = 0;
1514: nb = 0;
1515: nte = 0;
1516: ICALLOC(3*nffacet,&tmp);
1517: ICALLOC(nfnode,&tmp1);
1518: ICALLOC(nnodes,&tmp2);
1519: FCALLOC(4*nfnode,&ftmp);
1520: PetscMemzero(tmp,3*nffacet*sizeof(int));
1521: PetscMemzero(tmp1,nfnode*sizeof(int));
1522: PetscMemzero(tmp2,nnodes*sizeof(int));
1524: j = 0;
1525: for (i = 0; i < nfnode; i++) {
1526: node1 = a2l[grid->ifnode[i] - 1];
1527: if (node1 >= 0) {
1528: tmp1[nfnodeLoc] = node1;
1529: tmp2[node1] = nfnodeLoc;
1530: ftmp[j++] = grid->fxn[i];
1531: ftmp[j++] = grid->fyn[i];
1532: ftmp[j++] = grid->fzn[i];
1533: ftmp[j++] = grid->fa[i];
1534: nfnodeLoc++;
1535: }
1536: }
1537: for (i = 0; i < nfbound; i++) {
1538: for (j = isurf; j < isurf + grid->nftet[i]; j++) {
1539: node1 = a2l[grid->ifnode[grid->f2ntf[j] - 1] - 1];
1540: node2 = a2l[grid->ifnode[grid->f2ntf[nffacet + j] - 1] - 1];
1541: node3 = a2l[grid->ifnode[grid->f2ntf[2*nffacet + j] - 1] - 1];
1542: if ((node1 >= 0) && (node2 >= 0) && (node3 >= 0)) {
1543: nffacetLoc++;
1544: nte++;
1545: tmp[nb++] = tmp2[node1];
1546: tmp[nb++] = tmp2[node2];
1547: tmp[nb++] = tmp2[node3];
1548: }
1549: }
1550: isurf += grid->nftet[i];
1551: grid->nftet[i] = nte;
1552: nte = 0;
1553: }
1554: PetscFree(grid->f2ntf);
1555: PetscFree(grid->ifnode);
1556: PetscFree(grid->fxn);
1557: PetscFree(grid->fyn);
1558: PetscFree(grid->fzn);
1559: PetscFree(grid->fa);
1560: ICALLOC(4*nffacetLoc,&grid->f2ntf);
1561: ICALLOC(nfnodeLoc,&grid->ifnode);
1562: FCALLOC(nfnodeLoc,&grid->fxn);
1563: FCALLOC(nfnodeLoc,&grid->fyn);
1564: FCALLOC(nfnodeLoc,&grid->fzn);
1565: FCALLOC(nfnodeLoc,&grid->fa);
1566: j = 0;
1567: for (i = 0; i < nfnodeLoc; i++) {
1568: grid->ifnode[i] = tmp1[i] + 1;
1569: grid->fxn[i] = ftmp[j++];
1570: grid->fyn[i] = ftmp[j++];
1571: grid->fzn[i] = ftmp[j++];
1572: grid->fa[i] = ftmp[j++];
1573: }
1574: j = 0;
1575: for (i = 0; i < nffacetLoc; i++) {
1576: grid->f2ntf[i] = tmp[j++] + 1;
1577: grid->f2ntf[nffacetLoc+i] = tmp[j++] + 1;
1578: grid->f2ntf[2*nffacetLoc+i] = tmp[j++] + 1;
1579: }
1581: PetscFree(tmp);
1582: PetscFree(tmp1);
1583: PetscFree(tmp2);
1584: PetscFree(ftmp);
1585: PetscPrintf(comm,"Free boundaries partitioned\n");
1587: flg = PETSC_FALSE;
1588: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
1589: if (flg) {
1590: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after partitioning\n");
1591: }
1593: /* Put different mappings and other info into grid */
1594: /* ICALLOC(nvertices,&grid->loc2pet);
1595: ICALLOC(nvertices,&grid->loc2glo);
1596: PetscMemcpy(grid->loc2pet,l2p,nvertices*sizeof(int));
1597: PetscMemcpy(grid->loc2glo,l2a,nvertices*sizeof(int));
1598: PetscFree(l2a);
1599: PetscFree(l2p);*/
1601: grid->nnodesLoc = nnodesLoc;
1602: grid->nedgeLoc = nedgeLoc;
1603: grid->nvertices = nvertices;
1604: grid->nsnodeLoc = nsnodeLoc;
1605: grid->nvnodeLoc = nvnodeLoc;
1606: grid->nfnodeLoc = nfnodeLoc;
1607: grid->nnfacetLoc = nnfacetLoc;
1608: grid->nvfacetLoc = nvfacetLoc;
1609: grid->nffacetLoc = nffacetLoc;
1610: /*
1611: * FCALLOC(nvertices*4, &grid->gradx);
1612: * FCALLOC(nvertices*4, &grid->grady);
1613: * FCALLOC(nvertices*4, &grid->gradz);
1614: */
1615: FCALLOC(nvertices, &grid->cdt);
1616: FCALLOC(nvertices*4, &grid->phi);
1617: /*
1618: FCALLOC(nvertices, &grid->r11);
1619: FCALLOC(nvertices, &grid->r12);
1620: FCALLOC(nvertices, &grid->r13);
1621: FCALLOC(nvertices, &grid->r22);
1622: FCALLOC(nvertices, &grid->r23);
1623: FCALLOC(nvertices, &grid->r33);
1624: */
1625: FCALLOC(7*nnodesLoc, &grid->rxy);
1627: /* Map the 'ja' array into PETSc ordering */
1628: for (i = 0; i < nnz; i++) grid->ja[i] = l2a[grid->ja[i] - 1];
1629: AOApplicationToPetsc(ao,nnz,grid->ja);
1630: AODestroy(&ao);
1632: /* Print the different mappings
1633: *
1634: */
1635: {
1636: int partLoc[7],partMax[7],partMin[7],partSum[7];
1637: partLoc[0] = nnodesLoc;
1638: partLoc[1] = nvertices;
1639: partLoc[2] = nedgeLoc;
1640: partLoc[3] = nnfacetLoc;
1641: partLoc[4] = nffacetLoc;
1642: partLoc[5] = nsnodeLoc;
1643: partLoc[6] = nfnodeLoc;
1644: for (i = 0; i < 7; i++) {
1645: partMin[i] = 0;
1646: partMax[i] = 0;
1647: partSum[i] = 0;
1648: }
1650: MPI_Allreduce(partLoc,partMax,7,MPI_INT,MPI_MAX,comm);
1651: MPI_Allreduce(partLoc,partMin,7,MPI_INT,MPI_MIN,comm);
1652: MPI_Allreduce(partLoc,partSum,7,MPI_INT,MPI_SUM,comm);
1653: PetscPrintf(comm,"==============================\n");
1654: PetscPrintf(comm,"Partitioning quality info ....\n");
1655: PetscPrintf(comm,"==============================\n");
1656: PetscPrintf(comm,"------------------------------------------------------------\n");
1657: PetscPrintf(comm,"Item Min Max Average Total\n");
1658: PetscPrintf(comm,"------------------------------------------------------------\n");
1659: PetscPrintf(comm,"Local Nodes %9d %9d %9d %9d\n",
1660: partMin[0],partMax[0],partSum[0]/size,partSum[0]);
1661: PetscPrintf(comm,"Local+Ghost Nodes %9d %9d %9d %9d\n",
1662: partMin[1],partMax[1],partSum[1]/size,partSum[1]);
1663: PetscPrintf(comm,"Local Edges %9d %9d %9d %9d\n",
1664: partMin[2],partMax[2],partSum[2]/size,partSum[2]);
1665: PetscPrintf(comm,"Local solid faces %9d %9d %9d %9d\n",
1666: partMin[3],partMax[3],partSum[3]/size,partSum[3]);
1667: PetscPrintf(comm,"Local free faces %9d %9d %9d %9d\n",
1668: partMin[4],partMax[4],partSum[4]/size,partSum[4]);
1669: PetscPrintf(comm,"Local solid nodes %9d %9d %9d %9d\n",
1670: partMin[5],partMax[5],partSum[5]/size,partSum[5]);
1671: PetscPrintf(comm,"Local free nodes %9d %9d %9d %9d\n",
1672: partMin[6],partMax[6],partSum[6]/size,partSum[6]);
1673: PetscPrintf(comm,"------------------------------------------------------------\n");
1674: }
1675: flg = PETSC_FALSE;
1676: PetscOptionsGetBool(NULL,NULL,"-partition_info",&flg,NULL);
1677: if (flg) {
1678: char part_file[PETSC_MAX_PATH_LEN];
1679: sprintf(part_file,"output.%d",rank);
1680: fptr1 = fopen(part_file,"w");
1682: fprintf(fptr1,"---------------------------------------------\n");
1683: fprintf(fptr1,"Local and Global Grid Parameters are :\n");
1684: fprintf(fptr1,"---------------------------------------------\n");
1685: fprintf(fptr1,"Local\t\t\t\tGlobal\n");
1686: fprintf(fptr1,"nnodesLoc = %d\t\tnnodes = %d\n",nnodesLoc,nnodes);
1687: fprintf(fptr1,"nedgeLoc = %d\t\t\tnedge = %d\n",nedgeLoc,nedge);
1688: fprintf(fptr1,"nnfacetLoc = %d\t\tnnfacet = %d\n",nnfacetLoc,nnfacet);
1689: fprintf(fptr1,"nvfacetLoc = %d\t\t\tnvfacet = %d\n",nvfacetLoc,nvfacet);
1690: fprintf(fptr1,"nffacetLoc = %d\t\t\tnffacet = %d\n",nffacetLoc,nffacet);
1691: fprintf(fptr1,"nsnodeLoc = %d\t\t\tnsnode = %d\n",nsnodeLoc,nsnode);
1692: fprintf(fptr1,"nvnodeLoc = %d\t\t\tnvnode = %d\n",nvnodeLoc,nvnode);
1693: fprintf(fptr1,"nfnodeLoc = %d\t\t\tnfnode = %d\n",nfnodeLoc,nfnode);
1694: fprintf(fptr1,"\n");
1695: fprintf(fptr1,"nvertices = %d\n",nvertices);
1696: fprintf(fptr1,"nnbound = %d\n",nnbound);
1697: fprintf(fptr1,"nvbound = %d\n",nvbound);
1698: fprintf(fptr1,"nfbound = %d\n",nfbound);
1699: fprintf(fptr1,"\n");
1701: fprintf(fptr1,"---------------------------------------------\n");
1702: fprintf(fptr1,"Different Orderings\n");
1703: fprintf(fptr1,"---------------------------------------------\n");
1704: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1705: fprintf(fptr1,"---------------------------------------------\n");
1706: for (i = 0; i < nvertices; i++) fprintf(fptr1,"%d\t\t%d\t\t%d\n",i,grid->loc2pet[i],grid->loc2glo[i]);
1707: fprintf(fptr1,"\n");
1709: fprintf(fptr1,"---------------------------------------------\n");
1710: fprintf(fptr1,"Solid Boundary Nodes\n");
1711: fprintf(fptr1,"---------------------------------------------\n");
1712: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1713: fprintf(fptr1,"---------------------------------------------\n");
1714: for (i = 0; i < nsnodeLoc; i++) {
1715: j = grid->isnode[i]-1;
1716: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1717: }
1718: fprintf(fptr1,"\n");
1719: fprintf(fptr1,"---------------------------------------------\n");
1720: fprintf(fptr1,"f2ntn array\n");
1721: fprintf(fptr1,"---------------------------------------------\n");
1722: for (i = 0; i < nnfacetLoc; i++) {
1723: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntn[i],
1724: grid->f2ntn[nnfacetLoc+i],grid->f2ntn[2*nnfacetLoc+i]);
1725: }
1726: fprintf(fptr1,"\n");
1728: fprintf(fptr1,"---------------------------------------------\n");
1729: fprintf(fptr1,"Viscous Boundary Nodes\n");
1730: fprintf(fptr1,"---------------------------------------------\n");
1731: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1732: fprintf(fptr1,"---------------------------------------------\n");
1733: for (i = 0; i < nvnodeLoc; i++) {
1734: j = grid->ivnode[i]-1;
1735: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1736: }
1737: fprintf(fptr1,"\n");
1738: fprintf(fptr1,"---------------------------------------------\n");
1739: fprintf(fptr1,"f2ntv array\n");
1740: fprintf(fptr1,"---------------------------------------------\n");
1741: for (i = 0; i < nvfacetLoc; i++) {
1742: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntv[i],
1743: grid->f2ntv[nvfacetLoc+i],grid->f2ntv[2*nvfacetLoc+i]);
1744: }
1745: fprintf(fptr1,"\n");
1747: fprintf(fptr1,"---------------------------------------------\n");
1748: fprintf(fptr1,"Free Boundary Nodes\n");
1749: fprintf(fptr1,"---------------------------------------------\n");
1750: fprintf(fptr1,"Local\t\tPETSc\t\tGlobal\n");
1751: fprintf(fptr1,"---------------------------------------------\n");
1752: for (i = 0; i < nfnodeLoc; i++) {
1753: j = grid->ifnode[i]-1;
1754: fprintf(fptr1,"%d\t\t%d\t\t%d\n",j,grid->loc2pet[j],grid->loc2glo[j]);
1755: }
1756: fprintf(fptr1,"\n");
1757: fprintf(fptr1,"---------------------------------------------\n");
1758: fprintf(fptr1,"f2ntf array\n");
1759: fprintf(fptr1,"---------------------------------------------\n");
1760: for (i = 0; i < nffacetLoc; i++) {
1761: fprintf(fptr1,"%d\t\t%d\t\t%d\t\t%d\n",i,grid->f2ntf[i],
1762: grid->f2ntf[nffacetLoc+i],grid->f2ntf[2*nffacetLoc+i]);
1763: }
1764: fprintf(fptr1,"\n");
1766: fprintf(fptr1,"---------------------------------------------\n");
1767: fprintf(fptr1,"Neighborhood Info In Various Ordering\n");
1768: fprintf(fptr1,"---------------------------------------------\n");
1769: ICALLOC(nnodes,&p2l);
1770: for (i = 0; i < nvertices; i++) p2l[grid->loc2pet[i]] = i;
1771: for (i = 0; i < nnodesLoc; i++) {
1772: jstart = grid->ia[grid->loc2glo[i]] - 1;
1773: jend = grid->ia[grid->loc2glo[i]+1] - 1;
1774: fprintf(fptr1,"Neighbors of Node %d in Local Ordering are :",i);
1775: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",p2l[grid->ja[j]]);
1776: fprintf(fptr1,"\n");
1778: fprintf(fptr1,"Neighbors of Node %d in PETSc ordering are :",grid->loc2pet[i]);
1779: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->ja[j]);
1780: fprintf(fptr1,"\n");
1782: fprintf(fptr1,"Neighbors of Node %d in Global Ordering are :",grid->loc2glo[i]);
1783: for (j = jstart; j < jend; j++) fprintf(fptr1,"%d ",grid->loc2glo[p2l[grid->ja[j]]]);
1784: fprintf(fptr1,"\n");
1786: }
1787: fprintf(fptr1,"\n");
1788: PetscFree(p2l);
1789: fclose(fptr1);
1790: }
1792: /* Free the temporary arrays */
1793: PetscFree(a2l);
1794: MPI_Barrier(comm);
1795: return 0;
1796: }
1798: /*
1799: encode 3 8-bit binary bytes as 4 '6-bit' characters, len is the number of bytes remaining, at most 3 are used
1800: */
1801: void *base64_encodeblock(void *vout,const void *vin,int len)
1802: {
1803: unsigned char *out = (unsigned char*)vout,in[3] = {0,0,0};
1804: /* Translation Table as described in RFC1113 */
1805: static const char cb64[]="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
1806: memcpy(in,vin,PetscMin(len,3));
1807: out[0] = cb64[in[0] >> 2];
1808: out[1] = cb64[((in[0] & 0x03) << 4) | ((in[1] & 0xf0) >> 4)];
1809: out[2] = (unsigned char) (len > 1 ? cb64[((in[1] & 0x0f) << 2) | ((in[2] & 0xc0) >> 6)] : '=');
1810: out[3] = (unsigned char) (len > 2 ? cb64[in[2] & 0x3f] : '=');
1811: return (void*)(out+4);
1812: }
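/* Worked example: the 3 input bytes "Man" (0x4D 0x61 0x6E) regroup into the 6-bit values
   19,22,5,46 and encode as "TWFu".  With len==2 the last output character becomes '=', and
   with len==1 the last two become "==", matching the standard base64 padding rules. */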
1814: /* Write binary data, does not do byte swapping. */
1815: static PetscErrorCode PetscFWrite_FUN3D(MPI_Comm comm,FILE *fp,void *data,PetscInt n,PetscDataType dtype,PetscBool base64)
1816: {
1817: PetscMPIInt rank;
1820: if (!n) return 0;
1821: MPI_Comm_rank(comm,&rank);
1822: if (rank == 0) {
1823: size_t count,size; /* size = bytes per item of dtype (local, so the file-scope MPI 'size' is not clobbered) */
1824: int bytes;
1825: switch (dtype) {
1826: case PETSC_DOUBLE:
1827: size = sizeof(double);
1828: break;
1829: case PETSC_FLOAT:
1830: size = sizeof(float);
1831: break;
1832: case PETSC_INT:
1833: size = sizeof(PetscInt);
1834: break;
1835: case PETSC_CHAR:
1836: size = sizeof(char);
1837: break;
1838: default: SETERRQ(comm,PETSC_ERR_SUP,"Data type not supported");
1839: }
1840: bytes = size*n;
1841: if (base64) {
1842: unsigned char *buf,*ptr;
1843: int i;
1844: size_t b64alloc = 9 + (n*size*4) / 3 + (n*size*4) % 3;
1845: PetscMalloc(b64alloc,&buf);
1846: ptr = buf;
1847: ptr = (unsigned char*)base64_encodeblock(ptr,&bytes,3);
1848: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)&bytes)+3,1);
1849: for (i=0; i<bytes; i+=3) {
1850: int left = bytes - i;
1851: ptr = (unsigned char*)base64_encodeblock(ptr,((char*)data)+i,left);
1852: }
1853: *ptr++ = '\n';
1854: /* printf("encoded 4+%d raw bytes in %zd base64 chars, allocated for %zd\n",bytes,ptr-buf,b64alloc); */
1855: count = fwrite(buf,1,ptr-buf,fp);
1856: if (count < (size_t)(ptr-buf)) {
1857: perror("");
1858: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %" PetscInt_FMT " of %" PetscInt_FMT " bytes",(PetscInt)count,(PetscInt)(ptr-buf));
1859: }
1860: PetscFree(buf);
1861: } else {
1862: count = fwrite(&bytes,sizeof(int),1,fp);
1863: if (count != 1) {
1864: perror("");
1865: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Error writing byte count");
1866: }
1867: count = fwrite(data,size,(size_t)n,fp);
1868: if ((int)count != n) {
1869: perror("");
1870: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_WRITE,"Wrote %" PetscInt_FMT "/%" PetscInt_FMT " array members of size %" PetscInt_FMT,(PetscInt)count,n,(PetscInt)size);
1871: }
1872: }
1873: }
1874: return 0;
1875: }
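/* Framing note: in raw mode the stream is a 4-byte int holding the payload size in bytes,
   followed by the payload itself; in base64 mode that 4-byte header is encoded first (as a
   3-byte block plus a padded 1-byte block), then the payload, terminated by a newline.  This
   matches the VTK appended-data convention used by WritePVTU below, where every DataArray
   offset accounts for the extra sizeof(int) header. */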
1877: static void SortInt2(PetscInt *a,PetscInt *b)
1878: {
1879: if (*b < *a) {
1880: PetscInt c = *b;
1881: *b = *a;
1882: *a = c;
1883: }
1884: }
1886: /* b = intersection(a,b) */
1887: static PetscErrorCode IntersectInt(PetscInt na,const PetscInt *a,PetscInt *nb,PetscInt *b)
1888: {
1889: PetscInt i,n,j;
1891: j = 0;
1892: n = 0;
1893: for (i=0; i<*nb; i++) {
1894: while (j<na && a[j]<b[i]) j++;
1895: if (j<na && a[j]==b[i]) {
1896: b[n++] = b[i];
1897: j++;
1898: }
1899: }
1900: *nb = n;
1901: return 0;
1902: }
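/* Both a and b must be sorted ascending.  For example, with a = {1,3,5,7} (na=4) and
   b = {2,3,7,9} (*nb=4), the routine rewrites b in place to {3,7} and sets *nb = 2. */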
1904: /*
1905: This function currently has a semantic bug: it only produces cells containing all local edges. Since the local mesh
1906: does not even store edges between unowned nodes, primal cells that are effectively shared between processes will not
1907: be constructed. This causes visualization artifacts.
1909: This issue could be resolved by either (a) storing more edges from the original mesh or (b) communicating an extra
1910: layer of edges in this function.
1911: */
1912: static PetscErrorCode InferLocalCellConnectivity(PetscInt nnodes,PetscInt nedge,const PetscInt *eptr,PetscInt *incell,PetscInt **iconn)
1913: {
1914: PetscInt ncell,acell,(*conn)[4],node0,node1,node2,node3,i,j,k,l,rowmax;
1915: PetscInt *ui,*uj,*utmp,*tmp1,*tmp2,*tmp3,ntmp1,ntmp2,ntmp3;
1916: #if defined(INTERLACING)
1917: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i*2+0]-1; n2 = eptr[i*2+1]-1; } while (0)
1918: #else
1919: # define GetEdge(eptr,i,n1,n2) do { n1 = eptr[i+0*nedge]-1; n2 = eptr[i+1*nedge]-1; } while (0)
1920: #endif
1922: *incell = -1;
1923: *iconn = NULL;
1924: acell = 100000; /* allocate for this many cells */
1925: PetscMalloc1(acell,&conn);
1926: PetscMalloc2(nnodes+1,&ui,nedge,&uj);
1927: PetscCalloc1(nnodes,&utmp);
1928: /* count the number of edges in the upper-triangular matrix u */
1929: for (i=0; i<nedge; i++) { /* count number of nonzeros in upper triangular matrix */
1930: GetEdge(eptr,i,node0,node1);
1931: utmp[PetscMin(node0,node1)]++;
1932: }
1933: rowmax = 0;
1934: ui[0] = 0;
1935: for (i=0; i<nnodes; i++) {
1936: rowmax = PetscMax(rowmax,utmp[i]);
1937: ui[i+1] = ui[i] + utmp[i]; /* convert from count to row offsets */
1938: utmp[i] = 0;
1939: }
1940: for (i=0; i<nedge; i++) { /* assemble upper triangular matrix U */
1941: GetEdge(eptr,i,node0,node1);
1942: SortInt2(&node0,&node1);
1943: uj[ui[node0] + utmp[node0]++] = node1;
1944: }
1945: PetscFree(utmp);
1946: for (i=0; i<nnodes; i++) { /* sort every row */
1947: PetscInt n = ui[i+1] - ui[i];
1948: PetscSortInt(n,&uj[ui[i]]);
1949: }
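/* Example of the structure built above: for a single tetrahedron on nodes 0-3 with its six
   edges, the sorted upper-triangular rows are 0 -> {1,2,3}, 1 -> {2,3}, 2 -> {3}, 3 -> {},
   i.e. ui = {0,3,5,6,6} and rowmax = 3.  The nested loops below pick node0 < node1 < node2 < node3,
   drawing each later node from the common higher-numbered neighbors of the earlier ones. */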
1951: /* Infer cells */
1952: ncell = 0;
1953: PetscMalloc3(rowmax,&tmp1,rowmax,&tmp2,rowmax,&tmp3);
1954: for (i=0; i<nnodes; i++) {
1955: node0 = i;
1956: ntmp1 = ui[node0+1] - ui[node0]; /* Number of candidates for node1 */
1957: PetscMemcpy(tmp1,&uj[ui[node0]],ntmp1*sizeof(PetscInt));
1958: for (j=0; j<ntmp1; j++) {
1959: node1 = tmp1[j];
1962: ntmp2 = ui[node1+1] - ui[node1];
1963: PetscMemcpy(tmp2,&uj[ui[node1]],ntmp2*sizeof(PetscInt));
1964: IntersectInt(ntmp1,tmp1,&ntmp2,tmp2);
1965: for (k=0; k<ntmp2; k++) {
1966: node2 = tmp2[k];
1969: ntmp3 = ui[node2+1] - ui[node2];
1970: PetscMemcpy(tmp3,&uj[ui[node2]],ntmp3*sizeof(PetscInt));
1971: IntersectInt(ntmp2,tmp2,&ntmp3,tmp3);
1972: for (l=0; l<ntmp3; l++) {
1973: node3 = tmp3[l];
1977: if (ntmp3 < 3) continue;
1978: conn[ncell][0] = node0;
1979: conn[ncell][1] = node1;
1980: conn[ncell][2] = node2;
1981: conn[ncell][3] = node3;
1982: if (0) {
1983: PetscViewer viewer = PETSC_VIEWER_STDOUT_WORLD;
1984: PetscViewerASCIIPrintf(viewer,"created cell %d: %d %d %d %d\n",ncell,node0,node1,node2,node3);
1985: PetscIntView(ntmp1,tmp1,viewer);
1986: PetscIntView(ntmp2,tmp2,viewer);
1987: PetscIntView(ntmp3,tmp3,viewer);
1988: /* uns3d.msh has a handful of "tetrahedra" that overlap by violating the following condition. As far as I
1989: * can tell, that means it is an invalid mesh. I don't know what the intent was. */
1991: }
1992: ncell++;
1993: }
1994: }
1995: }
1996: }
1997: PetscFree3(tmp1,tmp2,tmp3);
1998: PetscFree2(ui,uj);
2000: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"Inferred %" PetscInt_FMT " cells with nnodes=%" PetscInt_FMT " nedge=%" PetscInt_FMT "\n",ncell,nnodes,nedge);
2001: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
2002: *incell = ncell;
2003: *iconn = (PetscInt*)conn;
2004: return 0;
2005: }
2007: static PetscErrorCode GridCompleteOverlap(GRID *grid,PetscInt *invertices,PetscInt *inedgeOv,PetscInt **ieptrOv)
2008: {
2009: PetscInt nedgeLoc,nedgeOv,i,j,cnt,node0,node1,node0p,node1p,nnodes,nnodesLoc,nvertices,rstart,nodeEdgeCountAll,nodeEdgeRstart;
2010: PetscInt *nodeEdgeCount,*nodeEdgeOffset,*eIdxOv,*p2l,*eptrOv;
2011: Vec VNodeEdge,VNodeEdgeInfo,VNodeEdgeInfoOv,VNodeEdgeOv;
2012: PetscScalar *vne,*vnei;
2013: IS isglobal,isedgeOv;
2014: VecScatter nescat,neiscat;
2015: PetscBool flg;
2017: nnodes = grid->nnodes; /* Total number of global nodes */
2018: nnodesLoc = grid->nnodesLoc; /* Number of owned nodes */
2019: nvertices = grid->nvertices; /* Number of owned+ghosted nodes */
2020: nedgeLoc = grid->nedgeLoc; /* Number of edges connected to owned nodes */
2022: /* Count the number of neighbors of each owned node */
2023: MPI_Scan(&nnodesLoc,&rstart,1,MPIU_INT,MPI_SUM,PETSC_COMM_WORLD);
2024: rstart -= nnodesLoc;
2025: PetscMalloc2(nnodesLoc,&nodeEdgeCount,nnodesLoc,&nodeEdgeOffset);
2026: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2027: for (i=0; i<nedgeLoc; i++) {
2028: GetEdge(grid->eptr,i,node0,node1);
2029: node0p = grid->loc2pet[node0];
2030: node1p = grid->loc2pet[node1];
2031: if (rstart <= node0p && node0p < rstart+nnodesLoc) nodeEdgeCount[node0p-rstart]++;
2032: if (rstart <= node1p && node1p < rstart+nnodesLoc) nodeEdgeCount[node1p-rstart]++;
2033: }
2034: /* Get the offset in the node-based edge array */
2035: nodeEdgeOffset[0] = 0;
2036: for (i=0; i<nnodesLoc-1; i++) nodeEdgeOffset[i+1] = nodeEdgeOffset[i] + nodeEdgeCount[i];
2037: nodeEdgeCountAll = nodeEdgeCount[nnodesLoc-1] + nodeEdgeOffset[nnodesLoc-1];
2039: /* Pack a Vec by node of all the edges for that node. The nodes are stored by global index */
2040: VecCreateMPI(PETSC_COMM_WORLD,nodeEdgeCountAll,PETSC_DETERMINE,&VNodeEdge);
2041: PetscMemzero(nodeEdgeCount,nnodesLoc*sizeof(*nodeEdgeCount));
2042: VecGetArray(VNodeEdge,&vne);
2043: for (i=0; i<nedgeLoc; i++) {
2044: GetEdge(grid->eptr,i,node0,node1);
2045: node0p = grid->loc2pet[node0];
2046: node1p = grid->loc2pet[node1];
2047: if (rstart <= node0p && node0p < rstart+nnodesLoc) vne[nodeEdgeOffset[node0p-rstart] + nodeEdgeCount[node0p-rstart]++] = node1p;
2048: if (rstart <= node1p && node1p < rstart+nnodesLoc) vne[nodeEdgeOffset[node1p-rstart] + nodeEdgeCount[node1p-rstart]++] = node0p;
2049: }
2050: VecRestoreArray(VNodeEdge,&vne);
2051: VecGetOwnershipRange(VNodeEdge,&nodeEdgeRstart,NULL);
2053: /* Move the count and offset into a Vec so that we can use VecScatter, translating offset from local to global */
2054: VecCreate(PETSC_COMM_WORLD,&VNodeEdgeInfo);
2055: VecSetSizes(VNodeEdgeInfo,2*nnodesLoc,2*nnodes);
2056: VecSetBlockSize(VNodeEdgeInfo,2);
2057: VecSetType(VNodeEdgeInfo,VECMPI);
2059: VecGetArray(VNodeEdgeInfo,&vnei);
2060: for (i=0; i<nnodesLoc; i++) {
2061: vnei[i*2+0] = nodeEdgeCount[i]; /* Total number of edges from this vertex */
2062: vnei[i*2+1] = nodeEdgeOffset[i] + nodeEdgeRstart; /* Now the global index in the next comm round */
2063: }
2064: VecRestoreArray(VNodeEdgeInfo,&vnei);
2065: PetscFree2(nodeEdgeCount,nodeEdgeOffset);
2067: /* Create a Vec to receive the edge count and global offset for each node in owned+ghosted, get them, and clean up */
2068: VecCreate(PETSC_COMM_SELF,&VNodeEdgeInfoOv);
2069: VecSetSizes(VNodeEdgeInfoOv,2*nvertices,2*nvertices);
2070: VecSetBlockSize(VNodeEdgeInfoOv,2);
2071: VecSetType(VNodeEdgeInfoOv,VECSEQ);
2073: ISCreateBlock(PETSC_COMM_WORLD,2,nvertices,grid->loc2pet,PETSC_COPY_VALUES,&isglobal); /* Address the nodes in overlap to get info from */
2074: VecScatterCreate(VNodeEdgeInfo,isglobal,VNodeEdgeInfoOv,NULL,&neiscat);
2075: VecScatterBegin(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2076: VecScatterEnd(neiscat,VNodeEdgeInfo,VNodeEdgeInfoOv,INSERT_VALUES,SCATTER_FORWARD);
2077: VecScatterDestroy(&neiscat);
2078: VecDestroy(&VNodeEdgeInfo);
2079: ISDestroy(&isglobal);
2081: /* Create a Vec to receive the actual edges for all nodes (owned and ghosted), execute the scatter */
2082: nedgeOv = 0; /* First count the number of edges in the complete overlap */
2083: VecGetArray(VNodeEdgeInfoOv,&vnei);
2084: for (i=0; i<nvertices; i++) nedgeOv += (PetscInt)vnei[2*i+0];
2085: /* Allocate for the global indices in VNodeEdge of the edges to receive */
2086: PetscMalloc1(nedgeOv,&eIdxOv);
2087: for (i=0,cnt=0; i<nvertices; i++) {
2088: for (j=0; j<(PetscInt)vnei[2*i+0]; j++) eIdxOv[cnt++] = (PetscInt)vnei[2*i+1] + j;
2089: }
2090: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2091: ISCreateGeneral(PETSC_COMM_WORLD,nedgeOv,eIdxOv,PETSC_USE_POINTER,&isedgeOv);
2092: VecCreateSeq(PETSC_COMM_SELF,nedgeOv,&VNodeEdgeOv);
2093: VecScatterCreate(VNodeEdge,isedgeOv,VNodeEdgeOv,NULL,&nescat);
2094: VecScatterBegin(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2095: VecScatterEnd(nescat,VNodeEdge,VNodeEdgeOv,INSERT_VALUES,SCATTER_FORWARD);
2096: VecScatterDestroy(&nescat);
2097: VecDestroy(&VNodeEdge);
2098: ISDestroy(&isedgeOv);
2099: PetscFree(eIdxOv);
2101: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of edges before pruning: %" PetscInt_FMT ", half=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,nedgeOv,nedgeOv/2);
2103: /* Create the non-scalable global-to-local index map. Yuck, but it has already been used elsewhere. */
2104: PetscMalloc1(nnodes,&p2l);
2105: for (i=0; i<nnodes; i++) p2l[i] = -1;
2106: for (i=0; i<nvertices; i++) p2l[grid->loc2pet[i]] = i;
2107: if (1) {
2108: PetscInt m = 0;
2109: for (i=0; i<nnodes; i++) m += (p2l[i] >= 0);
2110: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: number of global indices that map to local indices: %" PetscInt_FMT "; nvertices=%" PetscInt_FMT " nnodesLoc=%" PetscInt_FMT " nnodes=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,m,nvertices,nnodesLoc,nnodes);
2111: }
2113: /* Log each edge connecting nodes in owned+ghosted exactly once */
2114: VecGetArray(VNodeEdgeInfoOv,&vnei);
2115: VecGetArray(VNodeEdgeOv,&vne);
2116: /* First count the number of edges to keep */
2117: nedgeOv = 0;
2118: for (i=0,cnt=0; i<nvertices; i++) {
2119: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2120: node0 = i;
2121: for (j=0; j<n; j++) {
2122: node1p = vne[cnt++];
2123: node1 = p2l[node1p];
2124: if (node0 < node1) nedgeOv++;
2125: }
2126: }
2127: /* Array of edges to keep */
2128: PetscMalloc1(2*nedgeOv,&eptrOv);
2129: nedgeOv = 0;
2130: for (i=0,cnt=0; i<nvertices; i++) {
2131: PetscInt n = (PetscInt)vnei[2*i+0]; /* number of nodes connected to i */
2132: node0 = i;
2133: for (j=0; j<n; j++) {
2134: node1p = vne[cnt++];
2135: node1 = p2l[node1p];
2136: if (node0 < node1) {
2137: eptrOv[2*nedgeOv+0] = node0;
2138: eptrOv[2*nedgeOv+1] = node1;
2139: nedgeOv++;
2140: }
2141: }
2142: }
2143: VecRestoreArray(VNodeEdgeInfoOv,&vnei);
2144: VecRestoreArray(VNodeEdgeOv,&vne);
2145: VecDestroy(&VNodeEdgeInfoOv);
2146: VecDestroy(&VNodeEdgeOv);
2147: PetscFree(p2l);
2149: PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] %s: nedgeLoc=%" PetscInt_FMT " nedgeOv=%" PetscInt_FMT "\n",rank,PETSC_FUNCTION_NAME,nedgeLoc,nedgeOv);
2150: PetscSynchronizedFlush(PETSC_COMM_WORLD,PETSC_STDOUT);
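/* -complete_overlap defaults to true here: return the augmented edge list, in which every edge
   whose two endpoints both lie in the owned+ghosted set appears exactly once.  When the option
   is turned off, fall back to a copy of the locally stored edge list grid->eptr. */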
2152: flg = PETSC_TRUE;
2153: PetscOptionsGetBool(NULL,NULL,"-complete_overlap",&flg,NULL);
2154: if (flg) {
2155: *invertices = grid->nvertices; /* We did not change the number of vertices */
2156: *inedgeOv = nedgeOv;
2157: *ieptrOv = eptrOv;
2158: } else {
2159: *invertices = grid->nvertices;
2160: *inedgeOv = nedgeLoc;
2161: PetscFree(eptrOv);
2162: PetscMalloc1(2*nedgeLoc,&eptrOv);
2163: PetscMemcpy(eptrOv,grid->eptr,2*nedgeLoc*sizeof(PetscInt));
2164: *ieptrOv = eptrOv;
2165: }
2166: return 0;
2167: }
2169: static PetscErrorCode WritePVTU(AppCtx *user,const char *fname,PetscBool base64)
2170: {
2171: GRID *grid = user->grid;
2172: TstepCtx *tsCtx = user->tsCtx;
2173: FILE *vtu,*pvtu;
2174: char pvtu_fname[PETSC_MAX_PATH_LEN],vtu_fname[PETSC_MAX_PATH_LEN];
2175: MPI_Comm comm;
2176: PetscMPIInt rank,size;
2177: PetscInt i,nvertices = 0,nedgeLoc = 0,ncells,bs,nloc,boffset = 0,*eptr = NULL;
2178: PetscErrorCode ierr;
2179: Vec Xloc,Xploc,Xuloc;
2180: unsigned char *celltype;
2181: int *celloffset,*conn,*cellrank;
2182: const PetscScalar *x;
2183: PetscScalar *xu,*xp;
2184: const char *byte_order = PetscBinaryBigEndian() ? "BigEndian" : "LittleEndian";
2186: GridCompleteOverlap(user->grid,&nvertices,&nedgeLoc,&eptr);
2187: comm = PETSC_COMM_WORLD;
2188: MPI_Comm_rank(comm,&rank);
2189: MPI_Comm_size(comm,&size);
2190: #if defined(PETSC_USE_COMPLEX) || !defined(PETSC_USE_REAL_DOUBLE) || defined(PETSC_USE_64BIT_INDICES)
2191: SETERRQ(comm,PETSC_ERR_SUP,"This function is only implemented for scalar-type=real precision=double, 32-bit indices");
2192: #endif
2193: PetscSNPrintf(pvtu_fname,sizeof(pvtu_fname),"%s-%" PetscInt_FMT ".pvtu",fname,tsCtx->itstep);
2194: PetscSNPrintf(vtu_fname,sizeof(vtu_fname),"%s-%" PetscInt_FMT "-%" PetscInt_FMT ".vtu",fname,tsCtx->itstep,rank);
2195: PetscFOpen(comm,pvtu_fname,"w",&pvtu);
2196: PetscFPrintf(comm,pvtu,"<?xml version=\"1.0\"?>\n");
2197: PetscFPrintf(comm,pvtu,"<VTKFile type=\"PUnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2198: PetscFPrintf(comm,pvtu," <PUnstructuredGrid GhostLevel=\"0\">\n");
2199: PetscFPrintf(comm,pvtu," <PPointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2200: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" />\n");
2201: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" />\n");
2202: PetscFPrintf(comm,pvtu," </PPointData>\n");
2203: PetscFPrintf(comm,pvtu," <PCellData>\n");
2204: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" />\n");
2205: PetscFPrintf(comm,pvtu," </PCellData>\n");
2206: PetscFPrintf(comm,pvtu," <PPoints>\n");
2207: PetscFPrintf(comm,pvtu," <PDataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" />\n");
2208: PetscFPrintf(comm,pvtu," </PPoints>\n");
2209: PetscFPrintf(comm,pvtu," <PCells>\n");
2210: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" />\n");
2211: PetscFPrintf(comm,pvtu," <PDataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" />\n");
2212: PetscFPrintf(comm,pvtu," <PDataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" />\n");
2213: PetscFPrintf(comm,pvtu," </PCells>\n");
2214: for (i=0; i<size; i++) {
2215: PetscFPrintf(comm,pvtu," <Piece Source=\"%s-%" PetscInt_FMT "-%" PetscInt_FMT ".vtu\" />\n",fname,tsCtx->itstep,i);
2216: }
2217: PetscFPrintf(comm,pvtu," </PUnstructuredGrid>\n");
2218: PetscFPrintf(comm,pvtu,"</VTKFile>\n");
2219: PetscFClose(comm,pvtu);
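/* The collectively written .pvtu master file only lists one <Piece> per rank; the point data,
   connectivity, and appended binary block are written below by each rank into its own .vtu piece file. */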
2221: Xloc = grid->qnodeLoc;
2222: VecScatterBegin(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2223: VecScatterEnd(grid->scatter,grid->qnode,Xloc,INSERT_VALUES,SCATTER_FORWARD);
2224: VecGetBlockSize(Xloc,&bs);
2226: VecGetSize(Xloc,&nloc);
2229: VecCreateSeq(PETSC_COMM_SELF,nvertices,&Xploc);
2231: VecCreate(PETSC_COMM_SELF,&Xuloc);
2232: VecSetSizes(Xuloc,3*nvertices,3*nvertices);
2233: VecSetBlockSize(Xuloc,3);
2234: VecSetType(Xuloc,VECSEQ);
2236: VecGetArrayRead(Xloc,&x);
2237: VecGetArray(Xploc,&xp);
2238: VecGetArray(Xuloc,&xu);
2239: for (i=0; i<nvertices; i++) {
2240: xp[i] = x[i*4+0];
2241: xu[i*3+0] = x[i*4+1];
2242: xu[i*3+1] = x[i*4+2];
2243: xu[i*3+2] = x[i*4+3];
2244: }
2245: VecRestoreArrayRead(Xloc,&x);
2247: InferLocalCellConnectivity(nvertices,nedgeLoc,eptr,&ncells,&conn);
2249: PetscFOpen(PETSC_COMM_SELF,vtu_fname,"w",&vtu);
2250: PetscFPrintf(PETSC_COMM_SELF,vtu,"<?xml version=\"1.0\"?>\n");
2251: PetscFPrintf(PETSC_COMM_SELF,vtu,"<VTKFile type=\"UnstructuredGrid\" version=\"0.1\" byte_order=\"%s\">\n",byte_order);
2252: PetscFPrintf(PETSC_COMM_SELF,vtu," <UnstructuredGrid>\n");
2253: PetscFPrintf(PETSC_COMM_SELF,vtu," <Piece NumberOfPoints=\"%" PetscInt_FMT "\" NumberOfCells=\"%" PetscInt_FMT "\">\n",nvertices,ncells);
2255: /* Solution fields */
2256: PetscFPrintf(PETSC_COMM_SELF,vtu," <PointData Scalars=\"Pressure\" Vectors=\"Velocity\">\n");
2257: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Pressure\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2258: boffset += nvertices*sizeof(PetscScalar) + sizeof(int);
2259: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Velocity\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2260: boffset += nvertices*3*sizeof(PetscScalar) + sizeof(int);
2261: PetscFPrintf(PETSC_COMM_SELF,vtu," </PointData>\n");
2262: PetscFPrintf(PETSC_COMM_SELF,vtu," <CellData>\n");
2263: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"Rank\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2264: boffset += ncells*sizeof(int) + sizeof(int);
2265: PetscFPrintf(PETSC_COMM_SELF,vtu," </CellData>\n");
2266: /* Coordinate positions */
2267: PetscFPrintf(PETSC_COMM_SELF,vtu," <Points>\n");
2268: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Float64\" Name=\"Position\" NumberOfComponents=\"3\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2269: boffset += nvertices*3*sizeof(double) + sizeof(int);
2270: PetscFPrintf(PETSC_COMM_SELF,vtu," </Points>\n");
2271: /* Cell connectivity */
2272: PetscFPrintf(PETSC_COMM_SELF,vtu," <Cells>\n");
2273: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"connectivity\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2274: boffset += ncells*4*sizeof(int) + sizeof(int);
2275: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"Int32\" Name=\"offsets\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2276: boffset += ncells*sizeof(int) + sizeof(int);
2277: PetscFPrintf(PETSC_COMM_SELF,vtu," <DataArray type=\"UInt8\" Name=\"types\" NumberOfComponents=\"1\" format=\"appended\" offset=\"%" PetscInt_FMT "\" />\n",boffset);
2278: boffset += ncells*sizeof(unsigned char) + sizeof(int);
2279: PetscFPrintf(PETSC_COMM_SELF,vtu," </Cells>\n");
2280: PetscFPrintf(PETSC_COMM_SELF,vtu," </Piece>\n");
2281: PetscFPrintf(PETSC_COMM_SELF,vtu," </UnstructuredGrid>\n");
2282: PetscFPrintf(PETSC_COMM_SELF,vtu," <AppendedData encoding=\"%s\">\n",base64 ? "base64" : "raw");
2283: PetscFPrintf(PETSC_COMM_SELF,vtu,"_");
2285: /* Write pressure */
2286: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xp,nvertices,PETSC_SCALAR,base64);
2288: /* Write velocity */
2289: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,xu,nvertices*3,PETSC_SCALAR,base64);
2291: /* Label cell rank, not a measure of computation because nothing is actually computed at cells. This is written
2292: * primarily to aid in debugging. The partition for computation should label vertices. */
2293: PetscMalloc1(ncells,&cellrank);
2294: for (i=0; i<ncells; i++) cellrank[i] = rank;
2295: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,cellrank,ncells,PETSC_INT,base64);
2296: PetscFree(cellrank);
2298: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,grid->xyz,nvertices*3,PETSC_DOUBLE,base64);
2299: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,conn,ncells*4,PETSC_INT,base64);
2300: PetscFree(conn);
2302: PetscMalloc1(ncells,&celloffset);
2303: for (i=0; i<ncells; i++) celloffset[i] = 4*(i+1);
2304: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celloffset,ncells,PETSC_INT,base64);
2305: PetscFree(celloffset);
2307: PetscMalloc1(ncells,&celltype);
2308: for (i=0; i<ncells; i++) celltype[i] = 10; /* VTK_TETRA */
2309: PetscFWrite_FUN3D(PETSC_COMM_SELF,vtu,celltype,ncells,PETSC_CHAR,base64);
2310: PetscFree(celltype);
2312: PetscFPrintf(PETSC_COMM_SELF,vtu,"\n </AppendedData>\n");
2313: PetscFPrintf(PETSC_COMM_SELF,vtu,"</VTKFile>\n");
2314: PetscFClose(PETSC_COMM_SELF,vtu);
2316: VecRestoreArray(Xploc,&xp);
2317: VecRestoreArray(Xuloc,&xu);
2318: VecDestroy(&Xploc);
2319: VecDestroy(&Xuloc);
2320: PetscFree(eptr);
2321: return 0;
2322: }
2324: /*---------------------------------------------------------------------*/
2325: int SetPetscDS(GRID *grid,TstepCtx *tsCtx)
2326: /*---------------------------------------------------------------------*/
2327: {
2328: int ierr,i,j,bs;
2329: int nnodes,jstart,jend,nbrs_diag,nbrs_offd;
2330: int nnodesLoc,nvertices;
2331: int *val_diag,*val_offd,*svertices,*loc2pet;
2332: IS isglobal,islocal;
2333: ISLocalToGlobalMapping isl2g;
2334: PetscBool flg;
2335: MPI_Comm comm = PETSC_COMM_WORLD;
2337: nnodes = grid->nnodes;
2338: nnodesLoc = grid->nnodesLoc;
2339: nvertices = grid->nvertices;
2340: loc2pet = grid->loc2pet;
2341: bs = 4;
2343: /* Set up the PETSc datastructures */
2345: VecCreate(comm,&grid->qnode);
2346: VecSetSizes(grid->qnode,bs*nnodesLoc,bs*nnodes);
2347: VecSetBlockSize(grid->qnode,bs);
2348: VecSetType(grid->qnode,VECMPI);
2350: VecDuplicate(grid->qnode,&grid->res);
2351: VecDuplicate(grid->qnode,&tsCtx->qold);
2352: VecDuplicate(grid->qnode,&tsCtx->func);
2354: VecCreate(MPI_COMM_SELF,&grid->qnodeLoc);
2355: VecSetSizes(grid->qnodeLoc,bs*nvertices,bs*nvertices);
2356: VecSetBlockSize(grid->qnodeLoc,bs);
2357: VecSetType(grid->qnodeLoc,VECSEQ);
2359: VecCreate(comm,&grid->grad);
2360: VecSetSizes(grid->grad,3*bs*nnodesLoc,3*bs*nnodes);
2361: VecSetBlockSize(grid->grad,3*bs);
2362: VecSetType(grid->grad,VECMPI);
2364: VecCreate(MPI_COMM_SELF,&grid->gradLoc);
2365: VecSetSizes(grid->gradLoc,3*bs*nvertices,3*bs*nvertices);
2366: VecSetBlockSize(grid->gradLoc,3*bs);
2367: VecSetType(grid->gradLoc,VECSEQ);
2369: /* Create Scatter between the local and global vectors */
2370: /* First create scatter for qnode */
2371: ISCreateStride(MPI_COMM_SELF,bs*nvertices,0,1,&islocal);
2372: #if defined(INTERLACING)
2373: #if defined(BLOCKING)
2374: ICALLOC(nvertices,&svertices);
2375: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2376: ISCreateBlock(MPI_COMM_SELF,bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2377: #else
2378: ICALLOC(bs*nvertices,&svertices);
2379: for (i = 0; i < nvertices; i++)
2380: for (j = 0; j < bs; j++) svertices[j+bs*i] = j + bs*loc2pet[i];
2381: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2382: #endif
2383: #else
2384: ICALLOC(bs*nvertices,&svertices);
2385: for (j = 0; j < bs; j++)
2386: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2387: ISCreateGeneral(MPI_COMM_SELF,bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2388: #endif
2389: PetscFree(svertices);
2390: VecScatterCreate(grid->qnode,isglobal,grid->qnodeLoc,islocal,&grid->scatter);
2391: ISDestroy(&isglobal);
2392: ISDestroy(&islocal);
2394: /* Now create scatter for gradient vector of qnode */
2395: ISCreateStride(MPI_COMM_SELF,3*bs*nvertices,0,1,&islocal);
2396: #if defined(INTERLACING)
2397: #if defined(BLOCKING)
2398: ICALLOC(nvertices,&svertices);
2399: for (i=0; i < nvertices; i++) svertices[i] = loc2pet[i];
2400: ISCreateBlock(MPI_COMM_SELF,3*bs,nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2401: #else
2402: ICALLOC(3*bs*nvertices,&svertices);
2403: for (i = 0; i < nvertices; i++)
2404: for (j = 0; j < 3*bs; j++) svertices[j+3*bs*i] = j + 3*bs*loc2pet[i];
2405: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2406: #endif
2407: #else
2408: ICALLOC(3*bs*nvertices,&svertices);
2409: for (j = 0; j < 3*bs; j++)
2410: for (i = 0; i < nvertices; i++) svertices[j*nvertices+i] = j*nvertices + loc2pet[i];
2411: ISCreateGeneral(MPI_COMM_SELF,3*bs*nvertices,svertices,PETSC_COPY_VALUES,&isglobal);
2412: #endif
2413: PetscFree(svertices);
2414: VecScatterCreate(grid->grad,isglobal,grid->gradLoc,islocal,&grid->gradScatter);
2415: ISDestroy(&isglobal);
2416: ISDestroy(&islocal);
2418: /* Store the number of non-zeroes per row */
2419: #if defined(INTERLACING)
2420: #if defined(BLOCKING)
2421: ICALLOC(nnodesLoc,&val_diag);
2422: ICALLOC(nnodesLoc,&val_offd);
2423: for (i = 0; i < nnodesLoc; i++) {
2424: jstart = grid->ia[i] - 1;
2425: jend = grid->ia[i+1] - 1;
2426: nbrs_diag = 0;
2427: nbrs_offd = 0;
2428: for (j = jstart; j < jend; j++) {
2429: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2430: else nbrs_offd++;
2431: }
2432: val_diag[i] = nbrs_diag;
2433: val_offd[i] = nbrs_offd;
2434: }
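/* val_diag[i] and val_offd[i] now hold, for block row i, how many neighbor blocks fall inside
   the diagonal block (columns owned by this process) versus outside it; they are passed as the
   per-row preallocation arrays d_nnz and o_nnz of MatCreateBAIJ below. */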
2435: MatCreateBAIJ(comm,bs,bs*nnodesLoc,bs*nnodesLoc,
2436: bs*nnodes,bs*nnodes,PETSC_DEFAULT,val_diag,
2437: PETSC_DEFAULT,val_offd,&grid->A);
2438: #else
2439: ICALLOC(nnodesLoc*4,&val_diag);
2440: ICALLOC(nnodesLoc*4,&val_offd);
2441: for (i = 0; i < nnodesLoc; i++) {
2442: jstart = grid->ia[i] - 1;
2443: jend = grid->ia[i+1] - 1;
2444: nbrs_diag = 0;
2445: nbrs_offd = 0;
2446: for (j = jstart; j < jend; j++) {
2447: if ((grid->ja[j] >= rstart) && (grid->ja[j] < (rstart+nnodesLoc))) nbrs_diag++;
2448: else nbrs_offd++;
2449: }
2450: for (j = 0; j < 4; j++) {
2451: int row = 4*i + j;
2452: val_diag[row] = nbrs_diag*4;
2453: val_offd[row] = nbrs_offd*4;
2454: }
2455: }
2456: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2457: bs*nnodes,bs*nnodes,NULL,val_diag,
2458: NULL,val_offd,&grid->A);
2459: #endif
2460: PetscFree(val_diag);
2461: PetscFree(val_offd);
2463: #else
2465: ICALLOC(nnodes*4,&val_diag);
2466: ICALLOC(nnodes*4,&val_offd);
2467: for (j = 0; j < 4; j++)
2468: for (i = 0; i < nnodes; i++) {
2469: int row;
2470: row = i + j*nnodes;
2471: jstart = grid->ia[i] - 1;
2472: jend = grid->ia[i+1] - 1;
2473: nbrs_diag = jend - jstart;
2474: val_diag[row] = nbrs_diag*4;
2475: val_offd[row] = 0;
2476: }
2477: /* MatCreateSeqAIJ(MPI_COMM_SELF,nnodes*4,nnodes*4,NULL,
2478: val,&grid->A);*/
2479: MatCreateAIJ(comm,bs*nnodesLoc,bs*nnodesLoc,
2480: bs*nnodes,bs*nnodes,NULL,val_diag,
2481: NULL,val_offd,&grid->A);
2482: MatSetBlockSize(grid->A,bs);
2483: PetscFree(val_diag);
2484: PetscFree(val_offd);
2485: #endif
2487: flg = PETSC_FALSE;
2488: PetscOptionsGetBool(NULL,NULL,"-mem_use",&flg,NULL);
2489: if (flg) {
2490: PetscMemoryView(PETSC_VIEWER_STDOUT_WORLD,"Memory usage after allocating PETSc data structures\n");
2491: }
2493: /* Set local to global mapping for setting the matrix elements in
2494: local ordering : first set row by row mapping
2495: */
2496: ISLocalToGlobalMappingCreate(MPI_COMM_SELF,bs,nvertices,loc2pet,PETSC_COPY_VALUES,&isl2g);
2497: MatSetLocalToGlobalMapping(grid->A,isl2g,isl2g);
2498: ISLocalToGlobalMappingDestroy(&isl2g);
2499: return 0;
2500: }
2502: /*================================= CLINK ===================================*/
2503: /* */
2504: /* Used in establishing the links between FORTRAN common blocks and C */
2505: /* */
2506: /*===========================================================================*/
2507: EXTERN_C_BEGIN
2508: void f77CLINK(CINFO *p1,CRUNGE *p2,CGMCOM *p3)
2509: {
2510: c_info = p1;
2511: c_runge = p2;
2512: c_gmcom = p3;
2513: }
2514: EXTERN_C_END
2516: /*========================== SET_UP_GRID====================================*/
2517: /* */
2518: /* Allocates the memory for the fine grid */
2519: /* */
2520: /*==========================================================================*/
2521: int set_up_grid(GRID *grid)
2522: {
2523: int nnodes,nedge;
2524: int nsface,nvface,nfface,nbface;
2525: int tnode,ierr;
2526: /*int vface,lnodes,nnz,ncell,kvisc,ilu0,nsrch,ileast,ifcn,valloc;*/
2527: /*int nsnode,nvnode,nfnode; */
2528: /*int mgzero=0;*/ /* Variable so we dont allocate memory for multigrid */
2529: /*int jalloc;*/ /* If jalloc=1 allocate space for dfp and dfm */
2530: /*
2531: * stuff to read in dave's grids
2532: */
2533: /*int nnbound,nvbound,nfbound,nnfacet,nvfacet,nffacet,ntte;*/
2534: /* end of stuff */
2536: nnodes = grid->nnodes;
2537: tnode = grid->nnodes;
2538: nedge = grid->nedge;
2539: nsface = grid->nsface;
2540: nvface = grid->nvface;
2541: nfface = grid->nfface;
2542: nbface = nsface + nvface + nfface;
2544: /*ncell = grid->ncell;
2545: vface = grid->nedge;
2546: lnodes = grid->nnodes;
2547: nsnode = grid->nsnode;
2548: nvnode = grid->nvnode;
2549: nfnode = grid->nfnode;
2550: nsrch = c_gmcom->nsrch;
2551: ilu0 = c_gmcom->ilu0;
2552: ileast = grid->ileast;
2553: ifcn = c_gmcom->ifcn;
2554: jalloc = 0;
2555: kvisc = grid->jvisc;*/
2557: /* if (ilu0 >=1 && ifcn == 1) jalloc=0;*/
2559: /*
2560: * stuff to read in dave's grids
2561: */
2562: /*nnbound = grid->nnbound;
2563: nvbound = grid->nvbound;
2564: nfbound = grid->nfbound;
2565: nnfacet = grid->nnfacet;
2566: nvfacet = grid->nvfacet;
2567: nffacet = grid->nffacet;
2568: ntte = grid->ntte;*/
2569: /* end of stuff */
2571: /* if (!ileast) lnodes = 1;
2572: printf("In set_up_grid->jvisc = %d\n",grid->jvisc);
2574: if (grid->jvisc != 2 && grid->jvisc != 4 && grid->jvisc != 6)vface = 1;
2575: printf(" vface = %d \n",vface);
2576: if (grid->jvisc < 3) tnode = 1;
2577: valloc = 1;
2578: if (grid->jvisc == 0)valloc = 0;*/
2580: /*PetscPrintf(PETSC_COMM_WORLD," nsnode= %d nvnode= %d nfnode= %d\n",nsnode,nvnode,nfnode);*/
2581: /*PetscPrintf(PETSC_COMM_WORLD," nsface= %d nvface= %d nfface= %d\n",nsface,nvface,nfface);
2582: PetscPrintf(PETSC_COMM_WORLD," nbface= %d\n",nbface);*/
2583: /* Now allocate memory for the other grid arrays */
2584: /* ICALLOC(nedge*2, &grid->eptr); */
2585: ICALLOC(nsface, &grid->isface);
2586: ICALLOC(nvface, &grid->ivface);
2587: ICALLOC(nfface, &grid->ifface);
2588: /* ICALLOC(nsnode, &grid->isnode);
2589: ICALLOC(nvnode, &grid->ivnode);
2590: ICALLOC(nfnode, &grid->ifnode);*/
2591: /*ICALLOC(nnodes, &grid->clist);
2592: ICALLOC(nnodes, &grid->iupdate);
2593: ICALLOC(nsface*2, &grid->sface);
2594: ICALLOC(nvface*2, &grid->vface);
2595: ICALLOC(nfface*2, &grid->fface);
2596: ICALLOC(lnodes, &grid->icount);*/
/*FCALLOC(nnodes, &grid->y);
FCALLOC(nnodes, &grid->z);
FCALLOC(nnodes, &grid->area);*/
/*
* FCALLOC(nnodes*4, &grid->gradx);
* FCALLOC(nnodes*4, &grid->grady);
* FCALLOC(nnodes*4, &grid->gradz);
* FCALLOC(nnodes, &grid->cdt);
*/
/*
* FCALLOC(nnodes*4, &grid->qnode);
* FCALLOC(nnodes*4, &grid->dq);
* FCALLOC(nnodes*4, &grid->res);
* FCALLOC(jalloc*nnodes*4*4,&grid->A);
* FCALLOC(nnodes*4, &grid->B);
* FCALLOC(jalloc*nedge*4*4,&grid->dfp);
* FCALLOC(jalloc*nedge*4*4,&grid->dfm);
*/
/*FCALLOC(nsnode, &grid->syn);
FCALLOC(nsnode, &grid->szn);
FCALLOC(nsnode, &grid->sa);
FCALLOC(nvnode, &grid->vxn);
FCALLOC(nvnode, &grid->vyn);
FCALLOC(nvnode, &grid->vzn);
FCALLOC(nvnode, &grid->va);
FCALLOC(nfnode, &grid->fxn);
FCALLOC(nfnode, &grid->fyn);
FCALLOC(nfnode, &grid->fzn);
FCALLOC(nfnode, &grid->fa);
FCALLOC(nedge, &grid->xn);
FCALLOC(nedge, &grid->yn);
FCALLOC(nedge, &grid->zn);
FCALLOC(nedge, &grid->rl);*/
2633: FCALLOC(nbface*15,&grid->us);
2634: FCALLOC(nbface*15,&grid->vs);
2635: FCALLOC(nbface*15,&grid->as);
/*
* FCALLOC(nnodes*4, &grid->phi);
* FCALLOC(nnodes, &grid->r11);
* FCALLOC(nnodes, &grid->r12);
* FCALLOC(nnodes, &grid->r13);
* FCALLOC(nnodes, &grid->r22);
* FCALLOC(nnodes, &grid->r23);
* FCALLOC(nnodes, &grid->r33);
*/
/*
* Allocate memory for viscous length scale if turbulent
*/
2648: if (grid->jvisc >= 3) {
2649: FCALLOC(tnode, &grid->slen);
2650: FCALLOC(nnodes, &grid->turbre);
2651: FCALLOC(nnodes, &grid->amut);
2652: FCALLOC(tnode, &grid->turbres);
2653: FCALLOC(nedge, &grid->dft1);
2654: FCALLOC(nedge, &grid->dft2);
2655: }
/*
* Allocate memory for MG transfer
*/
/*
ICALLOC(mgzero*nsface, &grid->isford);
ICALLOC(mgzero*nvface, &grid->ivford);
ICALLOC(mgzero*nfface, &grid->ifford);
ICALLOC(mgzero*nnodes, &grid->nflag);
ICALLOC(mgzero*nnodes, &grid->nnext);
ICALLOC(mgzero*nnodes, &grid->nneigh);
ICALLOC(mgzero*ncell, &grid->ctag);
ICALLOC(mgzero*ncell, &grid->csearch);
ICALLOC(valloc*ncell*4, &grid->c2n);
ICALLOC(valloc*ncell*6, &grid->c2e);
grid->c2c = (int*)grid->dfp;
ICALLOC(ncell*4, &grid->c2c);
ICALLOC(nnodes, &grid->cenc);
if (grid->iup == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefup);
FCALLOC(mgzero*nnodes*3, &grid->rcoefup);
}
if (grid->idown == 1) {
ICALLOC(mgzero*nnodes*3, &grid->icoefdn);
FCALLOC(mgzero*nnodes*3, &grid->rcoefdn);
}
FCALLOC(nnodes*4, &grid->ff);
FCALLOC(tnode, &grid->turbff);
*/
/*
* If using GMRES (nsrch>0) allocate memory
*/
/* NoEq = 0;
if (nsrch > 0)NoEq = 4*nnodes;
if (nsrch < 0)NoEq = nnodes;
FCALLOC(NoEq, &grid->AP);
FCALLOC(NoEq, &grid->Xgm);
FCALLOC(NoEq, &grid->temr);
FCALLOC((abs(nsrch)+1)*NoEq,&grid->Fgm);
*/
/*
* stuff to read in dave's grids
*/
/*
ICALLOC(nnbound, &grid->ncolorn);
ICALLOC(nnbound*100,&grid->countn);
ICALLOC(nvbound, &grid->ncolorv);
ICALLOC(nvbound*100,&grid->countv);
ICALLOC(nfbound, &grid->ncolorf);
ICALLOC(nfbound*100,&grid->countf);
*/
/*ICALLOC(nnbound, &grid->nntet);
ICALLOC(nnbound, &grid->nnpts);
ICALLOC(nvbound, &grid->nvtet);
ICALLOC(nvbound, &grid->nvpts);
ICALLOC(nfbound, &grid->nftet);
ICALLOC(nfbound, &grid->nfpts);
ICALLOC(nnfacet*4,&grid->f2ntn);
ICALLOC(nvfacet*4,&grid->f2ntv);
ICALLOC(nffacet*4,&grid->f2ntf);*/
2715: return 0;
2716: }
/*========================== WRITE_FINE_GRID ================================*/
/* */
/* Write memory locations and other information for the fine grid */
/* */
/*===========================================================================*/
2723: int write_fine_grid(GRID *grid)
2724: {
2725: FILE *output;
/* open file for output */
/* call the output frame.out */
2731: fprintf(output,"information for fine grid\n");
2732: fprintf(output,"\n");
2733: fprintf(output," address of fine grid = %p\n",(void*)grid);
2735: fprintf(output,"grid.nnodes = %d\n",grid->nnodes);
2736: fprintf(output,"grid.ncell = %d\n",grid->ncell);
2737: fprintf(output,"grid.nedge = %d\n",grid->nedge);
2738: fprintf(output,"grid.nsface = %d\n",grid->nsface);
2739: fprintf(output,"grid.nvface = %d\n",grid->nvface);
2740: fprintf(output,"grid.nfface = %d\n",grid->nfface);
2741: fprintf(output,"grid.nsnode = %d\n",grid->nsnode);
2742: fprintf(output,"grid.nvnode = %d\n",grid->nvnode);
2743: fprintf(output,"grid.nfnode = %d\n",grid->nfnode);
/*
fprintf(output,"grid.eptr = %p\n",grid->eptr);
fprintf(output,"grid.isface = %p\n",grid->isface);
fprintf(output,"grid.ivface = %p\n",grid->ivface);
fprintf(output,"grid.ifface = %p\n",grid->ifface);
fprintf(output,"grid.isnode = %p\n",grid->isnode);
fprintf(output,"grid.ivnode = %p\n",grid->ivnode);
fprintf(output,"grid.ifnode = %p\n",grid->ifnode);
fprintf(output,"grid.c2n = %p\n",grid->c2n);
fprintf(output,"grid.c2e = %p\n",grid->c2e);
fprintf(output,"grid.xyz = %p\n",grid->xyz);
*/
/*fprintf(output,"grid.y = %p\n",grid->xyz);
fprintf(output,"grid.z = %p\n",grid->z);*/
/*
fprintf(output,"grid.area = %p\n",grid->area);
fprintf(output,"grid.qnode = %p\n",grid->qnode);
*/
/*
fprintf(output,"grid.gradx = %p\n",grid->gradx);
fprintf(output,"grid.grady = %p\n",grid->grady);
fprintf(output,"grid.gradz = %p\n",grid->gradz);
*/
/*
fprintf(output,"grid.cdt = %p\n",grid->cdt);
fprintf(output,"grid.sxn = %p\n",grid->sxn);
fprintf(output,"grid.syn = %p\n",grid->syn);
fprintf(output,"grid.szn = %p\n",grid->szn);
fprintf(output,"grid.vxn = %p\n",grid->vxn);
fprintf(output,"grid.vyn = %p\n",grid->vyn);
fprintf(output,"grid.vzn = %p\n",grid->vzn);
fprintf(output,"grid.fxn = %p\n",grid->fxn);
fprintf(output,"grid.fyn = %p\n",grid->fyn);
fprintf(output,"grid.fzn = %p\n",grid->fzn);
fprintf(output,"grid.xyzn = %p\n",grid->xyzn);
*/
/*fprintf(output,"grid.yn = %p\n",grid->yn);
fprintf(output,"grid.zn = %p\n",grid->zn);
fprintf(output,"grid.rl = %p\n",grid->rl);*/
2783: fclose(output);
2784: return 0;
2785: }
2787: #if defined(_OPENMP) && defined(HAVE_EDGE_COLORING)
2788: int EdgeColoring(int nnodes,int nedge,int *e2n,int *eperm,int *ncle,int *counte)
2789: {
2790: int ncolore = *ncle = 0;
2791: int iedg = 0,ib = 0,ie = nedge,tagcount;
2792: int i,n1,n2;
2793: int *tag;
2794: ICALLOC(nnodes,&tag);
2795: while (ib < ie) {
2796: for (i = 0; i < nnodes; i++) tag[i] = 0;
2797: counte[ncolore] = 0;
2798: for (i = ib; i < ie; i++) {
2799: n1 = e2n[i];
2800: n2 = e2n[i+nedge];
2801: tagcount = tag[n1]+tag[n2];
/* If tagcount = 0 then this edge belongs in this color */
2803: if (!tagcount) {
2804: tag[n1] = 1;
2805: tag[n2] = 1;
2806: e2n[i] = e2n[iedg];
2807: e2n[i+nedge] = e2n[iedg+nedge];
2808: e2n[iedg] = n1;
2809: e2n[iedg+nedge] = n2;
2810: n1 = eperm[i];
2811: eperm[i] = eperm[iedg];
2812: eperm[iedg] = n1;
2813: iedg++;
2814: counte[ncolore]++;
2815: }
2816: }
2817: ib = iedg;
2818: ncolore++;
2819: }
2820: *ncle = ncolore;
2821: return 0;
2822: }
2823: #endif
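/* Greedy edge coloring: each pass over the remaining edges selects a maximal set of edges that
   share no node (the tag array marks nodes already used in this color), swaps them to the front
   of e2n/eperm, and records the per-color count in counte[]; passes repeat until every edge is placed. */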
2824: #if defined(PARCH_IRIX64) && defined(USE_HW_COUNTERS)
2825: int EventCountersBegin(int *gen_start,PetscScalar *time_start_counters)
2826: {
2828: PetscTime(time_start_counters);
2829: return 0;
2830: }
2832: int EventCountersEnd(int gen_start,PetscScalar time_start_counters)
2833: {
2834: int gen_read;
2835: PetscScalar time_read_counters;
2836: long long _counter0,_counter1;
2839: PetscTime(&time_read_counters);
2841: counter0 += _counter0;
2842: counter1 += _counter1;
2843: time_counters += time_read_counters-time_start_counters;
2844: return 0;
2845: }
2846: #endif