Actual source code: sf.c

  1: #include <petsc/private/sfimpl.h>
  2: #include <petsc/private/hashseti.h>
  3: #include <petsc/private/viewerimpl.h>
  4: #include <petscctable.h>

  6: #if defined(PETSC_HAVE_CUDA)
  7:   #include <cuda_runtime.h>
  8: #endif

 10: #if defined(PETSC_HAVE_HIP)
 11:   #include <hip/hip_runtime.h>
 12: #endif

 14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
 15: void PetscSFCheckGraphSet(PetscSF,int);
 16: #else
 17: #if defined(PETSC_USE_DEBUG)
 18: #  define PetscSFCheckGraphSet(sf,arg) do {                          \
 19:     if (PetscUnlikely(!(sf)->graphset))                              \
 20:       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
 21:   } while (0)
 22: #else
 23: #  define PetscSFCheckGraphSet(sf,arg) do {} while (0)
 24: #endif
 25: #endif

 27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};

 29: /*@
 30:    PetscSFCreate - create a star forest communication context

 32:    Collective

 34:    Input Parameter:
 35: .  comm - communicator on which the star forest will operate

 37:    Output Parameter:
 38: .  sf - new star forest context

 40:    Options Database Keys:
 41: +  -sf_type basic     - Use MPI persistent Isend/Irecv for communication (default)
 42: .  -sf_type window    - Use MPI-3 one-sided window for communication
 43: -  -sf_type neighbor  - Use MPI-3 neighborhood collectives for communication

 45:    Level: intermediate

 47:    Notes:
 48:    When one knows the communication graph is one of the predefined patterns, such as MPI_Alltoall, MPI_Allgatherv or
 49:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
 50:    SFs are optimized and perform better than general SFs.
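
   Example Usage:
   A minimal sketch of a typical lifecycle; the communication calls named in the comments stand for whatever operations an application needs:
.vb
   PetscSF sf;
   PetscSFCreate(PETSC_COMM_WORLD,&sf);
   PetscSFSetFromOptions(sf);
   // describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern()
   PetscSFSetUp(sf);
   // communicate with, e.g., PetscSFBcastBegin()/PetscSFBcastEnd() or PetscSFReduceBegin()/PetscSFReduceEnd()
   PetscSFDestroy(&sf);
.ve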

 52: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
 53: @*/
 54: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
 55: {
 56:   PetscSF        b;

 59:   PetscSFInitializePackage();

 61:   PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);

 63:   b->nroots    = -1;
 64:   b->nleaves   = -1;
 65:   b->minleaf   = PETSC_MAX_INT;
 66:   b->maxleaf   = PETSC_MIN_INT;
 67:   b->nranks    = -1;
 68:   b->rankorder = PETSC_TRUE;
 69:   b->ingroup   = MPI_GROUP_NULL;
 70:   b->outgroup  = MPI_GROUP_NULL;
 71:   b->graphset  = PETSC_FALSE;
 72: #if defined(PETSC_HAVE_DEVICE)
 73:   b->use_gpu_aware_mpi    = use_gpu_aware_mpi;
 74:   b->use_stream_aware_mpi = PETSC_FALSE;
 75:   b->unknown_input_stream = PETSC_FALSE;
 76:   #if defined(PETSC_HAVE_KOKKOS) /* Prefer Kokkos over CUDA */
 77:     b->backend = PETSCSF_BACKEND_KOKKOS;
 78:   #elif defined(PETSC_HAVE_CUDA)
 79:     b->backend = PETSCSF_BACKEND_CUDA;
 80:   #elif defined(PETSC_HAVE_HIP)
 81:     b->backend = PETSCSF_BACKEND_HIP;
 82:   #endif

 84:   #if defined(PETSC_HAVE_NVSHMEM)
 85:     b->use_nvshmem     = PETSC_FALSE; /* Default is not to try NVSHMEM */
 86:     b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
 87:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
 88:     PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
 89:   #endif
 90: #endif
 91:   b->vscat.from_n = -1;
 92:   b->vscat.to_n   = -1;
 93:   b->vscat.unit   = MPIU_SCALAR;
 94:  *sf = b;
 95:   return 0;
 96: }

 98: /*@
 99:    PetscSFReset - Reset a star forest so that different sizes or neighbors can be used

101:    Collective

103:    Input Parameter:
104: .  sf - star forest

106:    Level: advanced

108: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
109: @*/
110: PetscErrorCode PetscSFReset(PetscSF sf)
111: {
113:   if (sf->ops->Reset) (*sf->ops->Reset)(sf);
114:   sf->nroots   = -1;
115:   sf->nleaves  = -1;
116:   sf->minleaf  = PETSC_MAX_INT;
117:   sf->maxleaf  = PETSC_MIN_INT;
118:   sf->mine     = NULL;
119:   sf->remote   = NULL;
120:   sf->graphset = PETSC_FALSE;
121:   PetscFree(sf->mine_alloc);
122:   PetscFree(sf->remote_alloc);
123:   sf->nranks = -1;
124:   PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
125:   sf->degreeknown = PETSC_FALSE;
126:   PetscFree(sf->degree);
127:   if (sf->ingroup  != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
128:   if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
129:   if (sf->multi) sf->multi->multi = NULL;
130:   PetscSFDestroy(&sf->multi);
131:   PetscLayoutDestroy(&sf->map);

133:  #if defined(PETSC_HAVE_DEVICE)
134:   for (PetscInt i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);
135:  #endif

137:   sf->setupcalled = PETSC_FALSE;
138:   return 0;
139: }

141: /*@C
142:    PetscSFSetType - Set the PetscSF communication implementation

144:    Collective on PetscSF

146:    Input Parameters:
147: +  sf - the PetscSF context
148: -  type - a known method

150:    Options Database Key:
151: .  -sf_type <type> - Sets the method; use -help for a list
152:    of available methods (for instance, window, basic, neighbor)

154:    Notes:
155:    See "include/petscsf.h" for available methods (for instance)
156: +    PETSCSFWINDOW - MPI-2/3 one-sided
157: -    PETSCSFBASIC - basic implementation using MPI-1 two-sided

159:   Level: intermediate
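
   Example Usage:
   A short sketch; the choice of PETSCSFWINDOW is only illustrative:
.vb
   PetscSF sf;
   PetscSFCreate(PETSC_COMM_WORLD,&sf);
   PetscSFSetType(sf,PETSCSFWINDOW);   // same effect as running with -sf_type window
.ve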

161: .seealso: PetscSFType, PetscSFCreate()
162: @*/
163: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
164: {
165:   PetscBool      match;
166:   PetscErrorCode (*r)(PetscSF);


171:   PetscObjectTypeCompare((PetscObject)sf,type,&match);
172:   if (match) return 0;

174:   PetscFunctionListFind(PetscSFList,type,&r);
176:   /* Destroy the previous PetscSF implementation context */
177:   if (sf->ops->Destroy) (*(sf)->ops->Destroy)(sf);
178:   PetscMemzero(sf->ops,sizeof(*sf->ops));
179:   PetscObjectChangeTypeName((PetscObject)sf,type);
180:   (*r)(sf);
181:   return 0;
182: }

184: /*@C
185:   PetscSFGetType - Get the PetscSF communication implementation

187:   Not Collective

189:   Input Parameter:
190: . sf  - the PetscSF context

192:   Output Parameter:
193: . type - the PetscSF type name

195:   Level: intermediate

197: .seealso: PetscSFSetType(), PetscSFCreate()
198: @*/
199: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
200: {
203:   *type = ((PetscObject)sf)->type_name;
204:   return 0;
205: }

207: /*@C
208:    PetscSFDestroy - destroy star forest

210:    Collective

212:    Input Parameter:
213: .  sf - address of star forest

215:    Level: intermediate

217: .seealso: PetscSFCreate(), PetscSFReset()
218: @*/
219: PetscErrorCode PetscSFDestroy(PetscSF *sf)
220: {
221:   if (!*sf) return 0;
223:   if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return 0;}
224:   PetscSFReset(*sf);
225:   if ((*sf)->ops->Destroy) (*(*sf)->ops->Destroy)(*sf);
226:   PetscSFDestroy(&(*sf)->vscat.lsf);
227:   if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
228:   PetscHeaderDestroy(sf);
229:   return 0;
230: }

232: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
233: {
234:   PetscInt           i, nleaves;
235:   PetscMPIInt        size;
236:   const PetscInt    *ilocal;
237:   const PetscSFNode *iremote;

239:   if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
240:   PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
241:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
242:   for (i = 0; i < nleaves; i++) {
243:     const PetscInt rank = iremote[i].rank;
244:     const PetscInt remote = iremote[i].index;
245:     const PetscInt leaf = ilocal ? ilocal[i] : i;
249:   }
250:   return 0;
251: }

253: /*@
254:    PetscSFSetUp - set up communication structures

256:    Collective

258:    Input Parameter:
259: .  sf - star forest communication object

261:    Level: beginner

263: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
264: @*/
265: PetscErrorCode PetscSFSetUp(PetscSF sf)
266: {
268:   PetscSFCheckGraphSet(sf,1);
269:   if (sf->setupcalled) return 0;
270:   PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
271:   PetscSFCheckGraphValid_Private(sf);
 272:   if (!((PetscObject)sf)->type_name) PetscSFSetType(sf,PETSCSFBASIC); /* PetscSFSetType() zeroes sf->ops before installing the default implementation */
273:   if (sf->ops->SetUp) (*sf->ops->SetUp)(sf);
274: #if defined(PETSC_HAVE_CUDA)
275:   if (sf->backend == PETSCSF_BACKEND_CUDA) {
276:     sf->ops->Malloc = PetscSFMalloc_CUDA;
277:     sf->ops->Free   = PetscSFFree_CUDA;
278:   }
279: #endif
280: #if defined(PETSC_HAVE_HIP)
281:   if (sf->backend == PETSCSF_BACKEND_HIP) {
282:     sf->ops->Malloc = PetscSFMalloc_HIP;
283:     sf->ops->Free   = PetscSFFree_HIP;
284:   }
285: #endif

288: #if defined(PETSC_HAVE_KOKKOS)
289:   if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
290:     sf->ops->Malloc = PetscSFMalloc_Kokkos;
291:     sf->ops->Free   = PetscSFFree_Kokkos;
292:   }
293: #endif
294:   PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
295:   sf->setupcalled = PETSC_TRUE;
296:   return 0;
297: }

299: /*@
300:    PetscSFSetFromOptions - set PetscSF options using the options database

302:    Logically Collective

304:    Input Parameter:
305: .  sf - star forest

307:    Options Database Keys:
 308: +  -sf_type                  - implementation type, see PetscSFSetType()
 309: .  -sf_rank_order            - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
 310: .  -sf_unknown_input_stream  - Assume the input root/leafdata passed to SF was computed on arbitrary streams unknown to SF (default: false)
 313: .  -sf_use_stream_aware_mpi  - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
 314:                                If true, this option only works with -use_gpu_aware_mpi 1.

 316: -  -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and kokkos.
 317:                               On CUDA (HIP) devices, one can choose cuda (hip) or kokkos, with kokkos being the default when PETSc is
 318:                               configured with Kokkos. On other devices, kokkos is the only available backend.

320:    Level: intermediate
321: @*/
322: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
323: {
324:   PetscSFType    deft;
325:   char           type[256];
327:   PetscBool      flg;

330:   PetscObjectOptionsBegin((PetscObject)sf);
331:   deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
332:   PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
333:   PetscSFSetType(sf,flg ? type : deft);
334:   PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
335:  #if defined(PETSC_HAVE_DEVICE)
336:   {
337:     char        backendstr[32] = {0};
338:     PetscBool   isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
339:     /* Change the defaults set in PetscSFCreate() with command line options */
 340:     PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
341:     PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
342:     PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
343:     PetscStrcasecmp("cuda",backendstr,&isCuda);
344:     PetscStrcasecmp("kokkos",backendstr,&isKokkos);
345:     PetscStrcasecmp("hip",backendstr,&isHip);
346:   #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
347:     if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
348:     else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
349:     else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
351:   #elif defined(PETSC_HAVE_KOKKOS)
353:    #endif
354:   }
355:  #endif
356:   if (sf->ops->SetFromOptions) (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);
357:   PetscOptionsEnd();
358:   return 0;
359: }

361: /*@
362:    PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order

364:    Logically Collective

366:    Input Parameters:
367: +  sf - star forest
368: -  flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)

370:    Level: advanced

372: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
373: @*/
374: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
375: {
379:   sf->rankorder = flg;
380:   return 0;
381: }

383: /*@C
384:    PetscSFSetGraph - Set a parallel star forest

386:    Collective

388:    Input Parameters:
389: +  sf - star forest
 390: .  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
 391: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
 392: .  ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
 393:            during setup in debug mode)
 394: .  localmode - copy mode for ilocal
 395: .  iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
 396:             during setup in debug mode)
397: -  remotemode - copy mode for iremote

399:    Level: intermediate

401:    Notes:
402:    Leaf indices in ilocal must be unique, otherwise an error occurs.

 404:    The input arrays ilocal and iremote follow the PetscCopyMode semantics.
 405:    In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
 406:    PETSc might modify the respective array;
 407:    with PETSC_USE_POINTER, the user is responsible for freeing the array after PetscSFDestroy().
 408:    Only with PETSC_COPY_VALUES is the respective array guaranteed to stay intact, so a const array can be passed (though a cast to non-const is needed).

410:    Fortran Notes:
411:    In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.

413:    Developer Notes:
 414:    We sort leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
 415:    This also makes it easy to compare the leaf sets of two SFs.
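
   Example Usage:
   A minimal sketch on a single process; the sizes and indices are illustrative only:
.vb
   PetscInt    nroots = 2, nleaves = 2, ilocal[2] = {0,1};
   PetscSFNode iremote[2];

   iremote[0].rank = 0; iremote[0].index = 1;   // leaf 0 references root 1 on rank 0
   iremote[1].rank = 0; iremote[1].index = 0;   // leaf 1 references root 0 on rank 0
   PetscSFSetGraph(sf,nroots,nleaves,ilocal,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES);
.ve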

417: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
418: @*/
419: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
420: {
421:   PetscBool       unique, contiguous;


431:   if (sf->nroots >= 0) { /* Reset only if graph already set */
432:     PetscSFReset(sf);
433:   }

435:   PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);

437:   sf->nroots  = nroots;
438:   sf->nleaves = nleaves;

440:   if (localmode == PETSC_COPY_VALUES && ilocal) {
441:     PetscInt *tlocal = NULL;

443:     PetscMalloc1(nleaves,&tlocal);
444:     PetscArraycpy(tlocal,ilocal,nleaves);
445:     ilocal = tlocal;
446:   }
447:   if (remotemode == PETSC_COPY_VALUES) {
448:     PetscSFNode *tremote = NULL;

450:     PetscMalloc1(nleaves,&tremote);
451:     PetscArraycpy(tremote,iremote,nleaves);
452:     iremote = tremote;
453:   }

455:   if (nleaves && ilocal) {
456:     PetscSFNode   work;

458:     PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
459:     PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
460:     unique = PetscNot(unique);
462:     sf->minleaf = ilocal[0];
463:     sf->maxleaf = ilocal[nleaves-1];
464:     contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
465:   } else {
466:     sf->minleaf = 0;
467:     sf->maxleaf = nleaves - 1;
468:     unique      = PETSC_TRUE;
469:     contiguous  = PETSC_TRUE;
470:   }

472:   if (contiguous) {
473:     if (localmode == PETSC_USE_POINTER) {
474:       ilocal = NULL;
475:     } else {
476:       PetscFree(ilocal);
477:     }
478:   }
479:   sf->mine            = ilocal;
480:   if (localmode == PETSC_USE_POINTER) {
481:     sf->mine_alloc    = NULL;
482:   } else {
483:     sf->mine_alloc    = ilocal;
484:   }
485:   sf->remote          = iremote;
486:   if (remotemode == PETSC_USE_POINTER) {
487:     sf->remote_alloc  = NULL;
488:   } else {
489:     sf->remote_alloc  = iremote;
490:   }
491:   PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
492:   sf->graphset = PETSC_TRUE;
493:   return 0;
494: }

496: /*@
497:   PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern

499:   Collective

501:   Input Parameters:
502: + sf      - The PetscSF
503: . map     - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
504: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL

506:   Notes:
 507:   It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x whose layout is map, and
 508:   let n and N be the local and global sizes of x respectively.

 510:   With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to a
 511:   sequential vector y on every rank.

 513:   With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
 514:   sequential vector y on rank 0.

 516:   In the above cases, entries of x are roots and entries of y are leaves.

 518:   With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
 519:   creates a graph in which every rank has NP leaves and NP roots. On rank i, leaf j is connected to root i
 520:   of rank j, where 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount equal to 1. This does
 521:   not mean one cannot send multiple items; one just needs to create an MPI datatype for the multiple data
 522:   items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.

 524:   In this case, roots and leaves are symmetric.

526:   Level: intermediate
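
  Example Usage:
  A sketch of the allgather pattern; map is assumed to be a previously created and set-up PetscLayout describing the roots:
.vb
   PetscSF sf;
   PetscSFCreate(PETSC_COMM_WORLD,&sf);
   PetscSFSetGraphWithPattern(sf,map,PETSCSF_PATTERN_ALLGATHER);
   PetscSFSetUp(sf);
   // a Bcast on sf now copies the root values to every rank
.ve
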
527:  @*/
528: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
529: {
530:   MPI_Comm       comm;
531:   PetscInt       n,N,res[2];
532:   PetscMPIInt    rank,size;
533:   PetscSFType    type;

537:   PetscObjectGetComm((PetscObject)sf, &comm);
539:   MPI_Comm_rank(comm,&rank);
540:   MPI_Comm_size(comm,&size);

542:   if (pattern == PETSCSF_PATTERN_ALLTOALL) {
543:     type = PETSCSFALLTOALL;
544:     PetscLayoutCreate(comm,&sf->map);
545:     PetscLayoutSetLocalSize(sf->map,size);
546:     PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
547:     PetscLayoutSetUp(sf->map);
548:   } else {
549:     PetscLayoutGetLocalSize(map,&n);
550:     PetscLayoutGetSize(map,&N);
551:     res[0] = n;
552:     res[1] = -n;
553:     /* Check if n are same over all ranks so that we can optimize it */
554:     MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
555:     if (res[0] == -res[1]) { /* same n */
556:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER  : PETSCSFGATHER;
557:     } else {
558:       type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
559:     }
560:     PetscLayoutReference(map,&sf->map);
561:   }
562:   PetscSFSetType(sf,type);

564:   sf->pattern = pattern;
565:   sf->mine    = NULL; /* Contiguous */

567:   /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
568:      Also set other easy stuff.
569:    */
570:   if (pattern == PETSCSF_PATTERN_ALLGATHER) {
571:     sf->nleaves      = N;
572:     sf->nroots       = n;
573:     sf->nranks       = size;
574:     sf->minleaf      = 0;
575:     sf->maxleaf      = N - 1;
576:   } else if (pattern == PETSCSF_PATTERN_GATHER) {
577:     sf->nleaves      = rank ? 0 : N;
578:     sf->nroots       = n;
579:     sf->nranks       = rank ? 0 : size;
580:     sf->minleaf      = 0;
581:     sf->maxleaf      = rank ? -1 : N - 1;
582:   } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
583:     sf->nleaves      = size;
584:     sf->nroots       = size;
585:     sf->nranks       = size;
586:     sf->minleaf      = 0;
587:     sf->maxleaf      = size - 1;
588:   }
 589:   sf->ndranks  = 0; /* For patterned graphs we do not need to separate out distinguished ranks to improve communication performance */
590:   sf->graphset = PETSC_TRUE;
591:   return 0;
592: }

594: /*@
595:    PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map

597:    Collective

599:    Input Parameter:
600: .  sf - star forest to invert

602:    Output Parameter:
603: .  isf - inverse of sf

605:    Level: advanced

607:    Notes:
608:    All roots must have degree 1.

610:    The local space may be a permutation, but cannot be sparse.
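
   Example Usage:
   A minimal sketch; sf is assumed to already have a graph in which every root has degree 1:
.vb
   PetscSF isf;
   PetscSFCreateInverseSF(sf,&isf);
   // roots of sf become leaves of isf and vice versa
   PetscSFDestroy(&isf);
.ve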

612: .seealso: PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
615: {
616:   PetscMPIInt    rank;
617:   PetscInt       i,nroots,nleaves,maxlocal,count,*newilocal;
618:   const PetscInt *ilocal;
619:   PetscSFNode    *roots,*leaves;

622:   PetscSFCheckGraphSet(sf,1);

625:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
626:   maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */

628:   MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
629:   PetscMalloc2(nroots,&roots,maxlocal,&leaves);
630:   for (i=0; i<maxlocal; i++) {
631:     leaves[i].rank  = rank;
632:     leaves[i].index = i;
633:   }
634:   for (i=0; i <nroots; i++) {
635:     roots[i].rank  = -1;
636:     roots[i].index = -1;
637:   }
638:   PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
639:   PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);

641:   /* Check whether our leaves are sparse */
642:   for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
643:   if (count == nroots) newilocal = NULL;
644:   else {                        /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
645:     PetscMalloc1(count,&newilocal);
646:     for (i=0,count=0; i<nroots; i++) {
647:       if (roots[i].rank >= 0) {
648:         newilocal[count]   = i;
649:         roots[count].rank  = roots[i].rank;
650:         roots[count].index = roots[i].index;
651:         count++;
652:       }
653:     }
654:   }

656:   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
657:   PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
658:   PetscFree2(roots,leaves);
659:   return 0;
660: }

662: /*@
663:    PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph

665:    Collective

667:    Input Parameters:
668: +  sf - communication object to duplicate
669: -  opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)

671:    Output Parameter:
672: .  newsf - new communication object

674:    Level: beginner
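
   Example Usage:
   A short sketch duplicating both the configuration and the graph of an existing sf:
.vb
   PetscSF newsf;
   PetscSFDuplicate(sf,PETSCSF_DUPLICATE_GRAPH,&newsf);
   // ... use newsf independently of sf ...
   PetscSFDestroy(&newsf);
.ve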

676: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
677: @*/
678: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
679: {
680:   PetscSFType    type;
681:   MPI_Datatype   dtype=MPIU_SCALAR;

686:   PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
687:   PetscSFGetType(sf,&type);
688:   if (type) PetscSFSetType(*newsf,type);
689:   if (opt == PETSCSF_DUPLICATE_GRAPH) {
690:     PetscSFCheckGraphSet(sf,1);
691:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
692:       PetscInt          nroots,nleaves;
693:       const PetscInt    *ilocal;
694:       const PetscSFNode *iremote;
695:       PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
696:       PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);
697:     } else {
698:       PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
699:     }
700:   }
701:   /* Since oldtype is committed, so is newtype, according to MPI */
702:   if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit,&dtype);
703:   (*newsf)->vscat.bs     = sf->vscat.bs;
704:   (*newsf)->vscat.unit   = dtype;
705:   (*newsf)->vscat.to_n   = sf->vscat.to_n;
706:   (*newsf)->vscat.from_n = sf->vscat.from_n;
707:   /* Do not copy lsf. Build it on demand since it is rarely used */

709: #if defined(PETSC_HAVE_DEVICE)
710:   (*newsf)->backend              = sf->backend;
 711:   (*newsf)->unknown_input_stream = sf->unknown_input_stream;
712:   (*newsf)->use_gpu_aware_mpi    = sf->use_gpu_aware_mpi;
713:   (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
714: #endif
715:   if (sf->ops->Duplicate) (*sf->ops->Duplicate)(sf,opt,*newsf);
716:   /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
717:   return 0;
718: }

720: /*@C
721:    PetscSFGetGraph - Get the graph specifying a parallel star forest

723:    Not Collective

725:    Input Parameter:
726: .  sf - star forest

728:    Output Parameters:
 729: +  nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
730: .  nleaves - number of leaf vertices on the current process, each of these references a root on any process
731: .  ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
732: -  iremote - remote locations of root vertices for each leaf on the current process

734:    Notes:
 735:      We do not currently require that the graph be set, thus nroots is returned as -1 if it has not been set yet

737:      The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
738:      see its manpage for details.

740:    Fortran Notes:
741:      The returned iremote array is a copy and must be deallocated after use. Consequently, if you
742:      want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.

744:      To check for a NULL ilocal use
745: $      if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then

747:    Level: intermediate
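
   Example Usage:
   A sketch of iterating over the leaves of an sf whose graph has been set (C only):
.vb
   PetscInt          nroots,nleaves,i;
   const PetscInt    *ilocal;
   const PetscSFNode *iremote;

   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
   for (i=0; i<nleaves; i++) {
     PetscInt leaf = ilocal ? ilocal[i] : i;   // NULL ilocal means contiguous leaf storage
     // leaf 'leaf' references root iremote[i].index on rank iremote[i].rank
   }
.ve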

749: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
750: @*/
751: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
752: {
754:   if (sf->ops->GetGraph) {
755:     (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
756:   } else {
757:     if (nroots) *nroots = sf->nroots;
758:     if (nleaves) *nleaves = sf->nleaves;
759:     if (ilocal) *ilocal = sf->mine;
760:     if (iremote) *iremote = sf->remote;
761:   }
762:   return 0;
763: }

765: /*@
766:    PetscSFGetLeafRange - Get the active leaf ranges

768:    Not Collective

770:    Input Parameter:
771: .  sf - star forest

773:    Output Parameters:
 774: +  minleaf - minimum active leaf on this process. Returns 0 if there are no leaves.
 775: -  maxleaf - maximum active leaf on this process. Returns -1 if there are no leaves.

777:    Level: developer
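
   Example Usage:
   A sketch of sizing a dense leaf buffer from the active leaf range:
.vb
   PetscInt minleaf,maxleaf,nbuf;
   PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
   nbuf = maxleaf - minleaf + 1;   // active leaves lie in [minleaf,maxleaf]
.ve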

779: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
780: @*/
781: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
782: {
784:   PetscSFCheckGraphSet(sf,1);
785:   if (minleaf) *minleaf = sf->minleaf;
786:   if (maxleaf) *maxleaf = sf->maxleaf;
787:   return 0;
788: }

790: /*@C
 791:    PetscSFViewFromOptions - View a star forest based on values in the options database

793:    Collective on PetscSF

795:    Input Parameters:
796: +  A - the star forest
797: .  obj - Optional object
798: -  name - command line option

800:    Level: intermediate
 801: .seealso: PetscSF, PetscSFView(), PetscObjectViewFromOptions(), PetscSFCreate()
802: @*/
803: PetscErrorCode  PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
804: {
806:   PetscObjectViewFromOptions((PetscObject)A,obj,name);
807:   return 0;
808: }

810: /*@C
811:    PetscSFView - view a star forest

813:    Collective

815:    Input Parameters:
816: +  sf - star forest
817: -  viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD

819:    Level: beginner
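
   Example Usage:
   A one-line sketch printing the star forest on the communicator's standard output:
.vb
   PetscSFView(sf,PETSC_VIEWER_STDOUT_WORLD);
.ve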

821: .seealso: PetscSFCreate(), PetscSFSetGraph()
822: @*/
823: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
824: {
825:   PetscBool         iascii;
826:   PetscViewerFormat format;

829:   if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);
832:   if (sf->graphset) PetscSFSetUp(sf);
833:   PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
834:   if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
835:     PetscMPIInt rank;
836:     PetscInt    ii,i,j;

838:     PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
839:     PetscViewerASCIIPushTab(viewer);
840:     if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
841:       if (!sf->graphset) {
842:         PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
843:         PetscViewerASCIIPopTab(viewer);
844:         return 0;
845:       }
846:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
847:       PetscViewerASCIIPushSynchronized(viewer);
848:       PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);
849:       for (i=0; i<sf->nleaves; i++) {
850:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
851:       }
852:       PetscViewerFlush(viewer);
853:       PetscViewerGetFormat(viewer,&format);
854:       if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
855:         PetscMPIInt *tmpranks,*perm;
856:         PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
857:         PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
858:         for (i=0; i<sf->nranks; i++) perm[i] = i;
859:         PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
860:         PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
861:         for (ii=0; ii<sf->nranks; ii++) {
862:           i = perm[ii];
863:           PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
864:           for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
865:             PetscViewerASCIISynchronizedPrintf(viewer,"[%d]    %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);
866:           }
867:         }
868:         PetscFree2(tmpranks,perm);
869:       }
870:       PetscViewerFlush(viewer);
871:       PetscViewerASCIIPopSynchronized(viewer);
872:     }
873:     PetscViewerASCIIPopTab(viewer);
874:   }
875:   if (sf->ops->View) (*sf->ops->View)(sf,viewer);
876:   return 0;
877: }

879: /*@C
880:    PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process

882:    Not Collective

884:    Input Parameter:
885: .  sf - star forest

887:    Output Parameters:
888: +  nranks - number of ranks referenced by local part
889: .  ranks - array of ranks
890: .  roffset - offset in rmine/rremote for each rank (length nranks+1)
891: .  rmine - concatenated array holding local indices referencing each remote rank
892: -  rremote - concatenated array holding remote indices referenced for each remote rank

894:    Level: developer
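
   Example Usage:
   A sketch of walking the per-rank data of a set-up sf:
.vb
   PetscInt          nranks,r,j;
   const PetscMPIInt *ranks;
   const PetscInt    *roffset,*rmine,*rremote;

   PetscSFGetRootRanks(sf,&nranks,&ranks,&roffset,&rmine,&rremote);
   for (r=0; r<nranks; r++) {
     for (j=roffset[r]; j<roffset[r+1]; j++) {
       // local leaf rmine[j] references root rremote[j] on rank ranks[r]
     }
   }
.ve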

896: .seealso: PetscSFGetLeafRanks()
897: @*/
898: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
899: {
902:   if (sf->ops->GetRootRanks) {
903:     (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
904:   } else {
905:     /* The generic implementation */
906:     if (nranks)  *nranks  = sf->nranks;
907:     if (ranks)   *ranks   = sf->ranks;
908:     if (roffset) *roffset = sf->roffset;
909:     if (rmine)   *rmine   = sf->rmine;
910:     if (rremote) *rremote = sf->rremote;
911:   }
912:   return 0;
913: }

915: /*@C
916:    PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process

918:    Not Collective

920:    Input Parameter:
921: .  sf - star forest

923:    Output Parameters:
924: +  niranks - number of leaf ranks referencing roots on this process
925: .  iranks - array of ranks
926: .  ioffset - offset in irootloc for each rank (length niranks+1)
927: -  irootloc - concatenated array holding local indices of roots referenced by each leaf rank

929:    Level: developer

931: .seealso: PetscSFGetRootRanks()
932: @*/
933: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
934: {
937:   if (sf->ops->GetLeafRanks) {
938:     (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
939:   } else {
940:     PetscSFType type;
941:     PetscSFGetType(sf,&type);
942:     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
943:   }
944:   return 0;
945: }

947: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
948:   PetscInt i;
949:   for (i=0; i<n; i++) {
950:     if (needle == list[i]) return PETSC_TRUE;
951:   }
952:   return PETSC_FALSE;
953: }

955: /*@C
956:    PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.

958:    Collective

960:    Input Parameters:
961: +  sf - PetscSF to set up; PetscSFSetGraph() must have been called
962: -  dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)

964:    Level: developer

966: .seealso: PetscSFGetRootRanks()
967: @*/
968: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
969: {
970:   PetscTable         table;
971:   PetscTablePosition pos;
972:   PetscMPIInt        size,groupsize,*groupranks;
973:   PetscInt           *rcount,*ranks;
974:   PetscInt           i, irank = -1,orank = -1;

977:   PetscSFCheckGraphSet(sf,1);
978:   MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
979:   PetscTableCreate(10,size,&table);
980:   for (i=0; i<sf->nleaves; i++) {
 981:     /* Record ranks 1-based, since PetscTable keys must be positive */
982:     PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
983:   }
984:   PetscTableGetCount(table,&sf->nranks);
985:   PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
986:   PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
987:   PetscTableGetHeadPosition(table,&pos);
988:   for (i=0; i<sf->nranks; i++) {
989:     PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
990:     ranks[i]--;             /* Convert back to 0-based */
991:   }
992:   PetscTableDestroy(&table);

994:   /* We expect that dgroup is reliably "small" while nranks could be large */
995:   {
996:     MPI_Group group = MPI_GROUP_NULL;
997:     PetscMPIInt *dgroupranks;
998:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
999:     MPI_Group_size(dgroup,&groupsize);
1000:     PetscMalloc1(groupsize,&dgroupranks);
1001:     PetscMalloc1(groupsize,&groupranks);
1002:     for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1003:     if (groupsize) MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);
1004:     MPI_Group_free(&group);
1005:     PetscFree(dgroupranks);
1006:   }

1008:   /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1009:   for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1010:     for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1011:       if (InList(ranks[i],groupsize,groupranks)) break;
1012:     }
1013:     for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1014:       if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1015:     }
1016:     if (sf->ndranks < i) {                         /* Swap ranks[sf->ndranks] with ranks[i] */
1017:       PetscInt tmprank,tmpcount;

1019:       tmprank             = ranks[i];
1020:       tmpcount            = rcount[i];
1021:       ranks[i]            = ranks[sf->ndranks];
1022:       rcount[i]           = rcount[sf->ndranks];
1023:       ranks[sf->ndranks]  = tmprank;
1024:       rcount[sf->ndranks] = tmpcount;
1025:       sf->ndranks++;
1026:     }
1027:   }
1028:   PetscFree(groupranks);
1029:   PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1030:   PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1031:   sf->roffset[0] = 0;
1032:   for (i=0; i<sf->nranks; i++) {
1033:     PetscMPIIntCast(ranks[i],sf->ranks+i);
1034:     sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1035:     rcount[i]        = 0;
1036:   }
1037:   for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1038:     /* short circuit */
1039:     if (orank != sf->remote[i].rank) {
1040:       /* Search for index of iremote[i].rank in sf->ranks */
1041:       PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1042:       if (irank < 0) {
1043:         PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1044:         if (irank >= 0) irank += sf->ndranks;
1045:       }
1046:       orank = sf->remote[i].rank;
1047:     }
1049:     sf->rmine[sf->roffset[irank] + rcount[irank]]   = sf->mine ? sf->mine[i] : i;
1050:     sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1051:     rcount[irank]++;
1052:   }
1053:   PetscFree2(rcount,ranks);
1054:   return 0;
1055: }

1057: /*@C
1058:    PetscSFGetGroups - gets incoming and outgoing process groups

1060:    Collective

1062:    Input Parameter:
1063: .  sf - star forest

1065:    Output Parameters:
1066: +  incoming - group of origin processes for incoming edges (leaves that reference my roots)
1067: -  outgoing - group of destination processes for outgoing edges (roots that I reference)

1069:    Level: developer

1071: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1072: @*/
1073: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1074: {
1075:   MPI_Group      group = MPI_GROUP_NULL;

1078:   if (sf->ingroup == MPI_GROUP_NULL) {
1079:     PetscInt       i;
1080:     const PetscInt *indegree;
1081:     PetscMPIInt    rank,*outranks,*inranks;
1082:     PetscSFNode    *remote;
1083:     PetscSF        bgcount;

1085:     /* Compute the number of incoming ranks */
1086:     PetscMalloc1(sf->nranks,&remote);
1087:     for (i=0; i<sf->nranks; i++) {
1088:       remote[i].rank  = sf->ranks[i];
1089:       remote[i].index = 0;
1090:     }
1091:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1092:     PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1093:     PetscSFComputeDegreeBegin(bgcount,&indegree);
1094:     PetscSFComputeDegreeEnd(bgcount,&indegree);
1095:     /* Enumerate the incoming ranks */
1096:     PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1097:     MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1098:     for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1099:     PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1100:     PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1101:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1102:     MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1103:     MPI_Group_free(&group);
1104:     PetscFree2(inranks,outranks);
1105:     PetscSFDestroy(&bgcount);
1106:   }
1107:   *incoming = sf->ingroup;

1109:   if (sf->outgroup == MPI_GROUP_NULL) {
1110:     MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1111:     MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1112:     MPI_Group_free(&group);
1113:   }
1114:   *outgoing = sf->outgroup;
1115:   return 0;
1116: }

1118: /*@
1119:    PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters

1121:    Collective

1123:    Input Parameter:
1124: .  sf - star forest that may contain roots with 0 or with more than 1 vertex

1126:    Output Parameter:
1127: .  multi - star forest with split roots, such that each root has degree exactly 1

1129:    Level: developer

1131:    Notes:

1133:    In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1134:    directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1135:    edge, it is a candidate for future optimization that might involve its removal.

1137: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1138: @*/
1139: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1140: {
1143:   if (sf->nroots < 0) {         /* Graph has not been set yet; why do we need this? */
1144:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1145:     *multi = sf->multi;
1146:     sf->multi->multi = sf->multi;
1147:     return 0;
1148:   }
1149:   if (!sf->multi) {
1150:     const PetscInt *indegree;
1151:     PetscInt       i,*inoffset,*outones,*outoffset,maxlocal;
1152:     PetscSFNode    *remote;
1153:     maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1154:     PetscSFComputeDegreeBegin(sf,&indegree);
1155:     PetscSFComputeDegreeEnd(sf,&indegree);
1156:     PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1157:     inoffset[0] = 0;
1158:     for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1159:     for (i=0; i<maxlocal; i++) outones[i] = 1;
1160:     PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1161:     PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1162:     for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1163:     if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1164:       for (i=0; i<sf->nroots; i++) {
1166:       }
1167:     }
1168:     PetscMalloc1(sf->nleaves,&remote);
1169:     for (i=0; i<sf->nleaves; i++) {
1170:       remote[i].rank  = sf->remote[i].rank;
1171:       remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1172:     }
1173:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1174:     sf->multi->multi = sf->multi;
1175:     PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1176:     if (sf->rankorder) {        /* Sort the ranks */
1177:       PetscMPIInt rank;
1178:       PetscInt    *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1179:       PetscSFNode *newremote;
1180:       MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1181:       for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1182:       PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1183:       for (i=0; i<maxlocal; i++) outranks[i] = rank;
1184:       PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1185:       PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1186:       /* Sort the incoming ranks at each vertex, build the inverse map */
1187:       for (i=0; i<sf->nroots; i++) {
1188:         PetscInt j;
1189:         for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1190:         PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1191:         for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1192:       }
1193:       PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1194:       PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1195:       PetscMalloc1(sf->nleaves,&newremote);
1196:       for (i=0; i<sf->nleaves; i++) {
1197:         newremote[i].rank  = sf->remote[i].rank;
1198:         newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1199:       }
1200:       PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1201:       PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1202:     }
1203:     PetscFree3(inoffset,outones,outoffset);
1204:   }
1205:   *multi = sf->multi;
1206:   return 0;
1207: }

1209: /*@C
1210:    PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices

1212:    Collective

1214:    Input Parameters:
1215: +  sf - original star forest
1216: .  nselected  - number of selected roots on this process
1217: -  selected   - indices of the selected roots on this process

1219:    Output Parameter:
1220: .  esf - new star forest

1222:    Level: advanced

1224:    Note:
1225:    To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1226:    be done by calling PetscSFGetGraph().
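
   Example Usage:
   A minimal sketch; the selected root indices are illustrative only:
.vb
   PetscInt selected[2] = {0,2};
   PetscSF  esf;

   PetscSFCreateEmbeddedRootSF(sf,2,selected,&esf);
   // esf keeps only the edges whose roots are 0 or 2 on this process
   PetscSFDestroy(&esf);
.ve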

1228: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1229: @*/
1230: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1231: {
1232:   PetscInt          i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1233:   const PetscInt    *ilocal;
1234:   signed char       *rootdata,*leafdata,*leafmem;
1235:   const PetscSFNode *iremote;
1236:   PetscSFNode       *new_iremote;
1237:   MPI_Comm          comm;

1240:   PetscSFCheckGraphSet(sf,1);

1244:   PetscSFSetUp(sf);
1245:   PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1246:   PetscObjectGetComm((PetscObject)sf,&comm);
1247:   PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);

1249:   if (PetscDefined(USE_DEBUG)) {  /* Error out if selected[] has dups or  out of range indices */
1250:     PetscBool dups;
1253:     for (i=0; i<nselected; i++)
1255:   }

1257:   if (sf->ops->CreateEmbeddedRootSF) {
1258:     (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1259:   } else {
1260:     /* A generic version of creating embedded sf */
1261:     PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1262:     maxlocal = maxleaf - minleaf + 1;
1263:     PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1264:     leafdata = leafmem - minleaf;
1265:     /* Tag selected roots and bcast to leaves */
1266:     for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1267:     PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1268:     PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);

1270:     /* Build esf with leaves that are still connected */
1271:     esf_nleaves = 0;
1272:     for (i=0; i<nleaves; i++) {
1273:       j = ilocal ? ilocal[i] : i;
1274:       /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1275:          with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1276:       */
1277:       esf_nleaves += (leafdata[j] ? 1 : 0);
1278:     }
1279:     PetscMalloc1(esf_nleaves,&new_ilocal);
1280:     PetscMalloc1(esf_nleaves,&new_iremote);
1281:     for (i=n=0; i<nleaves; i++) {
1282:       j = ilocal ? ilocal[i] : i;
1283:       if (leafdata[j]) {
1284:         new_ilocal[n]        = j;
1285:         new_iremote[n].rank  = iremote[i].rank;
1286:         new_iremote[n].index = iremote[i].index;
1287:         ++n;
1288:       }
1289:     }
1290:     PetscSFCreate(comm,esf);
1291:     PetscSFSetFromOptions(*esf);
1292:     PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1293:     PetscFree2(rootdata,leafmem);
1294:   }
1295:   PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1296:   return 0;
1297: }

1299: /*@C
1300:   PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices

1302:   Collective

1304:   Input Parameters:
1305: + sf - original star forest
1306: . nselected  - number of selected leaves on this process
1307: - selected   - indices of the selected leaves on this process

1309:   Output Parameter:
1310: .  newsf - new star forest

1312:   Level: advanced
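
  Example Usage:
  A minimal sketch; the selected leaf indices are illustrative only:
.vb
   PetscInt selected[2] = {1,3};
   PetscSF  lsf;

   PetscSFCreateEmbeddedLeafSF(sf,2,selected,&lsf);
   PetscSFDestroy(&lsf);
.ve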

1314: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1315: @*/
1316: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1317: {
1318:   const PetscSFNode *iremote;
1319:   PetscSFNode       *new_iremote;
1320:   const PetscInt    *ilocal;
1321:   PetscInt          i,nroots,*leaves,*new_ilocal;
1322:   MPI_Comm          comm;

1325:   PetscSFCheckGraphSet(sf,1);

1329:   /* Uniq selected[] and put results in leaves[] */
1330:   PetscObjectGetComm((PetscObject)sf,&comm);
1331:   PetscMalloc1(nselected,&leaves);
1332:   PetscArraycpy(leaves,selected,nselected);
1333:   PetscSortedRemoveDupsInt(&nselected,leaves);

1336:   /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1337:   if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1338:     (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1339:   } else {
1340:     PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1341:     PetscMalloc1(nselected,&new_ilocal);
1342:     PetscMalloc1(nselected,&new_iremote);
1343:     for (i=0; i<nselected; ++i) {
1344:       const PetscInt l     = leaves[i];
1345:       new_ilocal[i]        = ilocal ? ilocal[l] : l;
1346:       new_iremote[i].rank  = iremote[l].rank;
1347:       new_iremote[i].index = iremote[l].index;
1348:     }
1349:     PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1350:     PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1351:   }
1352:   PetscFree(leaves);
1353:   return 0;
1354: }

1356: /*@C
 1357:    PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with a call to PetscSFBcastEnd()

1359:    Collective on PetscSF

1361:    Input Parameters:
1362: +  sf - star forest on which to communicate
1363: .  unit - data type associated with each node
1364: .  rootdata - buffer to broadcast
1365: -  op - operation to use for reduction

1367:    Output Parameter:
1368: .  leafdata - buffer to be reduced with values from each leaf's respective root

1370:    Level: intermediate

1372:    Notes:
 1373:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
 1374:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
 1375:     use PetscSFBcastWithMemTypeBegin() instead.
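
    Example Usage:
    A sketch; rootdata and leafdata are assumed to be caller-allocated arrays sized for this SF's roots and leaves:
.vb
   PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);   // with MPI_REPLACE, leaf values are overwritten by their root values
   // ... optionally overlap local work here ...
   PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
.ve
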
1376: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1377: @*/
1378: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1379: {
1380:   PetscMemType   rootmtype,leafmtype;

1383:   PetscSFSetUp(sf);
1384:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1385:   PetscGetMemType(rootdata,&rootmtype);
1386:   PetscGetMemType(leafdata,&leafmtype);
1387:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1388:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1389:   return 0;
1390: }

1392: /*@C
 1393:    PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with a call to PetscSFBcastEnd()

1395:    Collective on PetscSF

1397:    Input Parameters:
1398: +  sf - star forest on which to communicate
1399: .  unit - data type associated with each node
1400: .  rootmtype - memory type of rootdata
1401: .  rootdata - buffer to broadcast
1402: .  leafmtype - memory type of leafdata
1403: -  op - operation to use for reduction

1405:    Output Parameter:
1406: .  leafdata - buffer to be reduced with values from each leaf's respective root

1408:    Level: intermediate

1410: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1411: @*/
1412: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1413: {
1415:   PetscSFSetUp(sf);
1416:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1417:   (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1418:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1419:   return 0;
1420: }

1422: /*@C
1423:    PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()

1425:    Collective

1427:    Input Parameters:
1428: +  sf - star forest
1429: .  unit - data type
1430: .  rootdata - buffer to broadcast
1431: -  op - operation to use for reduction

1433:    Output Parameter:
1434: .  leafdata - buffer to be reduced with values from each leaf's respective root

1436:    Level: intermediate

1438: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1439: @*/
1440: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1441: {
1443:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);
1444:   (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1445:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);
1446:   return 0;
1447: }

1449: /*@C
 1450:    PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with a call to PetscSFReduceEnd()

1452:    Collective

1454:    Input Parameters:
1455: +  sf - star forest
1456: .  unit - data type
1457: .  leafdata - values to reduce
1458: -  op - reduction operation

1460:    Output Parameter:
1461: .  rootdata - result of reduction of values from all leaves of each root

1463:    Level: intermediate

1465:    Notes:
 1466:     When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
 1467:     are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
 1468:     use PetscSFReduceWithMemTypeBegin() instead.
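
    Example Usage:
    A sketch; leafdata and rootdata are assumed to be caller-allocated PetscScalar arrays sized for this SF:
.vb
   PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);   // sum leaf contributions into their roots
   PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPIU_SUM);
.ve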

1470: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1471: @*/
1472: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1473: {
1474:   PetscMemType   rootmtype,leafmtype;

1477:   PetscSFSetUp(sf);
1478:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1479:   PetscGetMemType(rootdata,&rootmtype);
1480:   PetscGetMemType(leafdata,&leafmtype);
1481:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1482:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1483:   return 0;
1484: }

1486: /*@C
 1487:    PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with a call to PetscSFReduceEnd()

1489:    Collective

1491:    Input Parameters:
1492: +  sf - star forest
1493: .  unit - data type
1494: .  leafmtype - memory type of leafdata
1495: .  leafdata - values to reduce
1496: .  rootmtype - memory type of rootdata
1497: -  op - reduction operation

1499:    Output Parameter:
1500: .  rootdata - result of reduction of values from all leaves of each root

1502:    Level: intermediate

1504: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1505: @*/
1506: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1507: {
1509:   PetscSFSetUp(sf);
1510:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1511:   (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1512:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1513:   return 0;
1514: }

1516: /*@C
1517:    PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()

1519:    Collective

1521:    Input Parameters:
1522: +  sf - star forest
1523: .  unit - data type
1524: .  leafdata - values to reduce
1525: -  op - reduction operation

1527:    Output Parameter:
1528: .  rootdata - result of reduction of values from all leaves of each root

1530:    Level: intermediate

1532: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1533: @*/
1534: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1535: {
1537:   if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1538:   (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1539:   if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1540:   return 0;
1541: }
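
/* Editorial sketch (not part of the original sf.c): summing one contribution per leaf into the roots;
   this is essentially the pattern PetscSFComputeDegreeBegin()/End() below use to compute root degrees.
   The helper name and the assumption of contiguous leaf storage (ilocal == NULL) are illustrative. */
static PetscErrorCode ExampleReduceSketch(PetscSF sf)
{
  PetscInt  i,nroots,nleaves;
  PetscInt *rootsum,*leafones;

  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscMalloc2(nroots,&rootsum,nleaves,&leafones);
  for (i=0; i<nroots; i++)  rootsum[i]  = 0;
  for (i=0; i<nleaves; i++) leafones[i] = 1;
  PetscSFReduceBegin(sf,MPIU_INT,leafones,rootsum,MPI_SUM); /* start leaf-to-root communication */
  PetscSFReduceEnd(sf,MPIU_INT,leafones,rootsum,MPI_SUM);   /* rootsum[r] now holds the number of leaves of root r */
  PetscFree2(rootsum,leafones);
  return 0;
}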

1543: /*@C
1544:    PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1546:    Collective

1548:    Input Parameters:
1549: +  sf - star forest
1550: .  unit - data type
1551: .  leafdata - leaf values to use in reduction
1552: -  op - operation to use for reduction

1554:    Output Parameters:
1555: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1556: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1558:    Level: advanced

1560:    Note:
1561:    The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1562:    might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1563:    not guaranteed across the whole vertex. Therefore, this function is typically used only with primitive types such
1564:    as integers.

1566: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1567: @*/
1568: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1569: {
1570:   PetscMemType   rootmtype,leafmtype,leafupdatemtype;

1573:   PetscSFSetUp(sf);
1574:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1575:   PetscGetMemType(rootdata,&rootmtype);
1576:   PetscGetMemType(leafdata,&leafmtype);
1577:   PetscGetMemType(leafupdate,&leafupdatemtype);
1579:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1580:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1581:   return 0;
1582: }

1584: /*@C
1585:    PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()

1587:    Collective

1589:    Input Parameters:
1590: +  sf - star forest
1591: .  unit - data type
1592: .  rootmtype - memory type of rootdata
1593: .  leafmtype - memory type of leafdata
1594: .  leafdata - leaf values to use in reduction
1595: .  leafupdatemtype - memory type of leafupdate
1596: -  op - operation to use for reduction

1598:    Output Parameters:
1599: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1600: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1602:    Level: advanced

1604:    Note: See PetscSFFetchAndOpBegin() for more details.

1606: .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1607: @*/
1608: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1609: {
1611:   PetscSFSetUp(sf);
1612:   PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1614:   (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1615:   PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1616:   return 0;
1617: }

1619: /*@C
1620:    PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value

1622:    Collective

1624:    Input Parameters:
1625: +  sf - star forest
1626: .  unit - data type
1627: .  leafdata - leaf values to use in reduction
1628: -  op - operation to use for reduction

1630:    Output Parameters:
1631: +  rootdata - root values to be updated; the input state is seen by the first process to perform an update
1632: -  leafupdate - state at each leaf's respective root immediately prior to my atomic update

1634:    Level: advanced

1636: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1637: @*/
1638: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1639: {
1641:   PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1642:   (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1643:   PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1644:   return 0;
1645: }
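
/* Editorial sketch (not part of the original sf.c): fetch-and-add used to hand out disjoint offsets at
   each root. Every leaf atomically adds leafcount[i] to its root's counter and receives in leafoffset[i]
   the counter value seen just before its own update. Names and the assumption of contiguous leaf storage
   are illustrative. */
static PetscErrorCode ExampleFetchAndOpSketch(PetscSF sf,const PetscInt *leafcount)
{
  PetscInt  i,nroots,nleaves;
  PetscInt *rootcounter,*leafoffset;

  PetscSFGetGraph(sf,&nroots,&nleaves,NULL,NULL);
  PetscMalloc2(nroots,&rootcounter,nleaves,&leafoffset);
  for (i=0; i<nroots; i++) rootcounter[i] = 0;
  PetscSFFetchAndOpBegin(sf,MPIU_INT,rootcounter,leafcount,leafoffset,MPI_SUM);
  PetscSFFetchAndOpEnd(sf,MPIU_INT,rootcounter,leafcount,leafoffset,MPI_SUM);
  /* leafoffset[i] is the start of the block reserved for leaf i at its root;
     rootcounter[r] ends up holding the total amount reserved at root r */
  PetscFree2(rootcounter,leafoffset);
  return 0;
}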

1647: /*@C
1648:    PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()

1650:    Collective

1652:    Input Parameter:
1653: .  sf - star forest

1655:    Output Parameter:
1656: .  degree - degree of each root vertex

1658:    Level: advanced

1660:    Notes:
1661:    The returned array is owned by the PetscSF and automatically freed by PetscSFDestroy(), so there is no need to call PetscFree() on it.

1663: .seealso: PetscSFGatherBegin()
1664: @*/
1665: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1666: {
1668:   PetscSFCheckGraphSet(sf,1);
1670:   if (!sf->degreeknown) {
1671:     PetscInt i, nroots = sf->nroots, maxlocal;
1673:     maxlocal = sf->maxleaf-sf->minleaf+1;
1674:     PetscMalloc1(nroots,&sf->degree);
1675:     PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1676:     for (i=0; i<nroots; i++) sf->degree[i] = 0;
1677:     for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1678:     PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1679:   }
1680:   *degree = NULL;
1681:   return 0;
1682: }

1684: /*@C
1685:    PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()

1687:    Collective

1689:    Input Parameter:
1690: .  sf - star forest

1692:    Output Parameter:
1693: .  degree - degree of each root vertex

1695:    Level: developer

1697:    Notes:
1698:    The returned array is owned by the PetscSF and automatically freed by PetscSFDestroy(), so there is no need to call PetscFree() on it.

1700: .seealso: PetscSFComputeDegreeBegin()
1701: @*/
1702: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1703: {
1705:   PetscSFCheckGraphSet(sf,1);
1707:   if (!sf->degreeknown) {
1709:     PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1710:     PetscFree(sf->degreetmp);
1711:     sf->degreeknown = PETSC_TRUE;
1712:   }
1713:   *degree = sf->degree;
1714:   return 0;
1715: }
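
/* Editorial sketch (not part of the original sf.c): obtaining the degree array. Note that *degree is
   only usable after PetscSFComputeDegreeEnd() returns, and the array is owned by the PetscSF, so the
   caller must not free it. The helper name and output parameter are illustrative. */
static PetscErrorCode ExampleDegreeSketch(PetscSF sf,PetscInt *nLeavesOfLocalRoots)
{
  const PetscInt *degree;
  PetscInt        i,nroots;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);        /* degree[] becomes valid here */
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  *nLeavesOfLocalRoots = 0;
  for (i=0; i<nroots; i++) *nLeavesOfLocalRoots += degree[i];
  /* degree is owned by sf: do not call PetscFree() on it */
  return 0;
}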

1717: /*@C
1718:    PetscSFComputeMultiRootOriginalNumbering - Returns the original numbering of multi-roots (roots of the multi-SF returned by PetscSFGetMultiSF()).
1719:    Each multi-root is assigned the index of its corresponding original root.

1721:    Collective

1723:    Input Parameters:
1724: +  sf - star forest
1725: -  degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()

1727:    Output Parameters:
1728: +  nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1729: -  multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots

1731:    Level: developer

1733:    Notes:
1734:    The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.

1736: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1737: @*/
1738: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1739: {
1740:   PetscSF             msf;
1741:   PetscInt            i, j, k, nroots, nmroots;

1744:   PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1748:   PetscSFGetMultiSF(sf,&msf);
1749:   PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1750:   PetscMalloc1(nmroots, multiRootsOrigNumbering);
1751:   for (i=0,j=0,k=0; i<nroots; i++) {
1752:     if (!degree[i]) continue;
1753:     for (j=0; j<degree[i]; j++,k++) {
1754:       (*multiRootsOrigNumbering)[k] = i;
1755:     }
1756:   }
1758:   if (nMultiRoots) *nMultiRoots = nmroots;
1759:   return 0;
1760: }
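
/* Editorial sketch (not part of the original sf.c): mapping multi-roots back to original roots. The
   degree array obtained with PetscSFComputeDegreeBegin()/End() feeds directly into this routine; the
   returned numbering array, unlike degree, is owned by the caller and must be freed. */
static PetscErrorCode ExampleMultiRootNumberingSketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt       *morig,nmroots;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&morig);
  /* morig[k] is the original root index of multi-root k, for 0 <= k < nmroots */
  PetscFree(morig);
  return 0;
}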

1762: /*@C
1763:    PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()

1765:    Collective

1767:    Input Parameters:
1768: +  sf - star forest
1769: .  unit - data type
1770: -  leafdata - leaf data to gather to roots

1772:    Output Parameter:
1773: .  multirootdata - root buffer to gather into; the amount of space per root is equal to its degree

1775:    Level: intermediate

1777: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1778: @*/
1779: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1780: {
1781:   PetscSF        multi = NULL;

1784:   PetscSFSetUp(sf);
1785:   PetscSFGetMultiSF(sf,&multi);
1786:   PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1787:   return 0;
1788: }

1790: /*@C
1791:    PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()

1793:    Collective

1795:    Input Parameters:
1796: +  sf - star forest
1797: .  unit - data type
1798: -  leafdata - leaf data to gather to roots

1800:    Output Parameter:
1801: .  multirootdata - root buffer to gather into; the amount of space per root is equal to its degree

1803:    Level: intermediate

1805: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1806: @*/
1807: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1808: {
1809:   PetscSF        multi = NULL;

1812:   PetscSFGetMultiSF(sf,&multi);
1813:   PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1814:   return 0;
1815: }
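
/* Editorial sketch (not part of the original sf.c): gathering every leaf value to its root without
   combining them. The multi-root buffer has one slot per (root,leaf) edge, so it is sized by the sum
   of the root degrees. Names and the assumption of contiguous leaf storage are illustrative. */
static PetscErrorCode ExampleGatherSketch(PetscSF sf,const PetscScalar *leafdata)
{
  const PetscInt *degree;
  PetscInt        i,nroots,nmulti = 0;
  PetscScalar    *multirootdata;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  for (i=0; i<nroots; i++) nmulti += degree[i];  /* one slot per incoming leaf */
  PetscMalloc1(nmulti,&multirootdata);
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,multirootdata);
  /* multirootdata now lists, root by root, the value contributed by each of that root's leaves */
  PetscFree(multirootdata);
  return 0;
}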

1817: /*@C
1818:    PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()

1820:    Collective

1822:    Input Parameters:
1823: +  sf - star forest
1824: .  unit - data type
1825: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1827:    Output Parameter:
1828: .  leafdata - leaf data to be updated with personal data from each respective root

1830:    Level: intermediate

1832: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1833: @*/
1834: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1835: {
1836:   PetscSF        multi = NULL;

1839:   PetscSFSetUp(sf);
1840:   PetscSFGetMultiSF(sf,&multi);
1841:   PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1842:   return 0;
1843: }

1845: /*@C
1846:    PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()

1848:    Collective

1850:    Input Parameters:
1851: +  sf - star forest
1852: .  unit - data type
1853: -  multirootdata - root buffer to send to each leaf, one unit of data per leaf

1855:    Output Parameter:
1856: .  leafdata - leaf data to be updated with personal data from each respective root

1858:    Level: intermediate

1860: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1861: @*/
1862: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1863: {
1864:   PetscSF        multi = NULL;

1867:   PetscSFGetMultiSF(sf,&multi);
1868:   PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1869:   return 0;
1870: }
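
/* Editorial sketch (not part of the original sf.c): the reverse of the gather sketch above, sending one
   (possibly different) value per (root,leaf) edge back to the leaves. multirootdata is assumed to be
   laid out root by root, as produced by PetscSFGatherBegin()/End(), and leaf storage is assumed
   contiguous (ilocal == NULL). */
static PetscErrorCode ExampleScatterSketch(PetscSF sf,const PetscScalar *multirootdata)
{
  PetscInt     nleaves;
  PetscScalar *leafdata;

  PetscSFGetGraph(sf,NULL,&nleaves,NULL,NULL);
  PetscMalloc1(nleaves,&leafdata);
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  /* leafdata[i] holds the personal value sent from leaf i's slot in the multi-root buffer */
  PetscFree(leafdata);
  return 0;
}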

1872: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1873: {
1874:   PetscInt        i, n, nleaves;
1875:   const PetscInt *ilocal = NULL;
1876:   PetscHSetI      seen;

1878:   if (PetscDefined(USE_DEBUG)) {
1879:     PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1880:     PetscHSetICreate(&seen);
1881:     for (i = 0; i < nleaves; i++) {
1882:       const PetscInt leaf = ilocal ? ilocal[i] : i;
1883:       PetscHSetIAdd(seen,leaf);
1884:     }
1885:     PetscHSetIGetSize(seen,&n);
1887:     PetscHSetIDestroy(&seen);
1888:   }
1889:   return 0;
1890: }

1892: /*@
1893:   PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view

1895:   Input Parameters:
1896: + sfA - The first PetscSF
1897: - sfB - The second PetscSF

1899:   Output Parameter:
1900: . sfBA - The composite SF

1902:   Level: developer

1904:   Notes:
1905:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1906:   forests, i.e. the same leaf is not connected with different roots.

1908:   sfA's leaf space and sfB's root space may partially overlap. The composition builds
1909:   a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1910:   nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1911:   Bcast on sfA followed by a Bcast on sfB, restricted to the connected nodes.

1913: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1914: @*/
1915: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1916: {
1917:   const PetscSFNode *remotePointsA,*remotePointsB;
1918:   PetscSFNode       *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1919:   const PetscInt    *localPointsA,*localPointsB;
1920:   PetscInt          *localPointsBA;
1921:   PetscInt          i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1922:   PetscBool         denseB;

1925:   PetscSFCheckGraphSet(sfA,1);
1927:   PetscSFCheckGraphSet(sfB,2);
1930:   PetscSFCheckLeavesUnique_Private(sfA);
1931:   PetscSFCheckLeavesUnique_Private(sfB);

1933:   PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1934:   PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1935:   if (localPointsA) {
1936:     PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1937:     for (i=0; i<numRootsB; i++) {
1938:       reorderedRemotePointsA[i].rank = -1;
1939:       reorderedRemotePointsA[i].index = -1;
1940:     }
1941:     for (i=0; i<numLeavesA; i++) {
1942:       if (localPointsA[i] >= numRootsB) continue;
1943:       reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1944:     }
1945:     remotePointsA = reorderedRemotePointsA;
1946:   }
1947:   PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1948:   PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1949:   PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1950:   PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1951:   PetscFree(reorderedRemotePointsA);

1953:   denseB = (PetscBool)!localPointsB;
1954:   for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1955:     if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1956:     else numLeavesBA++;
1957:   }
1958:   if (denseB) {
1959:     localPointsBA  = NULL;
1960:     remotePointsBA = leafdataB;
1961:   } else {
1962:     PetscMalloc1(numLeavesBA,&localPointsBA);
1963:     PetscMalloc1(numLeavesBA,&remotePointsBA);
1964:     for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1965:       const PetscInt l = localPointsB ? localPointsB[i] : i;

1967:       if (leafdataB[l-minleaf].rank == -1) continue;
1968:       remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1969:       localPointsBA[numLeavesBA] = l;
1970:       numLeavesBA++;
1971:     }
1972:     PetscFree(leafdataB);
1973:   }
1974:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1975:   PetscSFSetFromOptions(*sfBA);
1976:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1977:   return 0;
1978: }
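
/* Editorial sketch (not part of the original sf.c): a tiny serial composition. sfA connects its single
   root to two leaves, which are exactly the two roots of sfB; sfB fans each of its roots out to two
   leaves. The composite sfBA therefore connects sfA's root to all four of sfB's leaves. All graph data
   below is illustrative. */
static PetscErrorCode ExampleComposeSketch(void)
{
  PetscSF     sfA,sfB,sfBA;
  PetscSFNode remoteA[2],remoteB[4];
  PetscInt    i;

  for (i=0; i<2; i++) { remoteA[i].rank = 0; remoteA[i].index = 0; }   /* both leaves of sfA -> root 0 of sfA */
  for (i=0; i<4; i++) { remoteB[i].rank = 0; remoteB[i].index = i/2; } /* leaves 0,1 -> root 0; leaves 2,3 -> root 1 */
  PetscSFCreate(PETSC_COMM_SELF,&sfA);
  PetscSFSetGraph(sfA,1,2,NULL,PETSC_COPY_VALUES,remoteA,PETSC_COPY_VALUES);
  PetscSFCreate(PETSC_COMM_SELF,&sfB);
  PetscSFSetGraph(sfB,2,4,NULL,PETSC_COPY_VALUES,remoteB,PETSC_COPY_VALUES);
  PetscSFCompose(sfA,sfB,&sfBA); /* sfBA: 1 root, 4 leaves, every leaf reachable from root 0 of sfA */
  PetscSFDestroy(&sfBA);
  PetscSFDestroy(&sfB);
  PetscSFDestroy(&sfA);
  return 0;
}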

1980: /*@
1981:   PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one

1983:   Input Parameters:
1984: + sfA - The first PetscSF
1985: - sfB - The second PetscSF

1987:   Output Parameter:
1988: . sfBA - The composite SF.

1990:   Level: developer

1992:   Notes:
1993:   Currently, the two SFs must be defined on congruent communicators and they must be true star
1994:   forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
1995:   second SF must have a degree of 1, i.e., no root has more than one leaf connected to it.

1997:   sfA's leaf space and sfB's leaf space may partially overlap. The composition builds
1998:   a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1999:   roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA followed
2000:   by a Reduce on sfB, restricted to the connected roots.

2002: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2003: @*/
2004: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2005: {
2006:   const PetscSFNode *remotePointsA,*remotePointsB;
2007:   PetscSFNode       *remotePointsBA;
2008:   const PetscInt    *localPointsA,*localPointsB;
2009:   PetscSFNode       *reorderedRemotePointsA = NULL;
2010:   PetscInt          i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2011:   MPI_Op            op;
2012: #if defined(PETSC_USE_64BIT_INDICES)
2013:   PetscBool         iswin;
2014: #endif

2017:   PetscSFCheckGraphSet(sfA,1);
2019:   PetscSFCheckGraphSet(sfB,2);
2022:   PetscSFCheckLeavesUnique_Private(sfA);
2023:   PetscSFCheckLeavesUnique_Private(sfB);

2025:   PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2026:   PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);

2028:   /* TODO: Check roots of sfB have degree of 1 */
2029:   /* Once we implement it, we can replace the MPI_MAXLOC
2030:      with MPI_REPLACE. In that case, MPI_MAXLOC and MPI_REPLACE have the same effect.
2031:      We use MPI_MAXLOC only to have a deterministic output from this routine if
2032:      the root condition is not met.
2033:    */
2034:   op = MPI_MAXLOC;
2035: #if defined(PETSC_USE_64BIT_INDICES)
2036:   /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2037:   PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2038:   if (iswin) op = MPI_REPLACE;
2039: #endif

2041:   PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2042:   PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2043:   for (i=0; i<maxleaf - minleaf + 1; i++) {
2044:     reorderedRemotePointsA[i].rank = -1;
2045:     reorderedRemotePointsA[i].index = -1;
2046:   }
2047:   if (localPointsA) {
2048:     for (i=0; i<numLeavesA; i++) {
2049:       if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2050:       reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2051:     }
2052:   } else {
2053:     for (i=0; i<numLeavesA; i++) {
2054:       if (i > maxleaf || i < minleaf) continue;
2055:       reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2056:     }
2057:   }

2059:   PetscMalloc1(numRootsB,&localPointsBA);
2060:   PetscMalloc1(numRootsB,&remotePointsBA);
2061:   for (i=0; i<numRootsB; i++) {
2062:     remotePointsBA[i].rank = -1;
2063:     remotePointsBA[i].index = -1;
2064:   }

2066:   PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2067:   PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2068:   PetscFree(reorderedRemotePointsA);
2069:   for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2070:     if (remotePointsBA[i].rank == -1) continue;
2071:     remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2072:     remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2073:     localPointsBA[numLeavesBA] = i;
2074:     numLeavesBA++;
2075:   }
2076:   PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2077:   PetscSFSetFromOptions(*sfBA);
2078:   PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2079:   return 0;
2080: }

2082: /*
2083:   PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF

2085:   Input Parameters:
2086: . sf - The global PetscSF

2088:   Output Parameters:
2089: . out - The local PetscSF
2090:  */
2091: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2092: {
2093:   MPI_Comm           comm;
2094:   PetscMPIInt        myrank;
2095:   const PetscInt     *ilocal;
2096:   const PetscSFNode  *iremote;
2097:   PetscInt           i,j,nroots,nleaves,lnleaves,*lilocal;
2098:   PetscSFNode        *liremote;
2099:   PetscSF            lsf;

2102:   if (sf->ops->CreateLocalSF) {
2103:     (*sf->ops->CreateLocalSF)(sf,out);
2104:   } else {
2105:     /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2106:     PetscObjectGetComm((PetscObject)sf,&comm);
2107:     MPI_Comm_rank(comm,&myrank);

2109:     /* Find out local edges and build a local SF */
2110:     PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2111:     for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2112:     PetscMalloc1(lnleaves,&lilocal);
2113:     PetscMalloc1(lnleaves,&liremote);

2115:     for (i=j=0; i<nleaves; i++) {
2116:       if (iremote[i].rank == (PetscInt)myrank) {
2117:         lilocal[j]        = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2118:         liremote[j].rank  = 0; /* rank in PETSC_COMM_SELF */
2119:         liremote[j].index = iremote[i].index;
2120:         j++;
2121:       }
2122:     }
2123:     PetscSFCreate(PETSC_COMM_SELF,&lsf);
2124:     PetscSFSetFromOptions(lsf);
2125:     PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2126:     PetscSFSetUp(lsf);
2127:     *out = lsf;
2128:   }
2129:   return 0;
2130: }

2132: /* Similar to PetscSFBcastBegin()/PetscSFBcastEnd(), but only broadcasts to leaves on rank 0 */
2133: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2134: {
2135:   PetscMemType       rootmtype,leafmtype;

2138:   PetscSFSetUp(sf);
2139:   PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2140:   PetscGetMemType(rootdata,&rootmtype);
2141:   PetscGetMemType(leafdata,&leafmtype);
2142:   if (sf->ops->BcastToZero) {
2143:     (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2144:   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2145:   PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2146:   return 0;
2147: }

2149: /*@
2150:   PetscSFConcatenate - concatenate multiple SFs into one

2152:   Input Parameters:
2153: + comm - the communicator
2154: . nsfs - the number of input PetscSF
2155: . sfs  - the array of input PetscSF
2156: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE) or as separate and concatenated (PETSC_FALSE)
2157: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage

2159:   Output Parameter:
2160: . newsf - The resulting PetscSF

2162:   Level: developer

2164:   Notes:
2165:   The communicator of all SFs in sfs must be comm.

2167:   The offsets in leafOffsets are added to the original leaf indices.

2169:   If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2170:   In this case, NULL is also passed as ilocal to the resulting SF.

2172:   If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2173:   In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).

2175: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2176: @*/
2177: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2178: {
2179:   PetscInt            i, s, nLeaves, nRoots;
2180:   PetscInt           *leafArrayOffsets;
2181:   PetscInt           *ilocal_new;
2182:   PetscSFNode        *iremote_new;
2183:   PetscInt           *rootOffsets;
2184:   PetscBool           all_ilocal_null = PETSC_FALSE;
2185:   PetscMPIInt         rank;

2187:   {
2188:     PetscSF dummy; /* just to have a PetscObject on comm for input validation */

2190:     PetscSFCreate(comm,&dummy);
2193:     for (i=0; i<nsfs; i++) {
2196:     }
2200:     PetscSFDestroy(&dummy);
2201:   }
2202:   if (!nsfs) {
2203:     PetscSFCreate(comm, newsf);
2204:     PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2205:     return 0;
2206:   }
2207:   MPI_Comm_rank(comm, &rank);

2209:   PetscCalloc1(nsfs+1, &rootOffsets);
2210:   if (shareRoots) {
2211:     PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2212:     if (PetscDefined(USE_DEBUG)) {
2213:       for (s=1; s<nsfs; s++) {
2214:         PetscInt nr;

2216:         PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2218:       }
2219:     }
2220:   } else {
2221:     for (s=0; s<nsfs; s++) {
2222:       PetscInt nr;

2224:       PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2225:       rootOffsets[s+1] = rootOffsets[s] + nr;
2226:     }
2227:     nRoots = rootOffsets[nsfs];
2228:   }

2230:   /* Calculate leaf array offsets and automatic root offsets */
2231:   PetscMalloc1(nsfs+1,&leafArrayOffsets);
2232:   leafArrayOffsets[0] = 0;
2233:   for (s=0; s<nsfs; s++) {
2234:     PetscInt        nl;

2236:     PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2237:     leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
2238:   }
2239:   nLeaves = leafArrayOffsets[nsfs];

2241:   if (!leafOffsets) {
2242:     all_ilocal_null = PETSC_TRUE;
2243:     for (s=0; s<nsfs; s++) {
2244:       const PetscInt *ilocal;

2246:       PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2247:       if (ilocal) {
2248:         all_ilocal_null = PETSC_FALSE;
2249:         break;
2250:       }
2251:     }
2253:   }

2255:   /* Renumber and concatenate local leaves */
2256:   ilocal_new = NULL;
2257:   if (!all_ilocal_null) {
2258:     PetscMalloc1(nLeaves, &ilocal_new);
2259:     for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
2260:     for (s = 0; s<nsfs; s++) {
2261:       const PetscInt   *ilocal;
2262:       PetscInt         *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2263:       PetscInt          i, nleaves_l;

2265:       PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2266:       for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2267:     }
2268:   }

2270:   /* Renumber and concatenate remote roots */
2271:   PetscMalloc1(nLeaves, &iremote_new);
2272:   for (i = 0; i < nLeaves; i++) {
2273:     iremote_new[i].rank   = -1;
2274:     iremote_new[i].index  = -1;
2275:   }
2276:   for (s = 0; s<nsfs; s++) {
2277:     PetscInt            i, nl, nr;
2278:     PetscSF             tmp_sf;
2279:     const PetscSFNode  *iremote;
2280:     PetscSFNode        *tmp_rootdata;
2281:     PetscSFNode        *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];

2283:     PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2284:     PetscSFCreate(comm, &tmp_sf);
2285:     /* create helper SF with contiguous leaves */
2286:     PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);
2287:     PetscSFSetUp(tmp_sf);
2288:     PetscMalloc1(nr, &tmp_rootdata);
2289:     for (i = 0; i < nr; i++) {
2290:       tmp_rootdata[i].index = i + rootOffsets[s];
2291:       tmp_rootdata[i].rank  = (PetscInt) rank;
2292:     }
2293:     PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2294:     PetscSFBcastEnd(  tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2295:     PetscSFDestroy(&tmp_sf);
2296:     PetscFree(tmp_rootdata);
2297:   }

2299:   /* Build the new SF */
2300:   PetscSFCreate(comm, newsf);
2301:   PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2302:   PetscSFSetUp(*newsf);
2303:   PetscFree(rootOffsets);
2304:   PetscFree(leafArrayOffsets);
2305:   return 0;
2306: }
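
/* Editorial sketch (not part of the original sf.c): concatenating two SFs that both use contiguous leaf
   storage, so leafOffsets may be passed as NULL. With shareRoots = PETSC_FALSE the root spaces are
   stacked one after the other. sf0 and sf1 are assumed to live on comm with their graphs already set. */
static PetscErrorCode ExampleConcatenateSketch(MPI_Comm comm,PetscSF sf0,PetscSF sf1)
{
  PetscSF sfs[2],newsf;

  sfs[0] = sf0;
  sfs[1] = sf1;
  PetscSFConcatenate(comm,2,sfs,PETSC_FALSE,NULL,&newsf);
  /* newsf has nroots(sf0)+nroots(sf1) roots and nleaves(sf0)+nleaves(sf1) leaves */
  PetscSFDestroy(&newsf);
  return 0;
}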