Actual source code: sf.c
1: #include <petsc/private/sfimpl.h>
2: #include <petsc/private/hashseti.h>
3: #include <petsc/private/viewerimpl.h>
4: #include <petscctable.h>
6: #if defined(PETSC_HAVE_CUDA)
7: #include <cuda_runtime.h>
8: #endif
10: #if defined(PETSC_HAVE_HIP)
11: #include <hip/hip_runtime.h>
12: #endif
14: #if defined(PETSC_CLANG_STATIC_ANALYZER)
15: void PetscSFCheckGraphSet(PetscSF,int);
16: #else
17: #if defined(PETSC_USE_DEBUG)
18: # define PetscSFCheckGraphSet(sf,arg) do { \
19: if (PetscUnlikely(!(sf)->graphset)) \
20: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call PetscSFSetGraph() or PetscSFSetGraphWithPattern() on argument %d \"%s\" before %s()",(arg),#sf,PETSC_FUNCTION_NAME); \
21: } while (0)
22: #else
23: # define PetscSFCheckGraphSet(sf,arg) do {} while (0)
24: #endif
25: #endif
27: const char *const PetscSFDuplicateOptions[] = {"CONFONLY","RANKS","GRAPH","PetscSFDuplicateOption","PETSCSF_DUPLICATE_",NULL};
29: /*@
30: PetscSFCreate - create a star forest communication context
32: Collective
34: Input Parameter:
35: . comm - communicator on which the star forest will operate
37: Output Parameter:
38: . sf - new star forest context
40: Options Database Keys:
41: + -sf_type basic -Use MPI persistent Isend/Irecv for communication (Default)
42: . -sf_type window -Use MPI-3 one-sided window for communication
43: - -sf_type neighbor -Use MPI-3 neighborhood collectives for communication
45: Level: intermediate
47: Notes:
48:    When one knows the communication graph is one of the predefined graphs, such as MPI_Alltoall, MPI_Allgatherv,
49:    MPI_Gatherv, one can create a PetscSF and then set its graph with PetscSFSetGraphWithPattern(). These special
50:    SFs are optimized and have better performance than general SFs.
52: .seealso: PetscSFSetGraph(), PetscSFSetGraphWithPattern(), PetscSFDestroy()
53: @*/
54: PetscErrorCode PetscSFCreate(MPI_Comm comm,PetscSF *sf)
55: {
56: PetscSF b;
59: PetscSFInitializePackage();
61: PetscHeaderCreate(b,PETSCSF_CLASSID,"PetscSF","Star Forest","PetscSF",comm,PetscSFDestroy,PetscSFView);
63: b->nroots = -1;
64: b->nleaves = -1;
65: b->minleaf = PETSC_MAX_INT;
66: b->maxleaf = PETSC_MIN_INT;
67: b->nranks = -1;
68: b->rankorder = PETSC_TRUE;
69: b->ingroup = MPI_GROUP_NULL;
70: b->outgroup = MPI_GROUP_NULL;
71: b->graphset = PETSC_FALSE;
72: #if defined(PETSC_HAVE_DEVICE)
73: b->use_gpu_aware_mpi = use_gpu_aware_mpi;
74: b->use_stream_aware_mpi = PETSC_FALSE;
75: b->unknown_input_stream= PETSC_FALSE;
76: #if defined(PETSC_HAVE_KOKKOS) /* Prefer kokkos over cuda*/
77: b->backend = PETSCSF_BACKEND_KOKKOS;
78: #elif defined(PETSC_HAVE_CUDA)
79: b->backend = PETSCSF_BACKEND_CUDA;
80: #elif defined(PETSC_HAVE_HIP)
81: b->backend = PETSCSF_BACKEND_HIP;
82: #endif
84: #if defined(PETSC_HAVE_NVSHMEM)
85: b->use_nvshmem = PETSC_FALSE; /* Default is not to try NVSHMEM */
86: b->use_nvshmem_get = PETSC_FALSE; /* Default is to use nvshmem_put based protocol */
87: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem",&b->use_nvshmem,NULL);
88: PetscOptionsGetBool(NULL,NULL,"-use_nvshmem_get",&b->use_nvshmem_get,NULL);
89: #endif
90: #endif
91: b->vscat.from_n = -1;
92: b->vscat.to_n = -1;
93: b->vscat.unit = MPIU_SCALAR;
94: *sf = b;
95: return 0;
96: }
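/* A minimal sketch of the PetscSFCreate()/PetscSFDestroy() lifecycle, assuming PETSc has been
   initialized. The helper name below is hypothetical and purely illustrative; error codes are
   ignored here, matching the surrounding style. */
static PetscErrorCode SFLifecycle_ExampleSketch(MPI_Comm comm)
{
  PetscSF sf;

  PetscSFCreate(comm,&sf);   /* communicator is fixed for the life of the object */
  /* ... describe the graph with PetscSFSetGraph() or PetscSFSetGraphWithPattern() ... */
  PetscSFDestroy(&sf);       /* collective; sets sf to NULL */
  return 0;
}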
98: /*@
99: PetscSFReset - Reset a star forest so that different sizes or neighbors can be used
101: Collective
103: Input Parameter:
104: . sf - star forest
106: Level: advanced
108: .seealso: PetscSFCreate(), PetscSFSetGraph(), PetscSFDestroy()
109: @*/
110: PetscErrorCode PetscSFReset(PetscSF sf)
111: {
113: if (sf->ops->Reset) (*sf->ops->Reset)(sf);
114: sf->nroots = -1;
115: sf->nleaves = -1;
116: sf->minleaf = PETSC_MAX_INT;
117: sf->maxleaf = PETSC_MIN_INT;
118: sf->mine = NULL;
119: sf->remote = NULL;
120: sf->graphset = PETSC_FALSE;
121: PetscFree(sf->mine_alloc);
122: PetscFree(sf->remote_alloc);
123: sf->nranks = -1;
124: PetscFree4(sf->ranks,sf->roffset,sf->rmine,sf->rremote);
125: sf->degreeknown = PETSC_FALSE;
126: PetscFree(sf->degree);
127: if (sf->ingroup != MPI_GROUP_NULL) MPI_Group_free(&sf->ingroup);
128: if (sf->outgroup != MPI_GROUP_NULL) MPI_Group_free(&sf->outgroup);
129: if (sf->multi) sf->multi->multi = NULL;
130: PetscSFDestroy(&sf->multi);
131: PetscLayoutDestroy(&sf->map);
133: #if defined(PETSC_HAVE_DEVICE)
134: for (PetscInt i=0; i<2; i++) PetscSFFree(sf,PETSC_MEMTYPE_DEVICE,sf->rmine_d[i]);
135: #endif
137: sf->setupcalled = PETSC_FALSE;
138: return 0;
139: }
141: /*@C
142: PetscSFSetType - Set the PetscSF communication implementation
144: Collective on PetscSF
146: Input Parameters:
147: + sf - the PetscSF context
148: - type - a known method
150: Options Database Key:
151: . -sf_type <type> - Sets the method; use -help for a list
152: of available methods (for instance, window, basic, neighbor)
154: Notes:
155: See "include/petscsf.h" for available methods (for instance)
156: + PETSCSFWINDOW - MPI-2/3 one-sided
157: - PETSCSFBASIC - basic implementation using MPI-1 two-sided
159: Level: intermediate
161: .seealso: PetscSFType, PetscSFCreate()
162: @*/
163: PetscErrorCode PetscSFSetType(PetscSF sf,PetscSFType type)
164: {
165: PetscBool match;
166: PetscErrorCode (*r)(PetscSF);
171: PetscObjectTypeCompare((PetscObject)sf,type,&match);
172: if (match) return 0;
174: PetscFunctionListFind(PetscSFList,type,&r);
176: /* Destroy the previous PetscSF implementation context */
177: if (sf->ops->Destroy) (*(sf)->ops->Destroy)(sf);
178: PetscMemzero(sf->ops,sizeof(*sf->ops));
179: PetscObjectChangeTypeName((PetscObject)sf,type);
180: (*r)(sf);
181: return 0;
182: }
184: /*@C
185: PetscSFGetType - Get the PetscSF communication implementation
187: Not Collective
189: Input Parameter:
190: . sf - the PetscSF context
192: Output Parameter:
193: . type - the PetscSF type name
195: Level: intermediate
197: .seealso: PetscSFSetType(), PetscSFCreate()
198: @*/
199: PetscErrorCode PetscSFGetType(PetscSF sf, PetscSFType *type)
200: {
203: *type = ((PetscObject)sf)->type_name;
204: return 0;
205: }
207: /*@C
208: PetscSFDestroy - destroy star forest
210: Collective
212: Input Parameter:
213: . sf - address of star forest
215: Level: intermediate
217: .seealso: PetscSFCreate(), PetscSFReset()
218: @*/
219: PetscErrorCode PetscSFDestroy(PetscSF *sf)
220: {
221: if (!*sf) return 0;
223: if (--((PetscObject)(*sf))->refct > 0) {*sf = NULL; return 0;}
224: PetscSFReset(*sf);
225: if ((*sf)->ops->Destroy) (*(*sf)->ops->Destroy)(*sf);
226: PetscSFDestroy(&(*sf)->vscat.lsf);
227: if ((*sf)->vscat.bs > 1) MPI_Type_free(&(*sf)->vscat.unit);
228: PetscHeaderDestroy(sf);
229: return 0;
230: }
232: static PetscErrorCode PetscSFCheckGraphValid_Private(PetscSF sf)
233: {
234: PetscInt i, nleaves;
235: PetscMPIInt size;
236: const PetscInt *ilocal;
237: const PetscSFNode *iremote;
239: if (!sf->graphset || !PetscDefined(USE_DEBUG)) return 0;
240: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
241: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
242: for (i = 0; i < nleaves; i++) {
243: const PetscInt rank = iremote[i].rank;
244: const PetscInt remote = iremote[i].index;
245: const PetscInt leaf = ilocal ? ilocal[i] : i;
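    /* A plausible sketch of the validity checks at this point (the exact macros and messages in
       PETSc may differ): each leaf location must be >= 0 and each remote root must name a valid
       rank and a nonnegative index. */
    if (leaf < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided location (%" PetscInt_FMT ") for leaf %" PetscInt_FMT " is invalid, should be >= 0",leaf,i);
    if (rank < 0 || rank >= size) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided rank (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be in [0, %d)",rank,i,size);
    if (remote < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Provided index (%" PetscInt_FMT ") for remote %" PetscInt_FMT " is invalid, should be >= 0",remote,i);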
249: }
250: return 0;
251: }
253: /*@
254: PetscSFSetUp - set up communication structures
256: Collective
258: Input Parameter:
259: . sf - star forest communication object
261: Level: beginner
263: .seealso: PetscSFSetFromOptions(), PetscSFSetType()
264: @*/
265: PetscErrorCode PetscSFSetUp(PetscSF sf)
266: {
268: PetscSFCheckGraphSet(sf,1);
269: if (sf->setupcalled) return 0;
270: PetscLogEventBegin(PETSCSF_SetUp,sf,0,0,0);
271: PetscSFCheckGraphValid_Private(sf);
272: if (!((PetscObject)sf)->type_name) PetscSFSetType(sf,PETSCSFBASIC); /* Zero all sf->ops */
273: if (sf->ops->SetUp) (*sf->ops->SetUp)(sf);
274: #if defined(PETSC_HAVE_CUDA)
275: if (sf->backend == PETSCSF_BACKEND_CUDA) {
276: sf->ops->Malloc = PetscSFMalloc_CUDA;
277: sf->ops->Free = PetscSFFree_CUDA;
278: }
279: #endif
280: #if defined(PETSC_HAVE_HIP)
281: if (sf->backend == PETSCSF_BACKEND_HIP) {
282: sf->ops->Malloc = PetscSFMalloc_HIP;
283: sf->ops->Free = PetscSFFree_HIP;
284: }
285: #endif
288: #if defined(PETSC_HAVE_KOKKOS)
289: if (sf->backend == PETSCSF_BACKEND_KOKKOS) {
290: sf->ops->Malloc = PetscSFMalloc_Kokkos;
291: sf->ops->Free = PetscSFFree_Kokkos;
292: }
293: #endif
294: PetscLogEventEnd(PETSCSF_SetUp,sf,0,0,0);
295: sf->setupcalled = PETSC_TRUE;
296: return 0;
297: }
299: /*@
300: PetscSFSetFromOptions - set PetscSF options using the options database
302: Logically Collective
304: Input Parameter:
305: . sf - star forest
307: Options Database Keys:
308: + -sf_type - implementation type, see PetscSFSetType()
309: . -sf_rank_order - sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise
310: . -sf_use_default_stream - Assume callers of SF computed the input root/leafdata with the default cuda stream. SF will also
311: use the default stream to process data. Therefore, no stream synchronization is needed between SF and its caller (default: true).
312: If true, this option only works with -use_gpu_aware_mpi 1.
313: . -sf_use_stream_aware_mpi - Assume the underlying MPI is cuda-stream aware and SF won't sync streams for send/recv buffers passed to MPI (default: false).
314: If true, this option only works with -use_gpu_aware_mpi 1.
316: - -sf_backend cuda | hip | kokkos - Select the device backend SF uses. Currently SF has these backends: cuda, hip and Kokkos.
317: On CUDA (HIP) devices, one can choose cuda (hip) or kokkos with the default being kokkos. On other devices,
318: the only available backend is kokkos.
320: Level: intermediate
321: @*/
322: PetscErrorCode PetscSFSetFromOptions(PetscSF sf)
323: {
324: PetscSFType deft;
325: char type[256];
327: PetscBool flg;
330: PetscObjectOptionsBegin((PetscObject)sf);
331: deft = ((PetscObject)sf)->type_name ? ((PetscObject)sf)->type_name : PETSCSFBASIC;
332: PetscOptionsFList("-sf_type","PetscSF implementation type","PetscSFSetType",PetscSFList,deft,type,sizeof(type),&flg);
333: PetscSFSetType(sf,flg ? type : deft);
334: PetscOptionsBool("-sf_rank_order","sort composite points for gathers and scatters in rank order, gathers are non-deterministic otherwise","PetscSFSetRankOrder",sf->rankorder,&sf->rankorder,NULL);
335: #if defined(PETSC_HAVE_DEVICE)
336: {
337: char backendstr[32] = {0};
338: PetscBool isCuda = PETSC_FALSE,isHip = PETSC_FALSE,isKokkos = PETSC_FALSE,set;
339: /* Change the defaults set in PetscSFCreate() with command line options */
340: PetscOptionsBool("-sf_unknown_input_stream","SF root/leafdata is computed on arbitrary streams unknown to SF","PetscSFSetFromOptions",sf->unknown_input_stream,&sf->unknown_input_stream,NULL);
341: PetscOptionsBool("-sf_use_stream_aware_mpi","Assume the underlying MPI is cuda-stream aware","PetscSFSetFromOptions",sf->use_stream_aware_mpi,&sf->use_stream_aware_mpi,NULL);
342: PetscOptionsString("-sf_backend","Select the device backend SF uses","PetscSFSetFromOptions",NULL,backendstr,sizeof(backendstr),&set);
343: PetscStrcasecmp("cuda",backendstr,&isCuda);
344: PetscStrcasecmp("kokkos",backendstr,&isKokkos);
345: PetscStrcasecmp("hip",backendstr,&isHip);
346: #if defined(PETSC_HAVE_CUDA) || defined(PETSC_HAVE_HIP)
347: if (isCuda) sf->backend = PETSCSF_BACKEND_CUDA;
348: else if (isKokkos) sf->backend = PETSCSF_BACKEND_KOKKOS;
349: else if (isHip) sf->backend = PETSCSF_BACKEND_HIP;
351: #elif defined(PETSC_HAVE_KOKKOS)
353: #endif
354: }
355: #endif
356: if (sf->ops->SetFromOptions) (*sf->ops->SetFromOptions)(PetscOptionsObject,sf);
357: PetscOptionsEnd();
358: return 0;
359: }
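/* A short configuration sketch, assuming the usual PETSc options database is in use. It shows
   the typical ordering Create -> SetFromOptions -> SetGraph -> SetUp; the helper name and its
   parameters are hypothetical. Command-line examples: -sf_type neighbor, and on builds with
   device support, -sf_backend kokkos. */
static PetscErrorCode SFConfigureFromOptions_ExampleSketch(MPI_Comm comm,PetscInt nroots,PetscInt nleaves,PetscSFNode *iremote,PetscSF *sf)
{
  PetscSFCreate(comm,sf);
  PetscSFSetFromOptions(*sf);                                                           /* honor -sf_type, -sf_backend, ... */
  PetscSFSetGraph(*sf,nroots,nleaves,NULL,PETSC_COPY_VALUES,iremote,PETSC_COPY_VALUES); /* NULL ilocal: contiguous leaves */
  PetscSFSetUp(*sf);                                                                    /* build communication structures */
  return 0;
}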
361: /*@
362: PetscSFSetRankOrder - sort multi-points for gathers and scatters by rank order
364: Logically Collective
366: Input Parameters:
367: + sf - star forest
368: - flg - PETSC_TRUE to sort, PETSC_FALSE to skip sorting (lower setup cost, but non-deterministic)
370: Level: advanced
372: .seealso: PetscSFGatherBegin(), PetscSFScatterBegin()
373: @*/
374: PetscErrorCode PetscSFSetRankOrder(PetscSF sf,PetscBool flg)
375: {
379: sf->rankorder = flg;
380: return 0;
381: }
383: /*@C
384: PetscSFSetGraph - Set a parallel star forest
386: Collective
388: Input Parameters:
389: + sf - star forest
390: . nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
391: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
392: . ilocal - locations of leaves in leafdata buffers, pass NULL for contiguous storage (locations must be >= 0, enforced
393: during setup in debug mode)
394: . localmode - copy mode for ilocal
395: . iremote - remote locations of root vertices for each leaf on the current process (locations must be >= 0, enforced
396: during setup in debug mode)
397: - remotemode - copy mode for iremote
399: Level: intermediate
401: Notes:
402: Leaf indices in ilocal must be unique, otherwise an error occurs.
404: Input arrays ilocal and iremote follow the PetscCopyMode semantics.
405: In particular, if localmode/remotemode is PETSC_OWN_POINTER or PETSC_USE_POINTER,
406: PETSc might modify the respective array;
407: if PETSC_USE_POINTER, the user must delete the array after PetscSFDestroy().
408: Only if PETSC_COPY_VALUES is used is the respective array guaranteed to stay intact, and a const array can be passed (but a cast to non-const is needed).
410: Fortran Notes:
411: In Fortran you must use PETSC_COPY_VALUES for localmode and remotemode.
413: Developer Notes:
414: We sort the leaves to check for duplicates and contiguity and to find minleaf/maxleaf.
415: This also makes it easy to compare the leaf sets of two SFs.
417: .seealso: PetscSFCreate(), PetscSFView(), PetscSFGetGraph()
418: @*/
419: PetscErrorCode PetscSFSetGraph(PetscSF sf,PetscInt nroots,PetscInt nleaves,PetscInt *ilocal,PetscCopyMode localmode,PetscSFNode *iremote,PetscCopyMode remotemode)
420: {
421: PetscBool unique, contiguous;
431: if (sf->nroots >= 0) { /* Reset only if graph already set */
432: PetscSFReset(sf);
433: }
435: PetscLogEventBegin(PETSCSF_SetGraph,sf,0,0,0);
437: sf->nroots = nroots;
438: sf->nleaves = nleaves;
440: if (localmode == PETSC_COPY_VALUES && ilocal) {
441: PetscInt *tlocal = NULL;
443: PetscMalloc1(nleaves,&tlocal);
444: PetscArraycpy(tlocal,ilocal,nleaves);
445: ilocal = tlocal;
446: }
447: if (remotemode == PETSC_COPY_VALUES) {
448: PetscSFNode *tremote = NULL;
450: PetscMalloc1(nleaves,&tremote);
451: PetscArraycpy(tremote,iremote,nleaves);
452: iremote = tremote;
453: }
455: if (nleaves && ilocal) {
456: PetscSFNode work;
458: PetscSortIntWithDataArray(nleaves, ilocal, iremote, sizeof(PetscSFNode), &work);
459: PetscSortedCheckDupsInt(nleaves, ilocal, &unique);
460: unique = PetscNot(unique);
462: sf->minleaf = ilocal[0];
463: sf->maxleaf = ilocal[nleaves-1];
464: contiguous = (PetscBool) (unique && ilocal[0] == 0 && ilocal[nleaves-1] == nleaves-1);
465: } else {
466: sf->minleaf = 0;
467: sf->maxleaf = nleaves - 1;
468: unique = PETSC_TRUE;
469: contiguous = PETSC_TRUE;
470: }
472: if (contiguous) {
473: if (localmode == PETSC_USE_POINTER) {
474: ilocal = NULL;
475: } else {
476: PetscFree(ilocal);
477: }
478: }
479: sf->mine = ilocal;
480: if (localmode == PETSC_USE_POINTER) {
481: sf->mine_alloc = NULL;
482: } else {
483: sf->mine_alloc = ilocal;
484: }
485: sf->remote = iremote;
486: if (remotemode == PETSC_USE_POINTER) {
487: sf->remote_alloc = NULL;
488: } else {
489: sf->remote_alloc = iremote;
490: }
491: PetscLogEventEnd(PETSCSF_SetGraph,sf,0,0,0);
492: sf->graphset = PETSC_TRUE;
493: return 0;
494: }
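/* A small sketch of PetscSFSetGraph() usage, assuming each process owns one root and one leaf
   that references the root on the next rank (a ring). The helper name is hypothetical;
   PETSC_COPY_VALUES lets the stack array be passed safely. */
static PetscErrorCode SFBuildRingGraph_ExampleSketch(MPI_Comm comm,PetscSF *sf)
{
  PetscMPIInt rank,size;
  PetscSFNode remote;

  MPI_Comm_rank(comm,&rank);
  MPI_Comm_size(comm,&size);
  remote.rank  = (rank + 1) % size;  /* owner of the referenced root: next process in the ring */
  remote.index = 0;                  /* the single root on that process */
  PetscSFCreate(comm,sf);
  /* one root, one leaf; NULL ilocal means the leaf is stored contiguously at position 0 */
  PetscSFSetGraph(*sf,1,1,NULL,PETSC_COPY_VALUES,&remote,PETSC_COPY_VALUES);
  PetscSFSetUp(*sf);
  return 0;
}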
496: /*@
497: PetscSFSetGraphWithPattern - Sets the graph of an SF with a specific pattern
499: Collective
501: Input Parameters:
502: + sf - The PetscSF
503: . map - Layout of roots over all processes (insignificant when pattern is PETSCSF_PATTERN_ALLTOALL)
504: - pattern - One of PETSCSF_PATTERN_ALLGATHER, PETSCSF_PATTERN_GATHER, PETSCSF_PATTERN_ALLTOALL
506: Notes:
507: It is easier to explain PetscSFPattern using vectors. Suppose we have an MPI vector x and its layout is map.
508: n and N are local and global sizes of x respectively.
510: With PETSCSF_PATTERN_ALLGATHER, the routine creates a graph such that a Bcast on it copies x to
511: sequential vectors y on all ranks.
513: With PETSCSF_PATTERN_GATHER, the routine creates a graph such that a Bcast on it copies x to a
514: sequential vector y on rank 0.
516: In the above cases, entries of x are roots and entries of y are leaves.
518: With PETSCSF_PATTERN_ALLTOALL, map is insignificant. Suppose NP is the size of sf's communicator. The routine
519: creates a graph in which every rank has NP leaves and NP roots. On rank i, its leaf j is connected to root i
520: of rank j. Here 0 <= i,j < NP. It is a kind of MPI_Alltoall with sendcount/recvcount being 1. Note that this does
521: not mean one cannot send multiple items. One just needs to create a new MPI datatype for the multiple data
522: items with MPI_Type_contiguous() and use that as the <unit> argument in SF routines.
524: In this case, roots and leaves are symmetric.
526: Level: intermediate
527: @*/
528: PetscErrorCode PetscSFSetGraphWithPattern(PetscSF sf,PetscLayout map,PetscSFPattern pattern)
529: {
530: MPI_Comm comm;
531: PetscInt n,N,res[2];
532: PetscMPIInt rank,size;
533: PetscSFType type;
537: PetscObjectGetComm((PetscObject)sf, &comm);
539: MPI_Comm_rank(comm,&rank);
540: MPI_Comm_size(comm,&size);
542: if (pattern == PETSCSF_PATTERN_ALLTOALL) {
543: type = PETSCSFALLTOALL;
544: PetscLayoutCreate(comm,&sf->map);
545: PetscLayoutSetLocalSize(sf->map,size);
546: PetscLayoutSetSize(sf->map,((PetscInt)size)*size);
547: PetscLayoutSetUp(sf->map);
548: } else {
549: PetscLayoutGetLocalSize(map,&n);
550: PetscLayoutGetSize(map,&N);
551: res[0] = n;
552: res[1] = -n;
553: /* Check if n is the same over all ranks so that we can optimize it */
554: MPIU_Allreduce(MPI_IN_PLACE,res,2,MPIU_INT,MPI_MAX,comm);
555: if (res[0] == -res[1]) { /* same n */
556: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHER : PETSCSFGATHER;
557: } else {
558: type = (pattern == PETSCSF_PATTERN_ALLGATHER) ? PETSCSFALLGATHERV : PETSCSFGATHERV;
559: }
560: PetscLayoutReference(map,&sf->map);
561: }
562: PetscSFSetType(sf,type);
564: sf->pattern = pattern;
565: sf->mine = NULL; /* Contiguous */
567: /* Set nleaves, nroots here in case user calls PetscSFGetGraph, which is legal to call even before PetscSFSetUp is called.
568: Also set other easy stuff.
569: */
570: if (pattern == PETSCSF_PATTERN_ALLGATHER) {
571: sf->nleaves = N;
572: sf->nroots = n;
573: sf->nranks = size;
574: sf->minleaf = 0;
575: sf->maxleaf = N - 1;
576: } else if (pattern == PETSCSF_PATTERN_GATHER) {
577: sf->nleaves = rank ? 0 : N;
578: sf->nroots = n;
579: sf->nranks = rank ? 0 : size;
580: sf->minleaf = 0;
581: sf->maxleaf = rank ? -1 : N - 1;
582: } else if (pattern == PETSCSF_PATTERN_ALLTOALL) {
583: sf->nleaves = size;
584: sf->nroots = size;
585: sf->nranks = size;
586: sf->minleaf = 0;
587: sf->maxleaf = size - 1;
588: }
589: sf->ndranks = 0; /* We do not need to separate out distinguished ranks for patterned graphs to improve communication performance */
590: sf->graphset = PETSC_TRUE;
591: return 0;
592: }
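/* A sketch of PetscSFSetGraphWithPattern() with PETSCSF_PATTERN_ALLGATHER, assuming each
   process contributes n local roots; a Bcast on the resulting SF replicates all roots as
   leaves on every process. The helper name is hypothetical. */
static PetscErrorCode SFBuildAllgather_ExampleSketch(MPI_Comm comm,PetscInt n,PetscSF *sf)
{
  PetscLayout map;

  PetscLayoutCreate(comm,&map);
  PetscLayoutSetLocalSize(map,n);
  PetscLayoutSetUp(map);
  PetscSFCreate(comm,sf);
  PetscSFSetGraphWithPattern(*sf,map,PETSCSF_PATTERN_ALLGATHER);
  PetscSFSetUp(*sf);
  PetscLayoutDestroy(&map);  /* the SF keeps its own reference to the layout */
  return 0;
}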
594: /*@
595: PetscSFCreateInverseSF - given a PetscSF in which all vertices have degree 1, creates the inverse map
597: Collective
599: Input Parameter:
600: . sf - star forest to invert
602: Output Parameter:
603: . isf - inverse of sf
605: Level: advanced
607: Notes:
608: All roots must have degree 1.
610: The local space may be a permutation, but cannot be sparse.
612: .seealso: PetscSFSetGraph()
613: @*/
614: PetscErrorCode PetscSFCreateInverseSF(PetscSF sf,PetscSF *isf)
615: {
616: PetscMPIInt rank;
617: PetscInt i,nroots,nleaves,maxlocal,count,*newilocal;
618: const PetscInt *ilocal;
619: PetscSFNode *roots,*leaves;
622: PetscSFCheckGraphSet(sf,1);
625: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,NULL);
626: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
628: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
629: PetscMalloc2(nroots,&roots,maxlocal,&leaves);
630: for (i=0; i<maxlocal; i++) {
631: leaves[i].rank = rank;
632: leaves[i].index = i;
633: }
634: for (i=0; i <nroots; i++) {
635: roots[i].rank = -1;
636: roots[i].index = -1;
637: }
638: PetscSFReduceBegin(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
639: PetscSFReduceEnd(sf,MPIU_2INT,leaves,roots,MPI_REPLACE);
641: /* Check whether our leaves are sparse */
642: for (i=0,count=0; i<nroots; i++) if (roots[i].rank >= 0) count++;
643: if (count == nroots) newilocal = NULL;
644: else { /* Index for sparse leaves and compact "roots" array (which is to become our leaves). */
645: PetscMalloc1(count,&newilocal);
646: for (i=0,count=0; i<nroots; i++) {
647: if (roots[i].rank >= 0) {
648: newilocal[count] = i;
649: roots[count].rank = roots[i].rank;
650: roots[count].index = roots[i].index;
651: count++;
652: }
653: }
654: }
656: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,isf);
657: PetscSFSetGraph(*isf,maxlocal,count,newilocal,PETSC_OWN_POINTER,roots,PETSC_COPY_VALUES);
658: PetscFree2(roots,leaves);
659: return 0;
660: }
662: /*@
663: PetscSFDuplicate - duplicate a PetscSF, optionally preserving rank connectivity and graph
665: Collective
667: Input Parameters:
668: + sf - communication object to duplicate
669: - opt - PETSCSF_DUPLICATE_CONFONLY, PETSCSF_DUPLICATE_RANKS, or PETSCSF_DUPLICATE_GRAPH (see PetscSFDuplicateOption)
671: Output Parameter:
672: . newsf - new communication object
674: Level: beginner
676: .seealso: PetscSFCreate(), PetscSFSetType(), PetscSFSetGraph()
677: @*/
678: PetscErrorCode PetscSFDuplicate(PetscSF sf,PetscSFDuplicateOption opt,PetscSF *newsf)
679: {
680: PetscSFType type;
681: MPI_Datatype dtype=MPIU_SCALAR;
686: PetscSFCreate(PetscObjectComm((PetscObject)sf),newsf);
687: PetscSFGetType(sf,&type);
688: if (type) PetscSFSetType(*newsf,type);
689: if (opt == PETSCSF_DUPLICATE_GRAPH) {
690: PetscSFCheckGraphSet(sf,1);
691: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
692: PetscInt nroots,nleaves;
693: const PetscInt *ilocal;
694: const PetscSFNode *iremote;
695: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
696: PetscSFSetGraph(*newsf,nroots,nleaves,(PetscInt*)ilocal,PETSC_COPY_VALUES,(PetscSFNode*)iremote,PETSC_COPY_VALUES);
697: } else {
698: PetscSFSetGraphWithPattern(*newsf,sf->map,sf->pattern);
699: }
700: }
701: /* Since oldtype is committed, so is newtype, according to MPI */
702: if (sf->vscat.bs > 1) MPI_Type_dup(sf->vscat.unit,&dtype);
703: (*newsf)->vscat.bs = sf->vscat.bs;
704: (*newsf)->vscat.unit = dtype;
705: (*newsf)->vscat.to_n = sf->vscat.to_n;
706: (*newsf)->vscat.from_n = sf->vscat.from_n;
707: /* Do not copy lsf. Build it on demand since it is rarely used */
709: #if defined(PETSC_HAVE_DEVICE)
710: (*newsf)->backend = sf->backend;
711: (*newsf)->unknown_input_stream= sf->unknown_input_stream;
712: (*newsf)->use_gpu_aware_mpi = sf->use_gpu_aware_mpi;
713: (*newsf)->use_stream_aware_mpi = sf->use_stream_aware_mpi;
714: #endif
715: if (sf->ops->Duplicate) (*sf->ops->Duplicate)(sf,opt,*newsf);
716: /* Don't do PetscSFSetUp() since the new sf's graph might have not been set. */
717: return 0;
718: }
720: /*@C
721: PetscSFGetGraph - Get the graph specifying a parallel star forest
723: Not Collective
725: Input Parameter:
726: . sf - star forest
728: Output Parameters:
729: + nroots - number of root vertices on the current process (these are possible targets for other processes to attach leaves)
730: . nleaves - number of leaf vertices on the current process, each of these references a root on any process
731: . ilocal - locations of leaves in leafdata buffers (if returned value is NULL, it means leaves are in contiguous storage)
732: - iremote - remote locations of root vertices for each leaf on the current process
734: Notes:
735: We are not currently requiring that the graph is set, thus returning nroots=-1 if it has not been set yet
737: The returned ilocal/iremote might contain values in different order than the input ones in PetscSFSetGraph(),
738: see its manpage for details.
740: Fortran Notes:
741: The returned iremote array is a copy and must be deallocated after use. Consequently, if you
742: want to update the graph, you must call PetscSFSetGraph() after modifying the iremote array.
744: To check for a NULL ilocal use
745: $ if (loc(ilocal) == loc(PETSC_NULL_INTEGER)) then
747: Level: intermediate
749: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph()
750: @*/
751: PetscErrorCode PetscSFGetGraph(PetscSF sf,PetscInt *nroots,PetscInt *nleaves,const PetscInt **ilocal,const PetscSFNode **iremote)
752: {
754: if (sf->ops->GetGraph) {
755: (sf->ops->GetGraph)(sf,nroots,nleaves,ilocal,iremote);
756: } else {
757: if (nroots) *nroots = sf->nroots;
758: if (nleaves) *nleaves = sf->nleaves;
759: if (ilocal) *ilocal = sf->mine;
760: if (iremote) *iremote = sf->remote;
761: }
762: return 0;
763: }
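/* A sketch of reading a graph back with PetscSFGetGraph(), assuming the graph has been set.
   It prints each leaf and the (rank,index) of the root it references; ilocal == NULL means
   leaves are stored contiguously. The helper name is hypothetical. */
static PetscErrorCode SFPrintGraph_ExampleSketch(PetscSF sf)
{
  PetscInt          i,nleaves;
  const PetscInt    *ilocal;
  const PetscSFNode *iremote;

  PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,&iremote);
  for (i=0; i<nleaves; i++) {
    const PetscInt leaf = ilocal ? ilocal[i] : i;
    PetscPrintf(PETSC_COMM_SELF,"leaf %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",leaf,iremote[i].rank,iremote[i].index);
  }
  return 0;
}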
765: /*@
766: PetscSFGetLeafRange - Get the active leaf ranges
768: Not Collective
770: Input Parameter:
771: . sf - star forest
773: Output Parameters:
774: + minleaf - minimum active leaf on this process. Return 0 if there are no leaves.
775: - maxleaf - maximum active leaf on this process. Return -1 if there are no leaves.
777: Level: developer
779: .seealso: PetscSFCreate(), PetscSFView(), PetscSFSetGraph(), PetscSFGetGraph()
780: @*/
781: PetscErrorCode PetscSFGetLeafRange(PetscSF sf,PetscInt *minleaf,PetscInt *maxleaf)
782: {
784: PetscSFCheckGraphSet(sf,1);
785: if (minleaf) *minleaf = sf->minleaf;
786: if (maxleaf) *maxleaf = sf->maxleaf;
787: return 0;
788: }
790: /*@C
791: PetscSFViewFromOptions - View a PetscSF based on values in the options database
793: Collective on PetscSF
795: Input Parameters:
796: + A - the star forest
797: . obj - Optional object
798: - name - command line option
800: Level: intermediate
801: .seealso: PetscSF, PetscSFView, PetscObjectViewFromOptions(), PetscSFCreate()
802: @*/
803: PetscErrorCode PetscSFViewFromOptions(PetscSF A,PetscObject obj,const char name[])
804: {
806: PetscObjectViewFromOptions((PetscObject)A,obj,name);
807: return 0;
808: }
810: /*@C
811: PetscSFView - view a star forest
813: Collective
815: Input Parameters:
816: + sf - star forest
817: - viewer - viewer to display graph, for example PETSC_VIEWER_STDOUT_WORLD
819: Level: beginner
821: .seealso: PetscSFCreate(), PetscSFSetGraph()
822: @*/
823: PetscErrorCode PetscSFView(PetscSF sf,PetscViewer viewer)
824: {
825: PetscBool iascii;
826: PetscViewerFormat format;
829: if (!viewer) PetscViewerASCIIGetStdout(PetscObjectComm((PetscObject)sf),&viewer);
832: if (sf->graphset) PetscSFSetUp(sf);
833: PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);
834: if (iascii && viewer->format != PETSC_VIEWER_ASCII_MATLAB) {
835: PetscMPIInt rank;
836: PetscInt ii,i,j;
838: PetscObjectPrintClassNamePrefixType((PetscObject)sf,viewer);
839: PetscViewerASCIIPushTab(viewer);
840: if (sf->pattern == PETSCSF_PATTERN_GENERAL) {
841: if (!sf->graphset) {
842: PetscViewerASCIIPrintf(viewer,"PetscSFSetGraph() has not been called yet\n");
843: PetscViewerASCIIPopTab(viewer);
844: return 0;
845: }
846: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
847: PetscViewerASCIIPushSynchronized(viewer);
848: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Number of roots=%" PetscInt_FMT ", leaves=%" PetscInt_FMT ", remote ranks=%" PetscInt_FMT "\n",rank,sf->nroots,sf->nleaves,sf->nranks);
849: for (i=0; i<sf->nleaves; i++) {
850: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- (%" PetscInt_FMT ",%" PetscInt_FMT ")\n",rank,sf->mine ? sf->mine[i] : i,sf->remote[i].rank,sf->remote[i].index);
851: }
852: PetscViewerFlush(viewer);
853: PetscViewerGetFormat(viewer,&format);
854: if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
855: PetscMPIInt *tmpranks,*perm;
856: PetscMalloc2(sf->nranks,&tmpranks,sf->nranks,&perm);
857: PetscArraycpy(tmpranks,sf->ranks,sf->nranks);
858: for (i=0; i<sf->nranks; i++) perm[i] = i;
859: PetscSortMPIIntWithArray(sf->nranks,tmpranks,perm);
860: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Roots referenced by my leaves, by rank\n",rank);
861: for (ii=0; ii<sf->nranks; ii++) {
862: i = perm[ii];
863: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %d: %" PetscInt_FMT " edges\n",rank,sf->ranks[i],sf->roffset[i+1]-sf->roffset[i]);
864: for (j=sf->roffset[i]; j<sf->roffset[i+1]; j++) {
865: PetscViewerASCIISynchronizedPrintf(viewer,"[%d] %" PetscInt_FMT " <- %" PetscInt_FMT "\n",rank,sf->rmine[j],sf->rremote[j]);
866: }
867: }
868: PetscFree2(tmpranks,perm);
869: }
870: PetscViewerFlush(viewer);
871: PetscViewerASCIIPopSynchronized(viewer);
872: }
873: PetscViewerASCIIPopTab(viewer);
874: }
875: if (sf->ops->View) (*sf->ops->View)(sf,viewer);
876: return 0;
877: }
879: /*@C
880: PetscSFGetRootRanks - Get root ranks and number of vertices referenced by leaves on this process
882: Not Collective
884: Input Parameter:
885: . sf - star forest
887: Output Parameters:
888: + nranks - number of ranks referenced by local part
889: . ranks - array of ranks
890: . roffset - offset in rmine/rremote for each rank (length nranks+1)
891: . rmine - concatenated array holding local indices referencing each remote rank
892: - rremote - concatenated array holding remote indices referenced for each remote rank
894: Level: developer
896: .seealso: PetscSFGetLeafRanks()
897: @*/
898: PetscErrorCode PetscSFGetRootRanks(PetscSF sf,PetscInt *nranks,const PetscMPIInt **ranks,const PetscInt **roffset,const PetscInt **rmine,const PetscInt **rremote)
899: {
902: if (sf->ops->GetRootRanks) {
903: (sf->ops->GetRootRanks)(sf,nranks,ranks,roffset,rmine,rremote);
904: } else {
905: /* The generic implementation */
906: if (nranks) *nranks = sf->nranks;
907: if (ranks) *ranks = sf->ranks;
908: if (roffset) *roffset = sf->roffset;
909: if (rmine) *rmine = sf->rmine;
910: if (rremote) *rremote = sf->rremote;
911: }
912: return 0;
913: }
915: /*@C
916: PetscSFGetLeafRanks - Get leaf ranks referencing roots on this process
918: Not Collective
920: Input Parameter:
921: . sf - star forest
923: Output Parameters:
924: + niranks - number of leaf ranks referencing roots on this process
925: . iranks - array of ranks
926: . ioffset - offset in irootloc for each rank (length niranks+1)
927: - irootloc - concatenated array holding local indices of roots referenced by each leaf rank
929: Level: developer
931: .seealso: PetscSFGetRootRanks()
932: @*/
933: PetscErrorCode PetscSFGetLeafRanks(PetscSF sf,PetscInt *niranks,const PetscMPIInt **iranks,const PetscInt **ioffset,const PetscInt **irootloc)
934: {
937: if (sf->ops->GetLeafRanks) {
938: (sf->ops->GetLeafRanks)(sf,niranks,iranks,ioffset,irootloc);
939: } else {
940: PetscSFType type;
941: PetscSFGetType(sf,&type);
942: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFGetLeafRanks() is not supported on this StarForest type: %s", type);
943: }
944: return 0;
945: }
947: static PetscBool InList(PetscMPIInt needle,PetscMPIInt n,const PetscMPIInt *list) {
948: PetscInt i;
949: for (i=0; i<n; i++) {
950: if (needle == list[i]) return PETSC_TRUE;
951: }
952: return PETSC_FALSE;
953: }
955: /*@C
956: PetscSFSetUpRanks - Set up data structures associated with ranks; this is for internal use by PetscSF implementations.
958: Collective
960: Input Parameters:
961: + sf - PetscSF to set up; PetscSFSetGraph() must have been called
962: - dgroup - MPI_Group of ranks to be distinguished (e.g., for self or shared memory exchange)
964: Level: developer
966: .seealso: PetscSFGetRootRanks()
967: @*/
968: PetscErrorCode PetscSFSetUpRanks(PetscSF sf,MPI_Group dgroup)
969: {
970: PetscTable table;
971: PetscTablePosition pos;
972: PetscMPIInt size,groupsize,*groupranks;
973: PetscInt *rcount,*ranks;
974: PetscInt i, irank = -1,orank = -1;
977: PetscSFCheckGraphSet(sf,1);
978: MPI_Comm_size(PetscObjectComm((PetscObject)sf),&size);
979: PetscTableCreate(10,size,&table);
980: for (i=0; i<sf->nleaves; i++) {
981: /* Log 1-based rank */
982: PetscTableAdd(table,sf->remote[i].rank+1,1,ADD_VALUES);
983: }
984: PetscTableGetCount(table,&sf->nranks);
985: PetscMalloc4(sf->nranks,&sf->ranks,sf->nranks+1,&sf->roffset,sf->nleaves,&sf->rmine,sf->nleaves,&sf->rremote);
986: PetscMalloc2(sf->nranks,&rcount,sf->nranks,&ranks);
987: PetscTableGetHeadPosition(table,&pos);
988: for (i=0; i<sf->nranks; i++) {
989: PetscTableGetNext(table,&pos,&ranks[i],&rcount[i]);
990: ranks[i]--; /* Convert back to 0-based */
991: }
992: PetscTableDestroy(&table);
994: /* We expect that dgroup is reliably "small" while nranks could be large */
995: {
996: MPI_Group group = MPI_GROUP_NULL;
997: PetscMPIInt *dgroupranks;
998: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
999: MPI_Group_size(dgroup,&groupsize);
1000: PetscMalloc1(groupsize,&dgroupranks);
1001: PetscMalloc1(groupsize,&groupranks);
1002: for (i=0; i<groupsize; i++) dgroupranks[i] = i;
1003: if (groupsize) MPI_Group_translate_ranks(dgroup,groupsize,dgroupranks,group,groupranks);
1004: MPI_Group_free(&group);
1005: PetscFree(dgroupranks);
1006: }
1008: /* Partition ranks[] into distinguished (first sf->ndranks) followed by non-distinguished */
1009: for (sf->ndranks=0,i=sf->nranks; sf->ndranks<i;) {
1010: for (i--; sf->ndranks<i; i--) { /* Scan i backward looking for distinguished rank */
1011: if (InList(ranks[i],groupsize,groupranks)) break;
1012: }
1013: for (; sf->ndranks<=i; sf->ndranks++) { /* Scan sf->ndranks forward looking for non-distinguished rank */
1014: if (!InList(ranks[sf->ndranks],groupsize,groupranks)) break;
1015: }
1016: if (sf->ndranks < i) { /* Swap ranks[sf->ndranks] with ranks[i] */
1017: PetscInt tmprank,tmpcount;
1019: tmprank = ranks[i];
1020: tmpcount = rcount[i];
1021: ranks[i] = ranks[sf->ndranks];
1022: rcount[i] = rcount[sf->ndranks];
1023: ranks[sf->ndranks] = tmprank;
1024: rcount[sf->ndranks] = tmpcount;
1025: sf->ndranks++;
1026: }
1027: }
1028: PetscFree(groupranks);
1029: PetscSortIntWithArray(sf->ndranks,ranks,rcount);
1030: PetscSortIntWithArray(sf->nranks-sf->ndranks,ranks+sf->ndranks,rcount+sf->ndranks);
1031: sf->roffset[0] = 0;
1032: for (i=0; i<sf->nranks; i++) {
1033: PetscMPIIntCast(ranks[i],sf->ranks+i);
1034: sf->roffset[i+1] = sf->roffset[i] + rcount[i];
1035: rcount[i] = 0;
1036: }
1037: for (i=0, irank = -1, orank = -1; i<sf->nleaves; i++) {
1038: /* short circuit */
1039: if (orank != sf->remote[i].rank) {
1040: /* Search for index of iremote[i].rank in sf->ranks */
1041: PetscFindMPIInt(sf->remote[i].rank,sf->ndranks,sf->ranks,&irank);
1042: if (irank < 0) {
1043: PetscFindMPIInt(sf->remote[i].rank,sf->nranks-sf->ndranks,sf->ranks+sf->ndranks,&irank);
1044: if (irank >= 0) irank += sf->ndranks;
1045: }
1046: orank = sf->remote[i].rank;
1047: }
1049: sf->rmine[sf->roffset[irank] + rcount[irank]] = sf->mine ? sf->mine[i] : i;
1050: sf->rremote[sf->roffset[irank] + rcount[irank]] = sf->remote[i].index;
1051: rcount[irank]++;
1052: }
1053: PetscFree2(rcount,ranks);
1054: return 0;
1055: }
1057: /*@C
1058: PetscSFGetGroups - gets incoming and outgoing process groups
1060: Collective
1062: Input Parameter:
1063: . sf - star forest
1065: Output Parameters:
1066: + incoming - group of origin processes for incoming edges (leaves that reference my roots)
1067: - outgoing - group of destination processes for outgoing edges (roots that I reference)
1069: Level: developer
1071: .seealso: PetscSFGetWindow(), PetscSFRestoreWindow()
1072: @*/
1073: PetscErrorCode PetscSFGetGroups(PetscSF sf,MPI_Group *incoming,MPI_Group *outgoing)
1074: {
1075: MPI_Group group = MPI_GROUP_NULL;
1078: if (sf->ingroup == MPI_GROUP_NULL) {
1079: PetscInt i;
1080: const PetscInt *indegree;
1081: PetscMPIInt rank,*outranks,*inranks;
1082: PetscSFNode *remote;
1083: PetscSF bgcount;
1085: /* Compute the number of incoming ranks */
1086: PetscMalloc1(sf->nranks,&remote);
1087: for (i=0; i<sf->nranks; i++) {
1088: remote[i].rank = sf->ranks[i];
1089: remote[i].index = 0;
1090: }
1091: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,&bgcount);
1092: PetscSFSetGraph(bgcount,1,sf->nranks,NULL,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1093: PetscSFComputeDegreeBegin(bgcount,&indegree);
1094: PetscSFComputeDegreeEnd(bgcount,&indegree);
1095: /* Enumerate the incoming ranks */
1096: PetscMalloc2(indegree[0],&inranks,sf->nranks,&outranks);
1097: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1098: for (i=0; i<sf->nranks; i++) outranks[i] = rank;
1099: PetscSFGatherBegin(bgcount,MPI_INT,outranks,inranks);
1100: PetscSFGatherEnd(bgcount,MPI_INT,outranks,inranks);
1101: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1102: MPI_Group_incl(group,indegree[0],inranks,&sf->ingroup);
1103: MPI_Group_free(&group);
1104: PetscFree2(inranks,outranks);
1105: PetscSFDestroy(&bgcount);
1106: }
1107: *incoming = sf->ingroup;
1109: if (sf->outgroup == MPI_GROUP_NULL) {
1110: MPI_Comm_group(PetscObjectComm((PetscObject)sf),&group);
1111: MPI_Group_incl(group,sf->nranks,sf->ranks,&sf->outgroup);
1112: MPI_Group_free(&group);
1113: }
1114: *outgoing = sf->outgroup;
1115: return 0;
1116: }
1118: /*@
1119: PetscSFGetMultiSF - gets the inner SF implementing gathers and scatters
1121: Collective
1123: Input Parameter:
1124: . sf - star forest that may contain roots with 0 or with more than 1 vertex
1126: Output Parameter:
1127: . multi - star forest with split roots, such that each root has degree exactly 1
1129: Level: developer
1131: Notes:
1133: In most cases, users should use PetscSFGatherBegin() and PetscSFScatterBegin() instead of manipulating multi
1134: directly. Since multi satisfies the stronger condition that each entry in the global space has exactly one incoming
1135: edge, it is a candidate for future optimization that might involve its removal.
1137: .seealso: PetscSFSetGraph(), PetscSFGatherBegin(), PetscSFScatterBegin(), PetscSFComputeMultiRootOriginalNumbering()
1138: @*/
1139: PetscErrorCode PetscSFGetMultiSF(PetscSF sf,PetscSF *multi)
1140: {
1143: if (sf->nroots < 0) { /* Graph has not been set yet; why do we need this? */
1144: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1145: *multi = sf->multi;
1146: sf->multi->multi = sf->multi;
1147: return 0;
1148: }
1149: if (!sf->multi) {
1150: const PetscInt *indegree;
1151: PetscInt i,*inoffset,*outones,*outoffset,maxlocal;
1152: PetscSFNode *remote;
1153: maxlocal = sf->maxleaf+1; /* TODO: We should use PetscSFGetLeafRange() */
1154: PetscSFComputeDegreeBegin(sf,&indegree);
1155: PetscSFComputeDegreeEnd(sf,&indegree);
1156: PetscMalloc3(sf->nroots+1,&inoffset,maxlocal,&outones,maxlocal,&outoffset);
1157: inoffset[0] = 0;
1158: for (i=0; i<sf->nroots; i++) inoffset[i+1] = inoffset[i] + indegree[i];
1159: for (i=0; i<maxlocal; i++) outones[i] = 1;
1160: PetscSFFetchAndOpBegin(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1161: PetscSFFetchAndOpEnd(sf,MPIU_INT,inoffset,outones,outoffset,MPI_SUM);
1162: for (i=0; i<sf->nroots; i++) inoffset[i] -= indegree[i]; /* Undo the increment */
1163: if (PetscDefined(USE_DEBUG)) { /* Check that the expected number of increments occurred */
1164: for (i=0; i<sf->nroots; i++) {
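        /* A plausible sketch of the consistency check here (the exact message in PETSc may differ):
           after undoing the increment, the fetched offsets must still match the prefix sums of the degrees. */
        if (inoffset[i] + indegree[i] != inoffset[i+1]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect result after PetscSFFetchAndOp");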
1166: }
1167: }
1168: PetscMalloc1(sf->nleaves,&remote);
1169: for (i=0; i<sf->nleaves; i++) {
1170: remote[i].rank = sf->remote[i].rank;
1171: remote[i].index = outoffset[sf->mine ? sf->mine[i] : i];
1172: }
1173: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_RANKS,&sf->multi);
1174: sf->multi->multi = sf->multi;
1175: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,remote,PETSC_OWN_POINTER);
1176: if (sf->rankorder) { /* Sort the ranks */
1177: PetscMPIInt rank;
1178: PetscInt *inranks,*newoffset,*outranks,*newoutoffset,*tmpoffset,maxdegree;
1179: PetscSFNode *newremote;
1180: MPI_Comm_rank(PetscObjectComm((PetscObject)sf),&rank);
1181: for (i=0,maxdegree=0; i<sf->nroots; i++) maxdegree = PetscMax(maxdegree,indegree[i]);
1182: PetscMalloc5(sf->multi->nroots,&inranks,sf->multi->nroots,&newoffset,maxlocal,&outranks,maxlocal,&newoutoffset,maxdegree,&tmpoffset);
1183: for (i=0; i<maxlocal; i++) outranks[i] = rank;
1184: PetscSFReduceBegin(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1185: PetscSFReduceEnd(sf->multi,MPIU_INT,outranks,inranks,MPI_REPLACE);
1186: /* Sort the incoming ranks at each vertex, build the inverse map */
1187: for (i=0; i<sf->nroots; i++) {
1188: PetscInt j;
1189: for (j=0; j<indegree[i]; j++) tmpoffset[j] = j;
1190: PetscSortIntWithArray(indegree[i],inranks+inoffset[i],tmpoffset);
1191: for (j=0; j<indegree[i]; j++) newoffset[inoffset[i] + tmpoffset[j]] = inoffset[i] + j;
1192: }
1193: PetscSFBcastBegin(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1194: PetscSFBcastEnd(sf->multi,MPIU_INT,newoffset,newoutoffset,MPI_REPLACE);
1195: PetscMalloc1(sf->nleaves,&newremote);
1196: for (i=0; i<sf->nleaves; i++) {
1197: newremote[i].rank = sf->remote[i].rank;
1198: newremote[i].index = newoutoffset[sf->mine ? sf->mine[i] : i];
1199: }
1200: PetscSFSetGraph(sf->multi,inoffset[sf->nroots],sf->nleaves,sf->mine,PETSC_COPY_VALUES,newremote,PETSC_OWN_POINTER);
1201: PetscFree5(inranks,newoffset,outranks,newoutoffset,tmpoffset);
1202: }
1203: PetscFree3(inoffset,outones,outoffset);
1204: }
1205: *multi = sf->multi;
1206: return 0;
1207: }
1209: /*@C
1210: PetscSFCreateEmbeddedRootSF - removes edges from all but the selected roots, does not remap indices
1212: Collective
1214: Input Parameters:
1215: + sf - original star forest
1216: . nselected - number of selected roots on this process
1217: - selected - indices of the selected roots on this process
1219: Output Parameter:
1220: . esf - new star forest
1222: Level: advanced
1224: Note:
1225: To use the new PetscSF, it may be necessary to know the indices of the leaves that are still participating. This can
1226: be done by calling PetscSFGetGraph().
1228: .seealso: PetscSFSetGraph(), PetscSFGetGraph()
1229: @*/
1230: PetscErrorCode PetscSFCreateEmbeddedRootSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *esf)
1231: {
1232: PetscInt i,j,n,nroots,nleaves,esf_nleaves,*new_ilocal,minleaf,maxleaf,maxlocal;
1233: const PetscInt *ilocal;
1234: signed char *rootdata,*leafdata,*leafmem;
1235: const PetscSFNode *iremote;
1236: PetscSFNode *new_iremote;
1237: MPI_Comm comm;
1240: PetscSFCheckGraphSet(sf,1);
1244: PetscSFSetUp(sf);
1245: PetscLogEventBegin(PETSCSF_EmbedSF,sf,0,0,0);
1246: PetscObjectGetComm((PetscObject)sf,&comm);
1247: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
1249: if (PetscDefined(USE_DEBUG)) { /* Error out if selected[] has dups or out of range indices */
1250: PetscBool dups;
1253: for (i=0; i<nselected; i++)
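      /* A plausible sketch of the range check here (the exact message in PETSc may differ):
         each selected root must lie in [0,nroots) */
      if (selected[i] < 0 || selected[i] >= nroots) SETERRQ(comm,PETSC_ERR_ARG_OUTOFRANGE,"Selected root %" PetscInt_FMT " is out of [0,%" PetscInt_FMT ")",selected[i],nroots);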
1255: }
1257: if (sf->ops->CreateEmbeddedRootSF) {
1258: (*sf->ops->CreateEmbeddedRootSF)(sf,nselected,selected,esf);
1259: } else {
1260: /* A generic version of creating embedded sf */
1261: PetscSFGetLeafRange(sf,&minleaf,&maxleaf);
1262: maxlocal = maxleaf - minleaf + 1;
1263: PetscCalloc2(nroots,&rootdata,maxlocal,&leafmem);
1264: leafdata = leafmem - minleaf;
1265: /* Tag selected roots and bcast to leaves */
1266: for (i=0; i<nselected; i++) rootdata[selected[i]] = 1;
1267: PetscSFBcastBegin(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1268: PetscSFBcastEnd(sf,MPI_SIGNED_CHAR,rootdata,leafdata,MPI_REPLACE);
1270: /* Build esf with leaves that are still connected */
1271: esf_nleaves = 0;
1272: for (i=0; i<nleaves; i++) {
1273: j = ilocal ? ilocal[i] : i;
1274: /* esf_nleaves += leafdata[j] should work in theory, but failed with SFWindow bugs
1275: with PetscSFBcast. See https://gitlab.com/petsc/petsc/issues/555
1276: */
1277: esf_nleaves += (leafdata[j] ? 1 : 0);
1278: }
1279: PetscMalloc1(esf_nleaves,&new_ilocal);
1280: PetscMalloc1(esf_nleaves,&new_iremote);
1281: for (i=n=0; i<nleaves; i++) {
1282: j = ilocal ? ilocal[i] : i;
1283: if (leafdata[j]) {
1284: new_ilocal[n] = j;
1285: new_iremote[n].rank = iremote[i].rank;
1286: new_iremote[n].index = iremote[i].index;
1287: ++n;
1288: }
1289: }
1290: PetscSFCreate(comm,esf);
1291: PetscSFSetFromOptions(*esf);
1292: PetscSFSetGraph(*esf,nroots,esf_nleaves,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1293: PetscFree2(rootdata,leafmem);
1294: }
1295: PetscLogEventEnd(PETSCSF_EmbedSF,sf,0,0,0);
1296: return 0;
1297: }
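/* A sketch of extracting a sub-SF with PetscSFCreateEmbeddedRootSF(), assuming the caller only
   wants edges whose roots have even local indices. The helper name is hypothetical. */
static PetscErrorCode SFSelectEvenRoots_ExampleSketch(PetscSF sf,PetscSF *esf)
{
  PetscInt i,nroots,nselected = 0,*selected;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscMalloc1(nroots,&selected);
  for (i=0; i<nroots; i+=2) selected[nselected++] = i;    /* keep roots 0,2,4,... */
  PetscSFCreateEmbeddedRootSF(sf,nselected,selected,esf); /* leaf indices are not remapped */
  PetscFree(selected);
  return 0;
}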
1299: /*@C
1300: PetscSFCreateEmbeddedLeafSF - removes edges from all but the selected leaves, does not remap indices
1302: Collective
1304: Input Parameters:
1305: + sf - original star forest
1306: . nselected - number of selected leaves on this process
1307: - selected - indices of the selected leaves on this process
1309: Output Parameter:
1310: . newsf - new star forest
1312: Level: advanced
1314: .seealso: PetscSFCreateEmbeddedRootSF(), PetscSFSetGraph(), PetscSFGetGraph()
1315: @*/
1316: PetscErrorCode PetscSFCreateEmbeddedLeafSF(PetscSF sf,PetscInt nselected,const PetscInt *selected,PetscSF *newsf)
1317: {
1318: const PetscSFNode *iremote;
1319: PetscSFNode *new_iremote;
1320: const PetscInt *ilocal;
1321: PetscInt i,nroots,*leaves,*new_ilocal;
1322: MPI_Comm comm;
1325: PetscSFCheckGraphSet(sf,1);
1329: /* Uniq selected[] and put results in leaves[] */
1330: PetscObjectGetComm((PetscObject)sf,&comm);
1331: PetscMalloc1(nselected,&leaves);
1332: PetscArraycpy(leaves,selected,nselected);
1333: PetscSortedRemoveDupsInt(&nselected,leaves);
1336: /* Optimize the routine only when sf is setup and hence we can reuse sf's communication pattern */
1337: if (sf->setupcalled && sf->ops->CreateEmbeddedLeafSF) {
1338: (*sf->ops->CreateEmbeddedLeafSF)(sf,nselected,leaves,newsf);
1339: } else {
1340: PetscSFGetGraph(sf,&nroots,NULL,&ilocal,&iremote);
1341: PetscMalloc1(nselected,&new_ilocal);
1342: PetscMalloc1(nselected,&new_iremote);
1343: for (i=0; i<nselected; ++i) {
1344: const PetscInt l = leaves[i];
1345: new_ilocal[i] = ilocal ? ilocal[l] : l;
1346: new_iremote[i].rank = iremote[l].rank;
1347: new_iremote[i].index = iremote[l].index;
1348: }
1349: PetscSFDuplicate(sf,PETSCSF_DUPLICATE_CONFONLY,newsf);
1350: PetscSFSetGraph(*newsf,nroots,nselected,new_ilocal,PETSC_OWN_POINTER,new_iremote,PETSC_OWN_POINTER);
1351: }
1352: PetscFree(leaves);
1353: return 0;
1354: }
1356: /*@C
1357: PetscSFBcastBegin - begin pointwise broadcast with root value being reduced to leaf value, to be concluded with call to PetscSFBcastEnd()
1359: Collective on PetscSF
1361: Input Parameters:
1362: + sf - star forest on which to communicate
1363: . unit - data type associated with each node
1364: . rootdata - buffer to broadcast
1365: - op - operation to use for reduction
1367: Output Parameter:
1368: . leafdata - buffer to be reduced with values from each leaf's respective root
1370: Level: intermediate
1372: Notes:
1373: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1374: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1375: use PetscSFBcastWithMemTypeBegin() instead.
1376: .seealso: PetscSFBcastEnd(), PetscSFBcastWithMemTypeBegin()
1377: @*/
1378: PetscErrorCode PetscSFBcastBegin(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1379: {
1380: PetscMemType rootmtype,leafmtype;
1383: PetscSFSetUp(sf);
1384: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1385: PetscGetMemType(rootdata,&rootmtype);
1386: PetscGetMemType(leafdata,&leafmtype);
1387: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1388: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1389: return 0;
1390: }
1392: /*@C
1393: PetscSFBcastWithMemTypeBegin - begin pointwise broadcast with root value being reduced to leaf value with explicit memory types, to be concluded with call to PetscSFBcastEnd()
1395: Collective on PetscSF
1397: Input Parameters:
1398: + sf - star forest on which to communicate
1399: . unit - data type associated with each node
1400: . rootmtype - memory type of rootdata
1401: . rootdata - buffer to broadcast
1402: . leafmtype - memory type of leafdata
1403: - op - operation to use for reduction
1405: Output Parameter:
1406: . leafdata - buffer to be reduced with values from each leaf's respective root
1408: Level: intermediate
1410: .seealso: PetscSFBcastEnd(), PetscSFBcastBegin()
1411: @*/
1412: PetscErrorCode PetscSFBcastWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,const void *rootdata,PetscMemType leafmtype,void *leafdata,MPI_Op op)
1413: {
1415: PetscSFSetUp(sf);
1416: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
1417: (*sf->ops->BcastBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,op);
1418: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
1419: return 0;
1420: }
1422: /*@C
1423: PetscSFBcastEnd - end a broadcast & reduce operation started with PetscSFBcastBegin()
1425: Collective
1427: Input Parameters:
1428: + sf - star forest
1429: . unit - data type
1430: . rootdata - buffer to broadcast
1431: - op - operation to use for reduction
1433: Output Parameter:
1434: . leafdata - buffer to be reduced with values from each leaf's respective root
1436: Level: intermediate
1438: .seealso: PetscSFSetGraph(), PetscSFReduceEnd()
1439: @*/
1440: PetscErrorCode PetscSFBcastEnd(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata,MPI_Op op)
1441: {
1443: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_BcastEnd,sf,0,0,0);
1444: (*sf->ops->BcastEnd)(sf,unit,rootdata,leafdata,op);
1445: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_BcastEnd,sf,0,0,0);
1446: return 0;
1447: }
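/* A sketch of a root-to-leaf broadcast, assuming rootdata/leafdata are host arrays of PetscInt
   sized to the local root/leaf spaces of an already set-up SF. With MPI_REPLACE each leaf simply
   receives the value of its root. The helper name is hypothetical. */
static PetscErrorCode SFBroadcastRoots_ExampleSketch(PetscSF sf,const PetscInt *rootdata,PetscInt *leafdata)
{
  PetscSFBcastBegin(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
  /* ... unrelated local work can overlap with the communication here ... */
  PetscSFBcastEnd(sf,MPIU_INT,rootdata,leafdata,MPI_REPLACE);
  return 0;
}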
1449: /*@C
1450: PetscSFReduceBegin - begin reduction of leafdata into rootdata, to be completed with call to PetscSFReduceEnd()
1452: Collective
1454: Input Parameters:
1455: + sf - star forest
1456: . unit - data type
1457: . leafdata - values to reduce
1458: - op - reduction operation
1460: Output Parameter:
1461: . rootdata - result of reduction of values from all leaves of each root
1463: Level: intermediate
1465: Notes:
1466: When PETSc is configured with device support, it will use its own mechanism to figure out whether the given data pointers
1467: are host pointers or device pointers, which may incur a noticeable cost. If you already know this information, you should
1468: use PetscSFReduceWithMemTypeBegin() instead.
1470: .seealso: PetscSFBcastBegin(), PetscSFReduceWithMemTypeBegin()
1471: @*/
1472: PetscErrorCode PetscSFReduceBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1473: {
1474: PetscMemType rootmtype,leafmtype;
1477: PetscSFSetUp(sf);
1478: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1479: PetscGetMemType(rootdata,&rootmtype);
1480: PetscGetMemType(leafdata,&leafmtype);
1481: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1482: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1483: return 0;
1484: }
1486: /*@C
1487: PetscSFReduceWithMemTypeBegin - begin reduction of leafdata into rootdata with explicit memory types, to be completed with call to PetscSFReduceEnd()
1489: Collective
1491: Input Parameters:
1492: + sf - star forest
1493: . unit - data type
1494: . leafmtype - memory type of leafdata
1495: . leafdata - values to reduce
1496: . rootmtype - memory type of rootdata
1497: - op - reduction operation
1499: Output Parameter:
1500: . rootdata - result of reduction of values from all leaves of each root
1502: Level: intermediate
1504: .seealso: PetscSFBcastBegin(), PetscSFReduceBegin()
1505: @*/
1506: PetscErrorCode PetscSFReduceWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType leafmtype,const void *leafdata,PetscMemType rootmtype,void *rootdata,MPI_Op op)
1507: {
1509: PetscSFSetUp(sf);
1510: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceBegin,sf,0,0,0);
1511: (sf->ops->ReduceBegin)(sf,unit,leafmtype,leafdata,rootmtype,rootdata,op);
1512: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceBegin,sf,0,0,0);
1513: return 0;
1514: }
1516: /*@C
1517: PetscSFReduceEnd - end a reduction operation started with PetscSFReduceBegin()
1519: Collective
1521: Input Parameters:
1522: + sf - star forest
1523: . unit - data type
1524: . leafdata - values to reduce
1525: - op - reduction operation
1527: Output Parameter:
1528: . rootdata - result of reduction of values from all leaves of each root
1530: Level: intermediate
1532: .seealso: PetscSFSetGraph(), PetscSFBcastEnd()
1533: @*/
1534: PetscErrorCode PetscSFReduceEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *rootdata,MPI_Op op)
1535: {
1537: if (!sf->vscat.logging) PetscLogEventBegin(PETSCSF_ReduceEnd,sf,0,0,0);
1538: (*sf->ops->ReduceEnd)(sf,unit,leafdata,rootdata,op);
1539: if (!sf->vscat.logging) PetscLogEventEnd(PETSCSF_ReduceEnd,sf,0,0,0);
1540: return 0;
1541: }
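/* A sketch of a leaf-to-root reduction, assuming leafdata/rootdata are host arrays of PetscScalar
   and rootdata has been initialized by the caller (e.g. zeroed), since MPI_SUM accumulates into
   the existing root values. The helper name is hypothetical. */
static PetscErrorCode SFSumLeavesIntoRoots_ExampleSketch(PetscSF sf,const PetscScalar *leafdata,PetscScalar *rootdata)
{
  PetscSFReduceBegin(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
  PetscSFReduceEnd(sf,MPIU_SCALAR,leafdata,rootdata,MPI_SUM);
  return 0;
}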
1543: /*@C
1544: PetscSFFetchAndOpBegin - begin operation that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1546: Collective
1548: Input Parameters:
1549: + sf - star forest
1550: . unit - data type
1551: . leafdata - leaf values to use in reduction
1552: - op - operation to use for reduction
1554: Output Parameters:
1555: + rootdata - root values to be updated, input state is seen by first process to perform an update
1556: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1558: Level: advanced
1560: Note:
1561: The update is only atomic at the granularity provided by the hardware. Different roots referenced by the same process
1562: might be updated in a different order. Furthermore, if a composite type is used for the unit datatype, atomicity is
1563: not guaranteed across the whole vertex. Therefore, this function is mostly only used with primitive types such as
1564: integers.
1566: .seealso: PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1567: @*/
1568: PetscErrorCode PetscSFFetchAndOpBegin(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1569: {
1570: PetscMemType rootmtype,leafmtype,leafupdatemtype;
1573: PetscSFSetUp(sf);
1574: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1575: PetscGetMemType(rootdata,&rootmtype);
1576: PetscGetMemType(leafdata,&leafmtype);
1577: PetscGetMemType(leafupdate,&leafupdatemtype);
1579: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1580: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1581: return 0;
1582: }
1584: /*@C
1585: PetscSFFetchAndOpWithMemTypeBegin - begin operation with explicit memory types that fetches values from root and updates atomically by applying operation using my leaf value, to be completed with PetscSFFetchAndOpEnd()
1587: Collective
1589: Input Parameters:
1590: + sf - star forest
1591: . unit - data type
1592: . rootmtype - memory type of rootdata
1593: . leafmtype - memory type of leafdata
1594: . leafdata - leaf values to use in reduction
1595: . leafupdatemtype - memory type of leafupdate
1596: - op - operation to use for reduction
1598: Output Parameters:
1599: + rootdata - root values to be updated, input state is seen by first process to perform an update
1600: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1602: Level: advanced
1604: Note: See PetscSFFetchAndOpBegin() for more details.
1606: .seealso: PetscSFFetchAndOpBegin(),PetscSFComputeDegreeBegin(), PetscSFReduceBegin(), PetscSFSetGraph()
1607: @*/
1608: PetscErrorCode PetscSFFetchAndOpWithMemTypeBegin(PetscSF sf,MPI_Datatype unit,PetscMemType rootmtype,void *rootdata,PetscMemType leafmtype,const void *leafdata,PetscMemType leafupdatemtype,void *leafupdate,MPI_Op op)
1609: {
1611: PetscSFSetUp(sf);
1612: PetscLogEventBegin(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1614: (*sf->ops->FetchAndOpBegin)(sf,unit,rootmtype,rootdata,leafmtype,leafdata,leafupdate,op);
1615: PetscLogEventEnd(PETSCSF_FetchAndOpBegin,sf,0,0,0);
1616: return 0;
1617: }
1619: /*@C
1620: PetscSFFetchAndOpEnd - end operation started in matching call to PetscSFFetchAndOpBegin() to fetch values from roots and update atomically by applying operation using my leaf value
1622: Collective
1624: Input Parameters:
1625: + sf - star forest
1626: . unit - data type
1627: . leafdata - leaf values to use in reduction
1628: - op - operation to use for reduction
1630: Output Parameters:
1631: + rootdata - root values to be updated, input state is seen by first process to perform an update
1632: - leafupdate - state at each leaf's respective root immediately prior to my atomic update
1634: Level: advanced
1636: .seealso: PetscSFComputeDegreeEnd(), PetscSFReduceEnd(), PetscSFSetGraph()
1637: @*/
1638: PetscErrorCode PetscSFFetchAndOpEnd(PetscSF sf,MPI_Datatype unit,void *rootdata,const void *leafdata,void *leafupdate,MPI_Op op)
1639: {
1641: PetscLogEventBegin(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1642: (*sf->ops->FetchAndOpEnd)(sf,unit,rootdata,leafdata,leafupdate,op);
1643: PetscLogEventEnd(PETSCSF_FetchAndOpEnd,sf,0,0,0);
1644: return 0;
1645: }
1647: /*@C
1648: PetscSFComputeDegreeBegin - begin computation of degree for each root vertex, to be completed with PetscSFComputeDegreeEnd()
1650: Collective
1652: Input Parameter:
1653: . sf - star forest
1655: Output Parameter:
1656: . degree - degree of each root vertex
1658: Level: advanced
1660: Notes:
1661: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1663: .seealso: PetscSFGatherBegin()
1664: @*/
1665: PetscErrorCode PetscSFComputeDegreeBegin(PetscSF sf,const PetscInt **degree)
1666: {
1668: PetscSFCheckGraphSet(sf,1);
1670: if (!sf->degreeknown) {
1671: PetscInt i, nroots = sf->nroots, maxlocal;
1673: maxlocal = sf->maxleaf-sf->minleaf+1;
1674: PetscMalloc1(nroots,&sf->degree);
1675: PetscMalloc1(PetscMax(maxlocal,1),&sf->degreetmp); /* allocate at least one entry, see check in PetscSFComputeDegreeEnd() */
1676: for (i=0; i<nroots; i++) sf->degree[i] = 0;
1677: for (i=0; i<maxlocal; i++) sf->degreetmp[i] = 1;
1678: PetscSFReduceBegin(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1679: }
1680: *degree = NULL;
1681: return 0;
1682: }
1684: /*@C
1685: PetscSFComputeDegreeEnd - complete computation of degree for each root vertex, started with PetscSFComputeDegreeBegin()
1687: Collective
1689: Input Parameter:
1690: . sf - star forest
1692: Output Parameter:
1693: . degree - degree of each root vertex
1695: Level: developer
1697: Notes:
1698: The returned array is owned by PetscSF and automatically freed by PetscSFDestroy(). Hence no need to call PetscFree() on it.
1700: .seealso: PetscSFComputeDegreeBegin()
1701: @*/
1702: PetscErrorCode PetscSFComputeDegreeEnd(PetscSF sf,const PetscInt **degree)
1703: {
1705: PetscSFCheckGraphSet(sf,1);
1707: if (!sf->degreeknown) {
1709: PetscSFReduceEnd(sf,MPIU_INT,sf->degreetmp-sf->minleaf,sf->degree,MPI_SUM);
1710: PetscFree(sf->degreetmp);
1711: sf->degreeknown = PETSC_TRUE;
1712: }
1713: *degree = sf->degree;
1714: return 0;
1715: }
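
/* A minimal sketch of the degree-computation pair above; the helper name RootDegreeSumSketch is
   hypothetical. It sums the degrees of the local roots, which equals the number of leaves (on any
   process) that point at them. */
static PetscErrorCode RootDegreeSumSketch(PetscSF sf,PetscInt *total)
{
  const PetscInt *degree;
  PetscInt       i,nroots;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);   /* degree[] is owned by sf; do not PetscFree() it */
  for (i=0,*total=0; i<nroots; i++) *total += degree[i];
  return 0;
}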
1717: /*@C
1718: PetscSFComputeMultiRootOriginalNumbering - Returns original numbering of multi-roots (roots of multi-SF returned by PetscSFGetMultiSF()).
1719: Each multi-root is assigned the index of the corresponding original root.
1721: Collective
1723: Input Parameters:
1724: + sf - star forest
1725: - degree - degree of each root vertex, computed with PetscSFComputeDegreeBegin()/PetscSFComputeDegreeEnd()
1727: Output Parameters:
1728: + nMultiRoots - (optional) number of multi-roots (roots of multi-SF)
1729: - multiRootsOrigNumbering - original indices of multi-roots; length of this array is nMultiRoots
1731: Level: developer
1733: Notes:
1734: The returned array multiRootsOrigNumbering is newly allocated and should be destroyed with PetscFree() when no longer needed.
1736: .seealso: PetscSFComputeDegreeBegin(), PetscSFComputeDegreeEnd(), PetscSFGetMultiSF()
1737: @*/
1738: PetscErrorCode PetscSFComputeMultiRootOriginalNumbering(PetscSF sf, const PetscInt degree[], PetscInt *nMultiRoots, PetscInt *multiRootsOrigNumbering[])
1739: {
1740: PetscSF msf;
1741: PetscInt i, j, k, nroots, nmroots;
1744: PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);
1748: PetscSFGetMultiSF(sf,&msf);
1749: PetscSFGetGraph(msf, &nmroots, NULL, NULL, NULL);
1750: PetscMalloc1(nmroots, multiRootsOrigNumbering);
1751: for (i=0,j=0,k=0; i<nroots; i++) {
1752: if (!degree[i]) continue;
1753: for (j=0; j<degree[i]; j++,k++) {
1754: (*multiRootsOrigNumbering)[k] = i;
1755: }
1756: }
1758: if (nMultiRoots) *nMultiRoots = nmroots;
1759: return 0;
1760: }
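
/* A minimal sketch combining the degree computation with the routine above; the helper name
   MultiRootNumberingSketch is hypothetical. */
static PetscErrorCode MultiRootNumberingSketch(PetscSF sf)
{
  const PetscInt *degree;
  PetscInt       nmroots,*origNumbering;

  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  PetscSFComputeMultiRootOriginalNumbering(sf,degree,&nmroots,&origNumbering);
  /* origNumbering[k] is the original root index of multi-root k, for 0 <= k < nmroots */
  PetscFree(origNumbering);              /* unlike degree[], this array is owned by the caller */
  return 0;
}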
1762: /*@C
1763: PetscSFGatherBegin - begin pointwise gather of all leaves into multi-roots, to be completed with PetscSFGatherEnd()
1765: Collective
1767: Input Parameters:
1768: + sf - star forest
1769: . unit - data type
1770: - leafdata - leaf data to gather to roots
1772: Output Parameter:
1773: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1775: Level: intermediate
1777: .seealso: PetscSFComputeDegreeBegin(), PetscSFScatterBegin()
1778: @*/
1779: PetscErrorCode PetscSFGatherBegin(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1780: {
1781: PetscSF multi = NULL;
1784: PetscSFSetUp(sf);
1785: PetscSFGetMultiSF(sf,&multi);
1786: PetscSFReduceBegin(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1787: return 0;
1788: }
1790: /*@C
1791: PetscSFGatherEnd - ends pointwise gather operation that was started with PetscSFGatherBegin()
1793: Collective
1795: Input Parameters:
1796: + sf - star forest
1797: . unit - data type
1798: - leafdata - leaf data to gather to roots
1800: Output Parameter:
1801: . multirootdata - root buffer to gather into, amount of space per root is equal to its degree
1803: Level: intermediate
1805: .seealso: PetscSFComputeDegreeEnd(), PetscSFScatterEnd()
1806: @*/
1807: PetscErrorCode PetscSFGatherEnd(PetscSF sf,MPI_Datatype unit,const void *leafdata,void *multirootdata)
1808: {
1809: PetscSF multi = NULL;
1812: PetscSFGetMultiSF(sf,&multi);
1813: PetscSFReduceEnd(multi,unit,leafdata,multirootdata,MPI_REPLACE);
1814: return 0;
1815: }
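
/* A minimal sketch of the gather pair above, assuming scalar leaf data; the helper name
   GatherLeavesSketch is hypothetical. The multi-root buffer holds one unit per incoming leaf,
   i.e. the sum of the root degrees. */
static PetscErrorCode GatherLeavesSketch(PetscSF sf,const PetscScalar *leafdata,PetscScalar **multirootdata)
{
  const PetscInt *degree;
  PetscInt       i,nroots,ndata;

  PetscSFGetGraph(sf,&nroots,NULL,NULL,NULL);
  PetscSFComputeDegreeBegin(sf,&degree);
  PetscSFComputeDegreeEnd(sf,&degree);
  for (i=0,ndata=0; i<nroots; i++) ndata += degree[i];
  PetscMalloc1(ndata,multirootdata);
  PetscSFGatherBegin(sf,MPIU_SCALAR,leafdata,*multirootdata);
  PetscSFGatherEnd(sf,MPIU_SCALAR,leafdata,*multirootdata);
  return 0;
}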
1817: /*@C
1818: PetscSFScatterBegin - begin pointwise scatter operation from multi-roots to leaves, to be completed with PetscSFScatterEnd()
1820: Collective
1822: Input Parameters:
1823: + sf - star forest
1824: . unit - data type
1825: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1827: Output Parameter:
1828: . leafdata - leaf data to be updated with personal data from each respective root
1830: Level: intermediate
1832: .seealso: PetscSFComputeDegreeBegin(), PetscSFGatherBegin()
1833: @*/
1834: PetscErrorCode PetscSFScatterBegin(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1835: {
1836: PetscSF multi = NULL;
1839: PetscSFSetUp(sf);
1840: PetscSFGetMultiSF(sf,&multi);
1841: PetscSFBcastBegin(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1842: return 0;
1843: }
1845: /*@C
1846: PetscSFScatterEnd - ends pointwise scatter operation that was started with PetscSFScatterBegin()
1848: Collective
1850: Input Parameters:
1851: + sf - star forest
1852: . unit - data type
1853: - multirootdata - root buffer to send to each leaf, one unit of data per leaf
1855: Output Parameter:
1856: . leafdata - leaf data to be updated with personal data from each respective root
1858: Level: intermediate
1860: .seealso: PetscSFComputeDegreeEnd(), PetscSFGatherEnd()
1861: @*/
1862: PetscErrorCode PetscSFScatterEnd(PetscSF sf,MPI_Datatype unit,const void *multirootdata,void *leafdata)
1863: {
1864: PetscSF multi = NULL;
1867: PetscSFGetMultiSF(sf,&multi);
1868: PetscSFBcastEnd(multi,unit,multirootdata,leafdata,MPI_REPLACE);
1869: return 0;
1870: }
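
/* A minimal sketch of the scatter pair above; the multi-root buffer is laid out exactly as the one
   filled by PetscSFGatherBegin()/PetscSFGatherEnd(), and the helper name ScatterToLeavesSketch is
   hypothetical. */
static PetscErrorCode ScatterToLeavesSketch(PetscSF sf,const PetscScalar *multirootdata,PetscScalar *leafdata)
{
  /* each leaf receives the personal entry reserved for it in its root's segment of multirootdata */
  PetscSFScatterBegin(sf,MPIU_SCALAR,multirootdata,leafdata);
  PetscSFScatterEnd(sf,MPIU_SCALAR,multirootdata,leafdata);
  return 0;
}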
1872: static PetscErrorCode PetscSFCheckLeavesUnique_Private(PetscSF sf)
1873: {
1874: PetscInt i, n, nleaves;
1875: const PetscInt *ilocal = NULL;
1876: PetscHSetI seen;
1878: if (PetscDefined(USE_DEBUG)) {
1879: PetscSFGetGraph(sf,NULL,&nleaves,&ilocal,NULL);
1880: PetscHSetICreate(&seen);
1881: for (i = 0; i < nleaves; i++) {
1882: const PetscInt leaf = ilocal ? ilocal[i] : i;
1883: PetscHSetIAdd(seen,leaf);
1884: }
1885: PetscHSetIGetSize(seen,&n);
1887: PetscHSetIDestroy(&seen);
1888: }
1889: return 0;
1890: }
1892: /*@
1893: PetscSFCompose - Compose a new PetscSF by putting the second SF under the first one in a top (roots) down (leaves) view
1895: Input Parameters:
1896: + sfA - The first PetscSF
1897: - sfB - The second PetscSF
1899: Output Parameters:
1900: . sfBA - The composite SF
1902: Level: developer
1904: Notes:
1905: Currently, the two SFs must be defined on congruent communicators and they must be true star
1906: forests, i.e. the same leaf is not connected with different roots.
1908: sfA's leaf space and sfB's root space might be partially overlapped. The composition builds
1909: a graph with sfA's roots and sfB's leaves only when there is a path between them. Unconnected
1910: nodes (roots or leaves) are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a
1911: Bcast on sfA, then a Bcast on sfB, on connected nodes.
1913: .seealso: PetscSF, PetscSFComposeInverse(), PetscSFGetGraph(), PetscSFSetGraph()
1914: @*/
1915: PetscErrorCode PetscSFCompose(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
1916: {
1917: const PetscSFNode *remotePointsA,*remotePointsB;
1918: PetscSFNode *remotePointsBA=NULL,*reorderedRemotePointsA = NULL,*leafdataB;
1919: const PetscInt *localPointsA,*localPointsB;
1920: PetscInt *localPointsBA;
1921: PetscInt i,numRootsA,numLeavesA,numRootsB,numLeavesB,minleaf,maxleaf,numLeavesBA;
1922: PetscBool denseB;
1925: PetscSFCheckGraphSet(sfA,1);
1927: PetscSFCheckGraphSet(sfB,2);
1930: PetscSFCheckLeavesUnique_Private(sfA);
1931: PetscSFCheckLeavesUnique_Private(sfB);
1933: PetscSFGetGraph(sfA,&numRootsA,&numLeavesA,&localPointsA,&remotePointsA);
1934: PetscSFGetGraph(sfB,&numRootsB,&numLeavesB,&localPointsB,&remotePointsB);
1935: if (localPointsA) {
1936: PetscMalloc1(numRootsB,&reorderedRemotePointsA);
1937: for (i=0; i<numRootsB; i++) {
1938: reorderedRemotePointsA[i].rank = -1;
1939: reorderedRemotePointsA[i].index = -1;
1940: }
1941: for (i=0; i<numLeavesA; i++) {
1942: if (localPointsA[i] >= numRootsB) continue;
1943: reorderedRemotePointsA[localPointsA[i]] = remotePointsA[i];
1944: }
1945: remotePointsA = reorderedRemotePointsA;
1946: }
1947: PetscSFGetLeafRange(sfB,&minleaf,&maxleaf);
1948: PetscMalloc1(maxleaf-minleaf+1,&leafdataB);
1949: PetscSFBcastBegin(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1950: PetscSFBcastEnd(sfB,MPIU_2INT,remotePointsA,leafdataB-minleaf,MPI_REPLACE);
1951: PetscFree(reorderedRemotePointsA);
1953: denseB = (PetscBool)!localPointsB;
1954: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1955: if (leafdataB[localPointsB ? localPointsB[i]-minleaf : i].rank == -1) denseB = PETSC_FALSE;
1956: else numLeavesBA++;
1957: }
1958: if (denseB) {
1959: localPointsBA = NULL;
1960: remotePointsBA = leafdataB;
1961: } else {
1962: PetscMalloc1(numLeavesBA,&localPointsBA);
1963: PetscMalloc1(numLeavesBA,&remotePointsBA);
1964: for (i=0,numLeavesBA=0; i<numLeavesB; i++) {
1965: const PetscInt l = localPointsB ? localPointsB[i] : i;
1967: if (leafdataB[l-minleaf].rank == -1) continue;
1968: remotePointsBA[numLeavesBA] = leafdataB[l-minleaf];
1969: localPointsBA[numLeavesBA] = l;
1970: numLeavesBA++;
1971: }
1972: PetscFree(leafdataB);
1973: }
1974: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
1975: PetscSFSetFromOptions(*sfBA);
1976: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
1977: return 0;
1978: }
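
/* A minimal sketch of using the composite SF built above: one Bcast on sfBA replaces a Bcast on sfA
   followed by a Bcast on sfB (on connected nodes). The helper name ComposedBcastSketch and the
   scalar data type are assumptions. */
static PetscErrorCode ComposedBcastSketch(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *leafdataB)
{
  PetscSF sfBA;

  PetscSFCompose(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,leafdataB,MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}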
1980: /*@
1981: PetscSFComposeInverse - Compose a new PetscSF by putting the inverse of the second SF under the first one
1983: Input Parameters:
1984: + sfA - The first PetscSF
1985: - sfB - The second PetscSF
1987: Output Parameters:
1988: . sfBA - The composite SF.
1990: Level: developer
1992: Notes:
1993: Currently, the two SFs must be defined on congruent communicators and they must be true star
1994: forests, i.e. the same leaf is not connected with different roots. Moreover, all roots of the
1995: second SF must have a degree of 1, i.e., no roots have more than one leaf connected.
1997: sfA's leaf space and sfB's leaf space might be partially overlapped. The composition builds
1998: a graph with sfA's roots and sfB's roots only when there is a path between them. Unconnected
1999: roots are not in sfBA. Doing a Bcast on the new SF is equivalent to doing a Bcast on sfA, then
2000: a Reduce on sfB, on connected roots.
2002: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph(), PetscSFCreateInverseSF()
2003: @*/
2004: PetscErrorCode PetscSFComposeInverse(PetscSF sfA,PetscSF sfB,PetscSF *sfBA)
2005: {
2006: const PetscSFNode *remotePointsA,*remotePointsB;
2007: PetscSFNode *remotePointsBA;
2008: const PetscInt *localPointsA,*localPointsB;
2009: PetscSFNode *reorderedRemotePointsA = NULL;
2010: PetscInt i,numRootsA,numLeavesA,numLeavesBA,numRootsB,numLeavesB,minleaf,maxleaf,*localPointsBA;
2011: MPI_Op op;
2012: #if defined(PETSC_USE_64BIT_INDICES)
2013: PetscBool iswin;
2014: #endif
2017: PetscSFCheckGraphSet(sfA,1);
2019: PetscSFCheckGraphSet(sfB,2);
2022: PetscSFCheckLeavesUnique_Private(sfA);
2023: PetscSFCheckLeavesUnique_Private(sfB);
2025: PetscSFGetGraph(sfA, &numRootsA, &numLeavesA, &localPointsA, &remotePointsA);
2026: PetscSFGetGraph(sfB, &numRootsB, &numLeavesB, &localPointsB, &remotePointsB);
2028: /* TODO: Check roots of sfB have degree of 1 */
2029: /* Once that check is implemented, the MPI_MAXLOC below can be replaced
2030: with MPI_REPLACE; when every root of sfB has degree 1, the two operations have the same effect.
2031: We use MPI_MAXLOC only to obtain a deterministic output from this routine if
2032: the root condition is not met.
2033: */
2034: op = MPI_MAXLOC;
2035: #if defined(PETSC_USE_64BIT_INDICES)
2036: /* we accept a non-deterministic output (if any) with PETSCSFWINDOW, since MPI_MAXLOC cannot operate on MPIU_2INT with MPI_Accumulate */
2037: PetscObjectTypeCompare((PetscObject)sfB,PETSCSFWINDOW,&iswin);
2038: if (iswin) op = MPI_REPLACE;
2039: #endif
2041: PetscSFGetLeafRange(sfB, &minleaf, &maxleaf);
2042: PetscMalloc1(maxleaf - minleaf + 1,&reorderedRemotePointsA);
2043: for (i=0; i<maxleaf - minleaf + 1; i++) {
2044: reorderedRemotePointsA[i].rank = -1;
2045: reorderedRemotePointsA[i].index = -1;
2046: }
2047: if (localPointsA) {
2048: for (i=0; i<numLeavesA; i++) {
2049: if (localPointsA[i] > maxleaf || localPointsA[i] < minleaf) continue;
2050: reorderedRemotePointsA[localPointsA[i] - minleaf] = remotePointsA[i];
2051: }
2052: } else {
2053: for (i=0; i<numLeavesA; i++) {
2054: if (i > maxleaf || i < minleaf) continue;
2055: reorderedRemotePointsA[i - minleaf] = remotePointsA[i];
2056: }
2057: }
2059: PetscMalloc1(numRootsB,&localPointsBA);
2060: PetscMalloc1(numRootsB,&remotePointsBA);
2061: for (i=0; i<numRootsB; i++) {
2062: remotePointsBA[i].rank = -1;
2063: remotePointsBA[i].index = -1;
2064: }
2066: PetscSFReduceBegin(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2067: PetscSFReduceEnd(sfB,MPIU_2INT,reorderedRemotePointsA-minleaf,remotePointsBA,op);
2068: PetscFree(reorderedRemotePointsA);
2069: for (i=0,numLeavesBA=0; i<numRootsB; i++) {
2070: if (remotePointsBA[i].rank == -1) continue;
2071: remotePointsBA[numLeavesBA].rank = remotePointsBA[i].rank;
2072: remotePointsBA[numLeavesBA].index = remotePointsBA[i].index;
2073: localPointsBA[numLeavesBA] = i;
2074: numLeavesBA++;
2075: }
2076: PetscSFCreate(PetscObjectComm((PetscObject)sfA),sfBA);
2077: PetscSFSetFromOptions(*sfBA);
2078: PetscSFSetGraph(*sfBA,numRootsA,numLeavesBA,localPointsBA,PETSC_OWN_POINTER,remotePointsBA,PETSC_OWN_POINTER);
2079: return 0;
2080: }
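
/* A minimal sketch of using the SF built above: leaves of sfBA are sfB's roots, so a Bcast on sfBA
   moves data from sfA's root space to sfB's root space, matching a Bcast on sfA followed by a Reduce
   on sfB (on connected roots). The helper name ComposedInverseBcastSketch is hypothetical. */
static PetscErrorCode ComposedInverseBcastSketch(PetscSF sfA,PetscSF sfB,const PetscScalar *rootdataA,PetscScalar *rootdataB)
{
  PetscSF sfBA;

  PetscSFComposeInverse(sfA,sfB,&sfBA);
  PetscSFBcastBegin(sfBA,MPIU_SCALAR,rootdataA,rootdataB,MPI_REPLACE);
  PetscSFBcastEnd(sfBA,MPIU_SCALAR,rootdataA,rootdataB,MPI_REPLACE);
  PetscSFDestroy(&sfBA);
  return 0;
}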
2082: /*
2083: PetscSFCreateLocalSF_Private - Creates a local PetscSF that only has intra-process edges of the global PetscSF
2085: Input Parameters:
2086: . sf - The global PetscSF
2088: Output Parameters:
2089: . out - The local PetscSF
2090: */
2091: PetscErrorCode PetscSFCreateLocalSF_Private(PetscSF sf,PetscSF *out)
2092: {
2093: MPI_Comm comm;
2094: PetscMPIInt myrank;
2095: const PetscInt *ilocal;
2096: const PetscSFNode *iremote;
2097: PetscInt i,j,nroots,nleaves,lnleaves,*lilocal;
2098: PetscSFNode *liremote;
2099: PetscSF lsf;
2102: if (sf->ops->CreateLocalSF) {
2103: (*sf->ops->CreateLocalSF)(sf,out);
2104: } else {
2105: /* Could use PetscSFCreateEmbeddedLeafSF, but since we know the comm is PETSC_COMM_SELF, we can make it fast */
2106: PetscObjectGetComm((PetscObject)sf,&comm);
2107: MPI_Comm_rank(comm,&myrank);
2109: /* Find out local edges and build a local SF */
2110: PetscSFGetGraph(sf,&nroots,&nleaves,&ilocal,&iremote);
2111: for (i=lnleaves=0; i<nleaves; i++) {if (iremote[i].rank == (PetscInt)myrank) lnleaves++;}
2112: PetscMalloc1(lnleaves,&lilocal);
2113: PetscMalloc1(lnleaves,&liremote);
2115: for (i=j=0; i<nleaves; i++) {
2116: if (iremote[i].rank == (PetscInt)myrank) {
2117: lilocal[j] = ilocal? ilocal[i] : i; /* ilocal=NULL for contiguous storage */
2118: liremote[j].rank = 0; /* rank in PETSC_COMM_SELF */
2119: liremote[j].index = iremote[i].index;
2120: j++;
2121: }
2122: }
2123: PetscSFCreate(PETSC_COMM_SELF,&lsf);
2124: PetscSFSetFromOptions(lsf);
2125: PetscSFSetGraph(lsf,nroots,lnleaves,lilocal,PETSC_OWN_POINTER,liremote,PETSC_OWN_POINTER);
2126: PetscSFSetUp(lsf);
2127: *out = lsf;
2128: }
2129: return 0;
2130: }
2132: /* Similar to PetscSFBcast, but only Bcast to leaves on rank 0 */
2133: PetscErrorCode PetscSFBcastToZero_Private(PetscSF sf,MPI_Datatype unit,const void *rootdata,void *leafdata)
2134: {
2135: PetscMemType rootmtype,leafmtype;
2138: PetscSFSetUp(sf);
2139: PetscLogEventBegin(PETSCSF_BcastBegin,sf,0,0,0);
2140: PetscGetMemType(rootdata,&rootmtype);
2141: PetscGetMemType(leafdata,&leafmtype);
2142: if (sf->ops->BcastToZero) {
2143: (*sf->ops->BcastToZero)(sf,unit,rootmtype,rootdata,leafmtype,leafdata);
2144: } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"PetscSFBcastToZero_Private is not supported on this SF type");
2145: PetscLogEventEnd(PETSCSF_BcastBegin,sf,0,0,0);
2146: return 0;
2147: }
2149: /*@
2150: PetscSFConcatenate - concatenate multiple SFs into one
2152: Input Parameters:
2153: + comm - the communicator
2154: . nsfs - the number of input PetscSFs
2155: . sfs - the array of input PetscSF
2156: . shareRoots - flag indicating whether the roots of the input PetscSFs are taken as shared (PETSC_TRUE), or as separate and concatenated (PETSC_FALSE)
2157: - leafOffsets - the array of local leaf offsets, one for each input PetscSF, or NULL for contiguous storage
2159: Output Parameters:
2160: . newsf - The resulting PetscSF
2162: Level: developer
2164: Notes:
2165: The communicator of all SFs in sfs must be comm.
2167: The offsets in leafOffsets are added to the original leaf indices.
2169: If all input SFs use contiguous leaf storage (ilocal = NULL), leafOffsets can be passed as NULL as well.
2170: In this case, NULL is also passed as ilocal to the resulting SF.
2172: If any input SF has non-null ilocal, leafOffsets is needed to distinguish leaves from different input SFs.
2173: In this case, the user is responsible for providing correct offsets so that the resulting leaves are unique (otherwise an error occurs).
2175: .seealso: PetscSF, PetscSFCompose(), PetscSFGetGraph(), PetscSFSetGraph()
2176: @*/
2177: PetscErrorCode PetscSFConcatenate(MPI_Comm comm, PetscInt nsfs, PetscSF sfs[], PetscBool shareRoots, PetscInt leafOffsets[], PetscSF *newsf)
2178: {
2179: PetscInt i, s, nLeaves, nRoots;
2180: PetscInt *leafArrayOffsets;
2181: PetscInt *ilocal_new;
2182: PetscSFNode *iremote_new;
2183: PetscInt *rootOffsets;
2184: PetscBool all_ilocal_null = PETSC_FALSE;
2185: PetscMPIInt rank;
2187: {
2188: PetscSF dummy; /* just to have a PetscObject on comm for input validation */
2190: PetscSFCreate(comm,&dummy);
2193: for (i=0; i<nsfs; i++) {
2196: }
2200: PetscSFDestroy(&dummy);
2201: }
2202: if (!nsfs) {
2203: PetscSFCreate(comm, newsf);
2204: PetscSFSetGraph(*newsf, 0, 0, NULL, PETSC_OWN_POINTER, NULL, PETSC_OWN_POINTER);
2205: return 0;
2206: }
2207: MPI_Comm_rank(comm, &rank);
2209: PetscCalloc1(nsfs+1, &rootOffsets);
2210: if (shareRoots) {
2211: PetscSFGetGraph(sfs[0], &nRoots, NULL, NULL, NULL);
2212: if (PetscDefined(USE_DEBUG)) {
2213: for (s=1; s<nsfs; s++) {
2214: PetscInt nr;
2216: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2218: }
2219: }
2220: } else {
2221: for (s=0; s<nsfs; s++) {
2222: PetscInt nr;
2224: PetscSFGetGraph(sfs[s], &nr, NULL, NULL, NULL);
2225: rootOffsets[s+1] = rootOffsets[s] + nr;
2226: }
2227: nRoots = rootOffsets[nsfs];
2228: }
2230: /* Calculate leaf array offsets and automatic root offsets */
2231: PetscMalloc1(nsfs+1,&leafArrayOffsets);
2232: leafArrayOffsets[0] = 0;
2233: for (s=0; s<nsfs; s++) {
2234: PetscInt nl;
2236: PetscSFGetGraph(sfs[s], NULL, &nl, NULL, NULL);
2237: leafArrayOffsets[s+1] = leafArrayOffsets[s] + nl;
2238: }
2239: nLeaves = leafArrayOffsets[nsfs];
2241: if (!leafOffsets) {
2242: all_ilocal_null = PETSC_TRUE;
2243: for (s=0; s<nsfs; s++) {
2244: const PetscInt *ilocal;
2246: PetscSFGetGraph(sfs[s], NULL, NULL, &ilocal, NULL);
2247: if (ilocal) {
2248: all_ilocal_null = PETSC_FALSE;
2249: break;
2250: }
2251: }
2253: }
2255: /* Renumber and concatenate local leaves */
2256: ilocal_new = NULL;
2257: if (!all_ilocal_null) {
2258: PetscMalloc1(nLeaves, &ilocal_new);
2259: for (i = 0; i<nLeaves; i++) ilocal_new[i] = -1;
2260: for (s = 0; s<nsfs; s++) {
2261: const PetscInt *ilocal;
2262: PetscInt *ilocal_l = &ilocal_new[leafArrayOffsets[s]];
2263: PetscInt i, nleaves_l;
2265: PetscSFGetGraph(sfs[s], NULL, &nleaves_l, &ilocal, NULL);
2266: for (i=0; i<nleaves_l; i++) ilocal_l[i] = (ilocal ? ilocal[i] : i) + leafOffsets[s];
2267: }
2268: }
2270: /* Renumber and concatenate remote roots */
2271: PetscMalloc1(nLeaves, &iremote_new);
2272: for (i = 0; i < nLeaves; i++) {
2273: iremote_new[i].rank = -1;
2274: iremote_new[i].index = -1;
2275: }
2276: for (s = 0; s<nsfs; s++) {
2277: PetscInt i, nl, nr;
2278: PetscSF tmp_sf;
2279: const PetscSFNode *iremote;
2280: PetscSFNode *tmp_rootdata;
2281: PetscSFNode *tmp_leafdata = &iremote_new[leafArrayOffsets[s]];
2283: PetscSFGetGraph(sfs[s], &nr, &nl, NULL, &iremote);
2284: PetscSFCreate(comm, &tmp_sf);
2285: /* create helper SF with contiguous leaves */
2286: PetscSFSetGraph(tmp_sf, nr, nl, NULL, PETSC_USE_POINTER, (PetscSFNode*) iremote, PETSC_COPY_VALUES);
2287: PetscSFSetUp(tmp_sf);
2288: PetscMalloc1(nr, &tmp_rootdata);
2289: for (i = 0; i < nr; i++) {
2290: tmp_rootdata[i].index = i + rootOffsets[s];
2291: tmp_rootdata[i].rank = (PetscInt) rank;
2292: }
2293: PetscSFBcastBegin(tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2294: PetscSFBcastEnd( tmp_sf, MPIU_2INT, tmp_rootdata, tmp_leafdata, MPI_REPLACE);
2295: PetscSFDestroy(&tmp_sf);
2296: PetscFree(tmp_rootdata);
2297: }
2299: /* Build the new SF */
2300: PetscSFCreate(comm, newsf);
2301: PetscSFSetGraph(*newsf, nRoots, nLeaves, ilocal_new, PETSC_OWN_POINTER, iremote_new, PETSC_OWN_POINTER);
2302: PetscSFSetUp(*newsf);
2303: PetscFree(rootOffsets);
2304: PetscFree(leafArrayOffsets);
2305: return 0;
2306: }
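
/* A minimal sketch of concatenating two SFs that both use contiguous leaf storage (ilocal == NULL),
   so leafOffsets may be passed as NULL as described above; the helper name ConcatenateTwoSketch is
   hypothetical. */
static PetscErrorCode ConcatenateTwoSketch(MPI_Comm comm,PetscSF sf0,PetscSF sf1,PetscSF *combined)
{
  PetscSF sfs[2];

  sfs[0] = sf0;
  sfs[1] = sf1;
  /* roots are kept separate and concatenated; pass PETSC_TRUE instead to share the root space */
  PetscSFConcatenate(comm,2,sfs,PETSC_FALSE,NULL,combined);
  return 0;
}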