Actual source code: vecstash.c
#include <petsc/private/vecimpl.h>

#define DEFAULT_STASH_SIZE 100

/*
  VecStashCreate_Private - Creates a stash, currently used by all the parallel
  vector implementations. The stash is where elements of a vector destined
  to be stored on other processors are kept until vector assembly is done.

  This is a simple-minded stash: entries are simply appended at the end.

  Input Parameters:
  comm - communicator, required for scatters.
  bs   - stash block size, used when stashing blocks of values

  Output Parameter:
  stash - the newly created stash
*/
PetscErrorCode VecStashCreate_Private(MPI_Comm comm,PetscInt bs,VecStash *stash)
{
  PetscInt  max,*opt,nopt;
  PetscBool flg;

  /* Require 2 tags; get them both using PetscCommGetNewTag() */
  stash->comm = comm;
  PetscCommGetNewTag(stash->comm,&stash->tag1);
  PetscCommGetNewTag(stash->comm,&stash->tag2);
  MPI_Comm_size(stash->comm,&stash->size);
  MPI_Comm_rank(stash->comm,&stash->rank);

  nopt = stash->size;
  PetscMalloc1(nopt,&opt);
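  /* -vecstash_initial_size takes either a single value, applied on every rank, or a
     comma-separated list with one entry per rank (the values below are illustrative):
       -vecstash_initial_size 1000        same initial size on all ranks
       -vecstash_initial_size 500,2000,0  per-rank sizes; 0 falls back to the default */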
  PetscOptionsGetIntArray(NULL,NULL,"-vecstash_initial_size",opt,&nopt,&flg);
  if (flg) {
    if (nopt == 1)                max = opt[0];
    else if (nopt == stash->size) max = opt[stash->rank];
    else if (stash->rank < nopt)  max = opt[stash->rank];
    else                          max = 0; /* use default */
    stash->umax = max;
  } else {
    stash->umax = 0;
  }
  PetscFree(opt);

  if (bs <= 0) bs = 1;

  stash->bs = bs;
  stash->nmax = 0;
  stash->oldnmax = 0;
  stash->n = 0;
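  /* reallocs == -1 marks a stash that has never been allocated;
     VecStashGetInfo_Private() reports this state as zero reallocations */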
  stash->reallocs = -1;
  stash->idx = NULL;
  stash->array = NULL;

  stash->send_waits = NULL;
  stash->recv_waits = NULL;
  stash->send_status = NULL;
  stash->nsends = 0;
  stash->nrecvs = 0;
  stash->svalues = NULL;
  stash->rvalues = NULL;
  stash->rmax = 0;
  stash->nprocs = NULL;
  stash->nprocessed = 0;
  stash->donotstash = PETSC_FALSE;
  stash->ignorenegidx = PETSC_FALSE;
  return 0;
}

/*
  VecStashDestroy_Private - Destroy the stash
*/
PetscErrorCode VecStashDestroy_Private(VecStash *stash)
{
  PetscFree2(stash->array,stash->idx);
  PetscFree(stash->bowners);
  return 0;
}

/*
  VecStashScatterEnd_Private - This is called as the final stage of the
  scatter. The final stages of message passing are done here, and
  all the memory used for message passing is cleaned up. This
  routine also resets the stash, and deallocates the memory used
  for the stash. It also keeps track of the current memory usage
  so that the same value can be used the next time through.
*/
PetscErrorCode VecStashScatterEnd_Private(VecStash *stash)
{
  PetscInt nsends=stash->nsends,oldnmax;
  MPI_Status *send_status;

  /* wait on sends */
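  /* each logical send in VecStashScatterBegin_Private() posted two MPI messages
     (values and indices), hence 2*nsends outstanding requests */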
  if (nsends) {
    PetscMalloc1(2*nsends,&send_status);
    MPI_Waitall(2*nsends,stash->send_waits,send_status);
    PetscFree(send_status);
  }

  /* Now update oldnmax to be approximately 10% more than the maximum n used;
     this way less space is wasted the next time this stash is used.
     Only update oldnmax if it increases. */
  if (stash->n) {
    oldnmax = ((PetscInt)(stash->n * 1.1) + 5)*stash->bs;
    if (oldnmax > stash->oldnmax) stash->oldnmax = oldnmax;
  }

  stash->nmax = 0;
  stash->n = 0;
  stash->reallocs = -1;
  stash->rmax = 0;
  stash->nprocessed = 0;

  PetscFree2(stash->array,stash->idx);
  stash->array = NULL;
  stash->idx = NULL;
  PetscFree(stash->send_waits);
  PetscFree(stash->recv_waits);
  PetscFree2(stash->svalues,stash->sindices);
  PetscFree2(stash->rvalues,stash->rindices);
  PetscFree(stash->nprocs);
  return 0;
}

/*
  VecStashGetInfo_Private - Gets the relevant statistics of the stash

  Input Parameter:
  stash - the stash

  Output Parameters:
  nstash   - the size of the stash
  reallocs - the number of additional mallocs incurred.
*/
PetscErrorCode VecStashGetInfo_Private(VecStash *stash,PetscInt *nstash,PetscInt *reallocs)
{
  if (nstash) *nstash = stash->n*stash->bs;
  if (reallocs) {
    if (stash->reallocs < 0) *reallocs = 0;
    else *reallocs = stash->reallocs;
  }
  return 0;
}

/*
  VecStashSetInitialSize_Private - Sets the initial size of the stash

  Input Parameters:
  stash - the stash
  max   - the value that is used as the max size of the stash.
          This value is used while allocating memory. It specifies
          the number of values stored, even for the block-stash.
*/
PetscErrorCode VecStashSetInitialSize_Private(VecStash *stash,PetscInt max)
{
  stash->umax = max;
  return 0;
}

/* VecStashExpand_Private - Expand the stash. This function is called
   when the space in the stash is not sufficient to add the new values
   being inserted into the stash.

   Input Parameters:
   stash - the stash
   incr  - the minimum increase requested

   Notes:
   This routine doubles the currently used memory.
*/
PetscErrorCode VecStashExpand_Private(VecStash *stash,PetscInt incr)
{
  PetscInt *n_idx,newnmax,bs=stash->bs;
  PetscScalar *n_array;

  /* allocate a larger stash. */
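  /* note on units: nmax and newnmax count blocks of size bs, while umax and
     oldnmax count scalar entries, hence the divisions by bs below */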
  if (!stash->oldnmax && !stash->nmax) { /* new stash */
    if (stash->umax) newnmax = stash->umax/bs;
    else newnmax = DEFAULT_STASH_SIZE/bs;
  } else if (!stash->nmax) { /* reusing stash */
    if (stash->umax > stash->oldnmax) newnmax = stash->umax/bs;
    else newnmax = stash->oldnmax/bs;
  } else newnmax = stash->nmax*2;

  if (newnmax < (stash->nmax + incr)) newnmax += 2*incr;

  PetscMalloc2(bs*newnmax,&n_array,newnmax,&n_idx);
  PetscMemcpy(n_array,stash->array,bs*stash->nmax*sizeof(PetscScalar));
  PetscMemcpy(n_idx,stash->idx,stash->nmax*sizeof(PetscInt));
  PetscFree2(stash->array,stash->idx);

  stash->array = n_array;
  stash->idx = n_idx;
  stash->nmax = newnmax;
  stash->reallocs++;
  return 0;
}

/*
  VecStashScatterBegin_Private - Initiates the transfer of values to the
  correct owners. This function goes through the stash, checks the
  owner of each stashed value, and sends the values off to the owner
  processors.

  Input Parameters:
  stash  - the stash
  owners - an array of size 'no-of-procs' which gives the ownership range
           for each node.

  Notes:
  In the case of the blocked-stash the 'owners' array has the ranges
  specified in blocked global indices, while for the regular stash they are in
  the proper global indices.
*/
PetscErrorCode VecStashScatterBegin_Private(VecStash *stash,PetscInt *owners)
{
  PetscMPIInt size = stash->size,tag1=stash->tag1,tag2=stash->tag2;
  PetscInt *owner,*start,*nprocs,nsends,nreceives;
  PetscInt nmax,count,*sindices,*rindices,i,j,idx,bs=stash->bs,lastidx;
  PetscScalar *rvalues,*svalues;
  MPI_Comm comm = stash->comm;
  MPI_Request *send_waits,*recv_waits;

  /* first count the number of contributions destined for each processor */
  PetscCalloc1(2*size,&nprocs);
  PetscMalloc1(stash->n,&owner);

  j = 0;
  lastidx = -1;
  for (i=0; i<stash->n; i++) {
    /* if indices are NOT locally sorted, need to start search at the beginning */
    if (lastidx > (idx = stash->idx[i])) j = 0;
    lastidx = idx;
    for (; j<size; j++) {
      if (idx >= owners[j] && idx < owners[j+1]) {
        nprocs[2*j]++; nprocs[2*j+1] = 1; owner[i] = j; break;
      }
    }
  }
  nsends = 0;
  for (i=0; i<size; i++) nsends += nprocs[2*i+1];
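
  /* at this point nprocs[2*i] is the number of stashed entries destined for rank i,
     and nprocs[2*i+1] is a 0/1 flag marking whether rank i gets a message at all */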
  /* inform other processors of the number of messages and the max length
     (nmax is the longest message this rank will receive, nreceives the
     number of messages it will receive) */
  PetscMaxSum(comm,nprocs,&nmax,&nreceives);

  /* post receives:
     since we don't know how long each individual message is we
     allocate the largest needed buffer for each receive. Potentially
     this is a lot of wasted space.
  */
  PetscMalloc2(nreceives*nmax*bs,&rvalues,nreceives*nmax,&rindices);
  PetscMalloc1(2*nreceives,&recv_waits);
  for (i=0,count=0; i<nreceives; i++) {
    MPI_Irecv(rvalues+bs*nmax*i,bs*nmax,MPIU_SCALAR,MPI_ANY_SOURCE,tag1,comm,recv_waits+count++);
    MPI_Irecv(rindices+nmax*i,nmax,MPIU_INT,MPI_ANY_SOURCE,tag2,comm,recv_waits+count++);
  }

  /* do sends:
     1) starts[i] gives the starting index in svalues for stuff going to
        the ith processor
  */
  PetscMalloc2(stash->n*bs,&svalues,stash->n,&sindices);
  PetscMalloc1(2*nsends,&send_waits);
  PetscMalloc1(size,&start);
  /* use 2 sends: the first with all_v, the next with all_i */
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0; i<stash->n; i++) {
    j = owner[i];
    if (bs == 1) svalues[start[j]] = stash->array[i];
    else {
      PetscMemcpy(svalues+bs*start[j],stash->array+bs*i,bs*sizeof(PetscScalar));
    }
    sindices[start[j]] = stash->idx[i];
    start[j]++;
  }
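  /* the packing loop above advanced each start[j] past its block,
     so rebuild the offsets before posting the sends */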
  start[0] = 0;
  for (i=1; i<size; i++) start[i] = start[i-1] + nprocs[2*i-2];

  for (i=0,count=0; i<size; i++) {
    if (nprocs[2*i+1]) {
      MPI_Isend(svalues+bs*start[i],bs*nprocs[2*i],MPIU_SCALAR,i,tag1,comm,send_waits+count++);
      MPI_Isend(sindices+start[i],nprocs[2*i],MPIU_INT,i,tag2,comm,send_waits+count++);
    }
  }
  PetscFree(owner);
  PetscFree(start);
  /* this memory is reused in VecStashScatterGetMesg_Private() for a different
     purpose: there -1 means "no message received from this rank yet" */
  for (i=0; i<2*size; i++) nprocs[i] = -1;

  stash->nprocs = nprocs;
  stash->svalues = svalues;
  stash->sindices = sindices;
  stash->rvalues = rvalues;
  stash->rindices = rindices;
  stash->nsends = nsends;
  stash->nrecvs = nreceives;
  stash->send_waits = send_waits;
  stash->recv_waits = recv_waits;
  stash->rmax = nmax;
  return 0;
}

/*
  VecStashScatterGetMesg_Private - This function waits on the receives posted
  in the function VecStashScatterBegin_Private() and returns one message at
  a time to the calling function. If no messages are left, it indicates this
  by setting flg = 0; otherwise it sets flg = 1.

  Input Parameter:
  stash - the stash

  Output Parameters:
  nvals - the number of entries in the current message.
  rows  - an array of row indices (or blocked indices) corresponding to the values
  vals  - the values
  flg   - 0 indicates no more messages are left, and the current call has no values associated;
          1 indicates that the current call successfully received a message, and the
          other output parameters nvals, rows, vals are set appropriately.
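
  A typical consumer loop looks roughly like the following sketch (the
  caller-side variable names are illustrative only):

    VecStashScatterBegin_Private(&stash,owners);
    VecStashScatterGetMesg_Private(&stash,&n,&rows,&vals,&flg);
    while (flg) {
      ... insert the n received (block) values into the local vector ...
      VecStashScatterGetMesg_Private(&stash,&n,&rows,&vals,&flg);
    }
    VecStashScatterEnd_Private(&stash);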
*/
PetscErrorCode VecStashScatterGetMesg_Private(VecStash *stash,PetscMPIInt *nvals,PetscInt **rows,PetscScalar **vals,PetscInt *flg)
{
  PetscMPIInt i = 0; /* dummy value so MPI-Uni doesn't think it is not set */
  PetscInt *flg_v;
  PetscInt i1,i2,bs=stash->bs;
  MPI_Status recv_status;
  PetscBool match_found = PETSC_FALSE;

  *flg = 0; /* When a message is discovered this is reset to 1 */
  /* Return if no more messages to process */
  if (stash->nprocessed == stash->nrecvs) return 0;

  flg_v = stash->nprocs;
  /* If a matching pair of receives is found, process them and return the data to
     the calling function. Until then keep receiving messages */
  while (!match_found) {
    MPI_Waitany(2*stash->nrecvs,stash->recv_waits,&i,&recv_status);
    /* Now pack the received message into a structure which is usable by others */
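    /* receives were posted in pairs: even requests carry values (tag1) and odd
       requests carry indices (tag2), so i%2 identifies the kind of message and
       i/2 the receive slot it belongs to */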
    if (i % 2) {
      MPI_Get_count(&recv_status,MPIU_INT,nvals);
      flg_v[2*recv_status.MPI_SOURCE+1] = i/2;
    } else {
      MPI_Get_count(&recv_status,MPIU_SCALAR,nvals);
      flg_v[2*recv_status.MPI_SOURCE] = i/2;
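      /* the values message carries bs scalars per entry, so convert to a count of (block) entries */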
      *nvals = *nvals/bs;
    }

    /* Check if we have both the messages from this proc */
    i1 = flg_v[2*recv_status.MPI_SOURCE];
    i2 = flg_v[2*recv_status.MPI_SOURCE+1];
    if (i1 != -1 && i2 != -1) {
      *rows = stash->rindices + i2*stash->rmax;
      *vals = stash->rvalues + i1*bs*stash->rmax;
      *flg = 1;
      stash->nprocessed++;
      match_found = PETSC_TRUE;
    }
  }
  return 0;
}

/*
 * Sort the stash, removing duplicates (combining as appropriate).
 */
PetscErrorCode VecStashSortCompress_Private(VecStash *stash)
{
  PetscInt i,j,bs = stash->bs;

  if (!stash->n) return 0;
  if (bs == 1) {
    PetscSortIntWithScalarArray(stash->n,stash->idx,stash->array);
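    /* duplicates are now adjacent; compress in place, combining repeated
       indices according to the insert mode */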
    for (i=1,j=0; i<stash->n; i++) {
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          stash->array[j] += stash->array[i];
          break;
        case INSERT_VALUES:
          stash->array[j] = stash->array[i];
          break;
        default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        stash->array[j] = stash->array[i];
      }
    }
    stash->n = j + 1;
  } else { /* block stash */
    PetscInt *perm = NULL;
    PetscScalar *arr;
    PetscMalloc2(stash->n,&perm,stash->n*bs,&arr);
    for (i=0; i<stash->n; i++) perm[i] = i;
    PetscSortIntWithArray(stash->n,stash->idx,perm);
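
    /* idx is now sorted and perm[i] records where the i-th smallest index
       originally lived, so the value blocks can be gathered from stash->array via perm */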
    /* Out-of-place copy of arr */
    PetscMemcpy(arr,stash->array+perm[0]*bs,bs*sizeof(PetscScalar));
    for (i=1,j=0; i<stash->n; i++) {
      PetscInt k;
      if (stash->idx[i] == stash->idx[j]) {
        switch (stash->insertmode) {
        case ADD_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] += stash->array[perm[i]*bs+k];
          break;
        case INSERT_VALUES:
          for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
          break;
        default: SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Insert mode not supported 0x%x",stash->insertmode);
        }
      } else {
        j++;
        stash->idx[j] = stash->idx[i];
        for (k=0; k<bs; k++) arr[j*bs+k] = stash->array[perm[i]*bs+k];
      }
    }
    stash->n = j + 1;
    PetscMemcpy(stash->array,arr,stash->n*bs*sizeof(PetscScalar));
    PetscFree2(perm,arr);
  }
  return 0;
}
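
/*
  VecStashGetOwnerList_Private - Builds the list of distinct owner ranks for the
  stashed indices. The stash is assumed to be sorted (for example by
  VecStashSortCompress_Private()), since the scan below relies on the indices
  appearing in increasing order.
*/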
PetscErrorCode VecStashGetOwnerList_Private(VecStash *stash,PetscLayout map,PetscMPIInt *nowners,PetscMPIInt **owners)
{
  PetscInt i,bs = stash->bs;
  PetscMPIInt r;
  PetscSegBuffer seg;

  PetscSegBufferCreate(sizeof(PetscMPIInt),50,&seg);
  *nowners = 0;
  for (i=0,r=-1; i<stash->n; i++) {
    if (stash->idx[i]*bs >= map->range[r+1]) {
      PetscMPIInt *rank;
      PetscSegBufferGet(seg,1,&rank);
      PetscLayoutFindOwner(map,stash->idx[i]*bs,&r);
      *rank = r;
      (*nowners)++;
    }
  }
  PetscSegBufferExtractAlloc(seg,owners);
  PetscSegBufferDestroy(&seg);
  return 0;
}