Actual source code: baijfact3.c


  2: /*
  3:     Factorization code for BAIJ format.
  4: */
  5: #include <../src/mat/impls/baij/seq/baij.h>
  6: #include <petsc/private/kernels/blockinvert.h>

  8: /*
  9:    This is used to set the numeric factorization for both LU and ILU symbolic factorization
 10: */
 11: PetscErrorCode MatSeqBAIJSetNumericFactorization(Mat fact,PetscBool natural)
 12: {
 13:   if (natural) {
 14:     switch (fact->rmap->bs) {
 15:     case 1:
 16:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 17:       break;
 18:     case 2:
 19:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering;
 20:       break;
 21:     case 3:
 22:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering;
 23:       break;
 24:     case 4:
 25:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering;
 26:       break;
 27:     case 5:
 28:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering;
 29:       break;
 30:     case 6:
 31:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering;
 32:       break;
 33:     case 7:
 34:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering;
 35:       break;
 36:     case 9:
 37: #if defined(PETSC_HAVE_IMMINTRIN_H) && defined(__AVX2__) && defined(__FMA__) && defined(PETSC_USE_REAL_DOUBLE) && !defined(PETSC_USE_COMPLEX) && !defined(PETSC_USE_64BIT_INDICES)
 38:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_9_NaturalOrdering;
 39: #else
 40:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 41: #endif
 42:       break;
 43:     case 15:
 44:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_15_NaturalOrdering;
 45:       break;
 46:     default:
 47:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 48:       break;
 49:     }
 50:   } else {
 51:     switch (fact->rmap->bs) {
 52:     case 1:
 53:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1;
 54:       break;
 55:     case 2:
 56:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2;
 57:       break;
 58:     case 3:
 59:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3;
 60:       break;
 61:     case 4:
 62:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4;
 63:       break;
 64:     case 5:
 65:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5;
 66:       break;
 67:     case 6:
 68:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6;
 69:       break;
 70:     case 7:
 71:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7;
 72:       break;
 73:     default:
 74:       fact->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N;
 75:       break;
 76:     }
 77:   }
 78:   return 0;
 79: }

 81: PetscErrorCode MatSeqBAIJSetNumericFactorization_inplace(Mat inA,PetscBool natural)
 82: {
 83:   if (natural) {
 84:     switch (inA->rmap->bs) {
 85:     case 1:
 86:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
 87:       break;
 88:     case 2:
 89:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_NaturalOrdering_inplace;
 90:       break;
 91:     case 3:
 92:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_NaturalOrdering_inplace;
 93:       break;
 94:     case 4:
 95: #if defined(PETSC_USE_REAL_MAT_SINGLE)
 96:       {
 97:         PetscBool sse_enabled_local;
 98:         PetscSSEIsEnabled(inA->comm,&sse_enabled_local,NULL);
 99:         if (sse_enabled_local) {
100: #  if defined(PETSC_HAVE_SSE)
101:           int i,*AJ=a->j,nz=a->nz,n=a->mbs;
102:           if (n==(unsigned short)n) {
103:             unsigned short *aj=(unsigned short*)AJ;
104:             for (i=0; i<nz; i++) aj[i] = (unsigned short)AJ[i];

106:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE_usj;
107:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE_usj;

109:             PetscInfo(inA,"Using special SSE, in-place natural ordering, ushort j index factor BS=4\n");
110:           } else {
111:             /* Scale the column indices for easier indexing in MatSolve. */
112: /*            for (i=0;i<nz;i++) { */
113: /*              AJ[i] = AJ[i]*4; */
114: /*            } */
115:             inA->ops->setunfactored   = MatSetUnfactored_SeqBAIJ_4_NaturalOrdering_SSE;
116:             inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_SSE;

118:             PetscInfo(inA,"Using special SSE, in-place natural ordering, int j index factor BS=4\n");
119:           }
120: #  else
121:           /* This should never be reached.  If so, problem in PetscSSEIsEnabled. */
122:           SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSE Hardware unavailable");
123: #  endif
124:         } else {
125:           inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
126:         }
127:       }
128: #else
129:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_NaturalOrdering_inplace;
130: #endif
131:       break;
132:     case 5:
133:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_NaturalOrdering_inplace;
134:       break;
135:     case 6:
136:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_NaturalOrdering_inplace;
137:       break;
138:     case 7:
139:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_NaturalOrdering_inplace;
140:       break;
141:     default:
142:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
143:       break;
144:     }
145:   } else {
146:     switch (inA->rmap->bs) {
147:     case 1:
148:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_1_inplace;
149:       break;
150:     case 2:
151:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_2_inplace;
152:       break;
153:     case 3:
154:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_3_inplace;
155:       break;
156:     case 4:
157:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_4_inplace;
158:       break;
159:     case 5:
160:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_5_inplace;
161:       break;
162:     case 6:
163:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_6_inplace;
164:       break;
165:     case 7:
166:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_7_inplace;
167:       break;
168:     default:
169:       inA->ops->lufactornumeric = MatLUFactorNumeric_SeqBAIJ_N_inplace;
170:       break;
171:     }
172:   }
173:   return 0;
174: }

176: /*
177:     The symbolic factorization code is identical to that for AIJ format,
178:   except for very small changes since this is now a SeqBAIJ datastructure.
179:   NOT good code reuse.
180: */
181: #include <petscbt.h>
182: #include <../src/mat/utils/freespace.h>

184: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
185: {
186:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
187:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
188:   PetscBool          row_identity,col_identity,both_identity;
189:   IS                 isicol;
190:   const PetscInt     *r,*ic;
191:   PetscInt           i,*ai=a->i,*aj=a->j;
192:   PetscInt           *bi,*bj,*ajtmp;
193:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
194:   PetscReal          f;
195:   PetscInt           nlnk,*lnk,k,**bi_ptr;
196:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
197:   PetscBT            lnkbt;
198:   PetscBool          missing;

201:   MatMissingDiagonal(A,&missing,&i);

204:   if (bs>1) {  /* check shifttype */
206:   }

208:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
209:   ISGetIndices(isrow,&r);
210:   ISGetIndices(isicol,&ic);

212:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
213:   PetscMalloc1(n+1,&bi);
214:   PetscMalloc1(n+1,&bdiag);
215:   bi[0] = bdiag[0] = 0;

217:   /* linked list for storing column indices of the active row */
218:   nlnk = n + 1;
219:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

221:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

223:   /* initial FreeSpace size is f*(ai[n]+1) */
224:   f    = info->fill;
225:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);

227:   current_space = free_space;

229:   for (i=0; i<n; i++) {
230:     /* copy previous fill into linked list */
231:     nzi = 0;
232:     nnz = ai[r[i]+1] - ai[r[i]];
233:     ajtmp = aj + ai[r[i]];
234:     PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
235:     nzi  += nlnk;

237:     /* add pivot rows into linked list */
238:     row = lnk[n];
239:     while (row < i) {
240:       nzbd  = bdiag[row] + 1;   /* num of entries in the row with column index <= row */
241:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
242:       PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
243:       nzi  += nlnk;
244:       row   = lnk[row];
245:     }
246:     bi[i+1] = bi[i] + nzi;
247:     im[i]   = nzi;

249:     /* mark bdiag */
250:     nzbd = 0;
251:     nnz  = nzi;
252:     k    = lnk[n];
253:     while (nnz-- && k < i) {
254:       nzbd++;
255:       k = lnk[k];
256:     }
257:     bdiag[i] = nzbd; /* note : bdaig[i] = nnzL as input for PetscFreeSpaceContiguous_LU() */

259:     /* if free space is not available, make more free space */
260:     if (current_space->local_remaining<nzi) {
261:       nnz  = PetscIntMultTruncate(2,PetscIntMultTruncate(n - i,nzi)); /* estimated and max additional space needed */
262:       PetscFreeSpaceGet(nnz,&current_space);
263:       reallocs++;
264:     }

266:     /* copy data into free space, then initialize lnk */
267:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

269:     bi_ptr[i]                       = current_space->array;
270:     current_space->array           += nzi;
271:     current_space->local_used      += nzi;
272:     current_space->local_remaining -= nzi;
273:   }

275:   ISRestoreIndices(isrow,&r);
276:   ISRestoreIndices(isicol,&ic);

278:   /* copy free_space into bj and free free_space; set bi, bj, bdiag in new datastructure; */
279:   PetscMalloc1(bi[n]+1,&bj);
280:   PetscFreeSpaceContiguous_LU(&free_space,bj,n,bi,bdiag);
281:   PetscLLDestroy(lnk,lnkbt);
282:   PetscFree2(bi_ptr,im);

284:   /* put together the new matrix */
285:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
286:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
287:   b    = (Mat_SeqBAIJ*)(B)->data;

289:   b->free_a       = PETSC_TRUE;
290:   b->free_ij      = PETSC_TRUE;
291:   b->singlemalloc = PETSC_FALSE;

293:   PetscMalloc1((bdiag[0]+1)*bs2,&b->a);
294:   b->j             = bj;
295:   b->i             = bi;
296:   b->diag          = bdiag;
297:   b->free_diag     = PETSC_TRUE;
298:   b->ilen          = NULL;
299:   b->imax          = NULL;
300:   b->row           = isrow;
301:   b->col           = iscol;
302:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

304:   PetscObjectReference((PetscObject)isrow);
305:   PetscObjectReference((PetscObject)iscol);
306:   b->icol = isicol;
307:   PetscMalloc1(bs*n+bs,&b->solve_work);
308:   PetscLogObjectMemory((PetscObject)B,(bdiag[0]+1)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

310:   b->maxnz = b->nz = bdiag[0]+1;

312:   B->factortype            =  MAT_FACTOR_LU;
313:   B->info.factor_mallocs   = reallocs;
314:   B->info.fill_ratio_given = f;

316:   if (ai[n] != 0) {
317:     B->info.fill_ratio_needed = ((PetscReal)(bdiag[0]+1))/((PetscReal)ai[n]);
318:   } else {
319:     B->info.fill_ratio_needed = 0.0;
320:   }
321: #if defined(PETSC_USE_INFO)
322:   if (ai[n] != 0) {
323:     PetscReal af = B->info.fill_ratio_needed;
324:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
325:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
326:     PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
327:     PetscInfo(A,"for best performance.\n");
328:   } else {
329:     PetscInfo(A,"Empty matrix\n");
330:   }
331: #endif

333:   ISIdentity(isrow,&row_identity);
334:   ISIdentity(iscol,&col_identity);

336:   both_identity = (PetscBool) (row_identity && col_identity);

338:   MatSeqBAIJSetNumericFactorization(B,both_identity);
339:   return 0;
340: }

342: PetscErrorCode MatLUFactorSymbolic_SeqBAIJ_inplace(Mat B,Mat A,IS isrow,IS iscol,const MatFactorInfo *info)
343: {
344:   Mat_SeqBAIJ        *a = (Mat_SeqBAIJ*)A->data,*b;
345:   PetscInt           n  =a->mbs,bs = A->rmap->bs,bs2=a->bs2;
346:   PetscBool          row_identity,col_identity,both_identity;
347:   IS                 isicol;
348:   const PetscInt     *r,*ic;
349:   PetscInt           i,*ai=a->i,*aj=a->j;
350:   PetscInt           *bi,*bj,*ajtmp;
351:   PetscInt           *bdiag,row,nnz,nzi,reallocs=0,nzbd,*im;
352:   PetscReal          f;
353:   PetscInt           nlnk,*lnk,k,**bi_ptr;
354:   PetscFreeSpaceList free_space=NULL,current_space=NULL;
355:   PetscBT            lnkbt;
356:   PetscBool          missing;

359:   MatMissingDiagonal(A,&missing,&i);

362:   ISInvertPermutation(iscol,PETSC_DECIDE,&isicol);
363:   ISGetIndices(isrow,&r);
364:   ISGetIndices(isicol,&ic);

366:   /* get new row and diagonal pointers, must be allocated separately because they will be given to the Mat_SeqAIJ and freed separately */
367:   PetscMalloc1(n+1,&bi);
368:   PetscMalloc1(n+1,&bdiag);

370:   bi[0] = bdiag[0] = 0;

372:   /* linked list for storing column indices of the active row */
373:   nlnk = n + 1;
374:   PetscLLCreate(n,n,nlnk,lnk,lnkbt);

376:   PetscMalloc2(n+1,&bi_ptr,n+1,&im);

378:   /* initial FreeSpace size is f*(ai[n]+1) */
379:   f             = info->fill;
380:   PetscFreeSpaceGet(PetscRealIntMultTruncate(f,ai[n]+1),&free_space);
381:   current_space = free_space;

383:   for (i=0; i<n; i++) {
384:     /* copy previous fill into linked list */
385:     nzi = 0;
386:     nnz = ai[r[i]+1] - ai[r[i]];
387:     ajtmp = aj + ai[r[i]];
388:     PetscLLAddPerm(nnz,ajtmp,ic,n,&nlnk,lnk,lnkbt);
389:     nzi  += nlnk;

391:     /* add pivot rows into linked list */
392:     row = lnk[n];
393:     while (row < i) {
394:       nzbd  = bdiag[row] - bi[row] + 1;   /* num of entries in the row with column index <= row */
395:       ajtmp = bi_ptr[row] + nzbd;   /* points to the entry next to the diagonal */
396:       PetscLLAddSortedLU(ajtmp,row,&nlnk,lnk,lnkbt,i,nzbd,im);
397:       nzi  += nlnk;
398:       row   = lnk[row];
399:     }
400:     bi[i+1] = bi[i] + nzi;
401:     im[i]   = nzi;

403:     /* mark bdiag */
404:     nzbd = 0;
405:     nnz  = nzi;
406:     k    = lnk[n];
407:     while (nnz-- && k < i) {
408:       nzbd++;
409:       k = lnk[k];
410:     }
411:     bdiag[i] = bi[i] + nzbd;

413:     /* if free space is not available, make more free space */
414:     if (current_space->local_remaining<nzi) {
415:       nnz  = PetscIntMultTruncate(n - i,nzi); /* estimated and max additional space needed */
416:       PetscFreeSpaceGet(nnz,&current_space);
417:       reallocs++;
418:     }

420:     /* copy data into free space, then initialize lnk */
421:     PetscLLClean(n,n,nzi,lnk,current_space->array,lnkbt);

423:     bi_ptr[i]                       = current_space->array;
424:     current_space->array           += nzi;
425:     current_space->local_used      += nzi;
426:     current_space->local_remaining -= nzi;
427:   }
428: #if defined(PETSC_USE_INFO)
429:   if (ai[n] != 0) {
430:     PetscReal af = ((PetscReal)bi[n])/((PetscReal)ai[n]);
431:     PetscInfo(A,"Reallocs %" PetscInt_FMT " Fill ratio:given %g needed %g\n",reallocs,(double)f,(double)af);
432:     PetscInfo(A,"Run with -pc_factor_fill %g or use \n",(double)af);
433:     PetscInfo(A,"PCFactorSetFill(pc,%g);\n",(double)af);
434:     PetscInfo(A,"for best performance.\n");
435:   } else {
436:     PetscInfo(A,"Empty matrix\n");
437:   }
438: #endif

440:   ISRestoreIndices(isrow,&r);
441:   ISRestoreIndices(isicol,&ic);

443:   /* destroy list of free space and other temporary array(s) */
444:   PetscMalloc1(bi[n]+1,&bj);
445:   PetscFreeSpaceContiguous(&free_space,bj);
446:   PetscLLDestroy(lnk,lnkbt);
447:   PetscFree2(bi_ptr,im);

449:   /* put together the new matrix */
450:   MatSeqBAIJSetPreallocation(B,bs,MAT_SKIP_ALLOCATION,NULL);
451:   PetscLogObjectParent((PetscObject)B,(PetscObject)isicol);
452:   b    = (Mat_SeqBAIJ*)(B)->data;

454:   b->free_a       = PETSC_TRUE;
455:   b->free_ij      = PETSC_TRUE;
456:   b->singlemalloc = PETSC_FALSE;

458:   PetscMalloc1((bi[n]+1)*bs2,&b->a);
459:   b->j             = bj;
460:   b->i             = bi;
461:   b->diag          = bdiag;
462:   b->free_diag     = PETSC_TRUE;
463:   b->ilen          = NULL;
464:   b->imax          = NULL;
465:   b->row           = isrow;
466:   b->col           = iscol;
467:   b->pivotinblocks = (info->pivotinblocks) ? PETSC_TRUE : PETSC_FALSE;

469:   PetscObjectReference((PetscObject)isrow);
470:   PetscObjectReference((PetscObject)iscol);
471:   b->icol = isicol;

473:   PetscMalloc1(bs*n+bs,&b->solve_work);
474:   PetscLogObjectMemory((PetscObject)B,(bi[n]-n)*(sizeof(PetscInt)+sizeof(PetscScalar)*bs2));

476:   b->maxnz = b->nz = bi[n];

478:   (B)->factortype            =  MAT_FACTOR_LU;
479:   (B)->info.factor_mallocs   = reallocs;
480:   (B)->info.fill_ratio_given = f;

482:   if (ai[n] != 0) {
483:     (B)->info.fill_ratio_needed = ((PetscReal)bi[n])/((PetscReal)ai[n]);
484:   } else {
485:     (B)->info.fill_ratio_needed = 0.0;
486:   }

488:   ISIdentity(isrow,&row_identity);
489:   ISIdentity(iscol,&col_identity);

491:   both_identity = (PetscBool) (row_identity && col_identity);

493:   MatSeqBAIJSetNumericFactorization_inplace(B,both_identity);
494:   return 0;
495: }