Actual source code: ks-slice.c

slepc-3.17.0 2022-03-31
Report Typos and Errors
  1: /*
  2:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  3:    SLEPc - Scalable Library for Eigenvalue Problem Computations
  4:    Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain

  6:    This file is part of SLEPc.
  7:    SLEPc is distributed under a 2-clause BSD license (see LICENSE).
  8:    - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  9: */
 10: /*
 11:    SLEPc eigensolver: "krylovschur"

 13:    Method: Krylov-Schur with spectrum slicing for symmetric eigenproblems

 15:    References:

 17:        [1] R.G. Grimes et al., "A shifted block Lanczos algorithm for
 18:            solving sparse symmetric generalized eigenproblems", SIAM J.
 19:            Matrix Anal. Appl. 15(1):228-272, 1994.

 21:        [2] C. Campos and J.E. Roman, "Spectrum slicing strategies based
 22:            on restarted Lanczos methods", Numer. Algor. 60(2):279-295,
 23:            2012.
 24: */

 26: #include <slepc/private/epsimpl.h>
 27: #include "krylovschur.h"

 29: static PetscBool  cited = PETSC_FALSE;
 30: static const char citation[] =
 31:   "@Article{slepc-slice,\n"
 32:   "   author = \"C. Campos and J. E. Roman\",\n"
 33:   "   title = \"Strategies for spectrum slicing based on restarted {Lanczos} methods\",\n"
 34:   "   journal = \"Numer. Algorithms\",\n"
 35:   "   volume = \"60\",\n"
 36:   "   number = \"2\",\n"
 37:   "   pages = \"279--295\",\n"
 38:   "   year = \"2012,\"\n"
 39:   "   doi = \"https://doi.org/10.1007/s11075-012-9564-z\"\n"
 40:   "}\n";

 42: #define SLICE_PTOL PETSC_SQRT_MACHINE_EPSILON

 44: static PetscErrorCode EPSSliceResetSR(EPS eps)
 45: {
 46:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
 47:   EPS_SR          sr=ctx->sr;
 48:   EPS_shift       s;

 50:   if (sr) {
 51:     if (ctx->npart>1) {
 52:       BVDestroy(&sr->V);
 53:       PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
 54:     }
 55:     /* Reviewing list of shifts to free memory */
 56:     s = sr->s0;
 57:     if (s) {
 58:       while (s->neighb[1]) {
 59:         s = s->neighb[1];
 60:         PetscFree(s->neighb[0]);
 61:       }
 62:       PetscFree(s);
 63:     }
 64:     PetscFree(sr);
 65:   }
 66:   ctx->sr = NULL;
 67:   PetscFunctionReturn(0);
 68: }

 70: PetscErrorCode EPSReset_KrylovSchur_Slice(EPS eps)
 71: {
 72:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

 74:   if (!ctx->global) PetscFunctionReturn(0);
 75:   /* Reset auxiliary EPS */
 76:   EPSSliceResetSR(ctx->eps);
 77:   EPSReset(ctx->eps);
 78:   EPSSliceResetSR(eps);
 79:   PetscFree(ctx->inertias);
 80:   PetscFree(ctx->shifts);
 81:   PetscFunctionReturn(0);
 82: }

 84: PetscErrorCode EPSDestroy_KrylovSchur_Slice(EPS eps)
 85: {
 86:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

 88:   if (!ctx->global) PetscFunctionReturn(0);
 89:   /* Destroy auxiliary EPS */
 90:   EPSReset_KrylovSchur_Slice(eps);
 91:   EPSDestroy(&ctx->eps);
 92:   if (ctx->npart>1) {
 93:     PetscSubcommDestroy(&ctx->subc);
 94:     if (ctx->commset) {
 95:       MPI_Comm_free(&ctx->commrank);
 96:       ctx->commset = PETSC_FALSE;
 97:     }
 98:     ISDestroy(&ctx->isrow);
 99:     ISDestroy(&ctx->iscol);
100:     MatDestroyMatrices(1,&ctx->submata);
101:     MatDestroyMatrices(1,&ctx->submatb);
102:   }
103:   PetscFree(ctx->subintervals);
104:   PetscFree(ctx->nconv_loc);
105:   PetscFunctionReturn(0);
106: }

108: /*
109:   EPSSliceAllocateSolution - Allocate memory storage for common variables such
110:   as eigenvalues and eigenvectors. The argument extra is used for methods
111:   that require a working basis slightly larger than ncv.
112: */
113: static PetscErrorCode EPSSliceAllocateSolution(EPS eps,PetscInt extra)
114: {
115:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data;
116:   PetscReal          eta;
117:   PetscInt           k;
118:   PetscLogDouble     cnt;
119:   BVType             type;
120:   BVOrthogType       orthog_type;
121:   BVOrthogRefineType orthog_ref;
122:   BVOrthogBlockType  ob_type;
123:   Mat                matrix;
124:   Vec                t;
125:   EPS_SR             sr = ctx->sr;

127:   /* allocate space for eigenvalues and friends */
128:   k = PetscMax(1,sr->numEigs);
129:   PetscFree4(sr->eigr,sr->eigi,sr->errest,sr->perm);
130:   PetscMalloc4(k,&sr->eigr,k,&sr->eigi,k,&sr->errest,k,&sr->perm);
131:   cnt = 2*k*sizeof(PetscScalar) + 2*k*sizeof(PetscReal) + k*sizeof(PetscInt);
132:   PetscLogObjectMemory((PetscObject)eps,cnt);

134:   /* allocate sr->V and transfer options from eps->V */
135:   BVDestroy(&sr->V);
136:   BVCreate(PetscObjectComm((PetscObject)eps),&sr->V);
137:   PetscLogObjectParent((PetscObject)eps,(PetscObject)sr->V);
138:   if (!eps->V) EPSGetBV(eps,&eps->V);
139:   if (!((PetscObject)(eps->V))->type_name) BVSetType(sr->V,BVSVEC);
140:   else {
141:     BVGetType(eps->V,&type);
142:     BVSetType(sr->V,type);
143:   }
144:   STMatCreateVecsEmpty(eps->st,&t,NULL);
145:   BVSetSizesFromVec(sr->V,t,k);
146:   VecDestroy(&t);
147:   EPS_SetInnerProduct(eps);
148:   BVGetMatrix(eps->V,&matrix,NULL);
149:   BVSetMatrix(sr->V,matrix,PETSC_FALSE);
150:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
151:   BVSetOrthogonalization(sr->V,orthog_type,orthog_ref,eta,ob_type);
152:   PetscFunctionReturn(0);
153: }

155: static PetscErrorCode EPSSliceGetEPS(EPS eps)
156: {
157:   EPS_KRYLOVSCHUR    *ctx=(EPS_KRYLOVSCHUR*)eps->data,*ctx_local;
158:   BV                 V;
159:   BVType             type;
160:   PetscReal          eta;
161:   BVOrthogType       orthog_type;
162:   BVOrthogRefineType orthog_ref;
163:   BVOrthogBlockType  ob_type;
164:   PetscInt           i;
165:   PetscReal          h,a,b;
166:   PetscRandom        rand;
167:   EPS_SR             sr=ctx->sr;

169:   if (!ctx->eps) EPSKrylovSchurGetChildEPS(eps,&ctx->eps);

171:   /* Determine subintervals */
172:   if (ctx->npart==1) {
173:     a = eps->inta; b = eps->intb;
174:   } else {
175:     if (!ctx->subintset) { /* uniform distribution if no set by user */
177:       h = (eps->intb-eps->inta)/ctx->npart;
178:       a = eps->inta+ctx->subc->color*h;
179:       b = (ctx->subc->color==ctx->npart-1)?eps->intb:eps->inta+(ctx->subc->color+1)*h;
180:       PetscFree(ctx->subintervals);
181:       PetscMalloc1(ctx->npart+1,&ctx->subintervals);
182:       for (i=0;i<ctx->npart;i++) ctx->subintervals[i] = eps->inta+h*i;
183:       ctx->subintervals[ctx->npart] = eps->intb;
184:     } else {
185:       a = ctx->subintervals[ctx->subc->color];
186:       b = ctx->subintervals[ctx->subc->color+1];
187:     }
188:   }
189:   EPSSetInterval(ctx->eps,a,b);
190:   EPSSetConvergenceTest(ctx->eps,eps->conv);
191:   EPSSetDimensions(ctx->eps,ctx->nev,ctx->ncv,ctx->mpd);
192:   EPSKrylovSchurSetLocking(ctx->eps,ctx->lock);

194:   ctx_local = (EPS_KRYLOVSCHUR*)ctx->eps->data;
195:   ctx_local->detect = ctx->detect;

197:   /* transfer options from eps->V */
198:   EPSGetBV(ctx->eps,&V);
199:   BVGetRandomContext(V,&rand);  /* make sure the random context is available when duplicating */
200:   if (!eps->V) EPSGetBV(eps,&eps->V);
201:   if (!((PetscObject)(eps->V))->type_name) BVSetType(V,BVSVEC);
202:   else {
203:     BVGetType(eps->V,&type);
204:     BVSetType(V,type);
205:   }
206:   BVGetOrthogonalization(eps->V,&orthog_type,&orthog_ref,&eta,&ob_type);
207:   BVSetOrthogonalization(V,orthog_type,orthog_ref,eta,ob_type);

209:   ctx->eps->which = eps->which;
210:   ctx->eps->max_it = eps->max_it;
211:   ctx->eps->tol = eps->tol;
212:   ctx->eps->purify = eps->purify;
213:   if (eps->tol==PETSC_DEFAULT) eps->tol = SLEPC_DEFAULT_TOL;
214:   EPSSetProblemType(ctx->eps,eps->problem_type);
215:   EPSSetUp(ctx->eps);
216:   ctx->eps->nconv = 0;
217:   ctx->eps->its   = 0;
218:   for (i=0;i<ctx->eps->ncv;i++) {
219:     ctx->eps->eigr[i]   = 0.0;
220:     ctx->eps->eigi[i]   = 0.0;
221:     ctx->eps->errest[i] = 0.0;
222:   }
223:   PetscFunctionReturn(0);
224: }

226: static PetscErrorCode EPSSliceGetInertia(EPS eps,PetscReal shift,PetscInt *inertia,PetscInt *zeros)
227: {
228:   KSP            ksp,kspr;
229:   PC             pc;
230:   Mat            F;
231:   PetscReal      nzshift=shift;
232:   PetscBool      flg;

234:   if (shift >= PETSC_MAX_REAL) { /* Right-open interval */
235:     if (inertia) *inertia = eps->n;
236:   } else if (shift <= PETSC_MIN_REAL) {
237:     if (inertia) *inertia = 0;
238:     if (zeros) *zeros = 0;
239:   } else {
240:     /* If the shift is zero, perturb it to a very small positive value.
241:        The goal is that the nonzero pattern is the same in all cases and reuse
242:        the symbolic factorizations */
243:     nzshift = (shift==0.0)? 10.0/PETSC_MAX_REAL: shift;
244:     STSetShift(eps->st,nzshift);
245:     STGetKSP(eps->st,&ksp);
246:     KSPGetPC(ksp,&pc);
247:     PetscObjectTypeCompare((PetscObject)pc,PCREDUNDANT,&flg);
248:     if (flg) {
249:       PCRedundantGetKSP(pc,&kspr);
250:       KSPGetPC(kspr,&pc);
251:     }
252:     PCFactorGetMatrix(pc,&F);
253:     MatGetInertia(F,inertia,zeros,NULL);
254:   }
255:   if (inertia) PetscInfo(eps,"Computed inertia at shift %g: %" PetscInt_FMT "\n",(double)nzshift,*inertia);
256:   PetscFunctionReturn(0);
257: }

259: /*
260:    Dummy backtransform operation
261:  */
262: static PetscErrorCode EPSBackTransform_Skip(EPS eps)
263: {
264:   PetscFunctionReturn(0);
265: }

267: PetscErrorCode EPSSetUp_KrylovSchur_Slice(EPS eps)
268: {
269:   EPS_KRYLOVSCHUR *ctx = (EPS_KRYLOVSCHUR*)eps->data,*ctx_glob;
270:   EPS_SR          sr,sr_loc,sr_glob;
271:   PetscInt        nEigs,dssz=1,i,zeros=0,off=0,method,hiteig=0;
272:   PetscMPIInt     nproc,rank=0,aux;
273:   PetscReal       r;
274:   MPI_Request     req;
275:   Mat             A,B=NULL;
276:   DSParallelType  ptype;
277:   MPI_Comm        child;

279:   if (ctx->global) {
280:     EPSCheckHermitianDefiniteCondition(eps,PETSC_TRUE," with spectrum slicing");
281:     EPSCheckSinvertCayleyCondition(eps,PETSC_TRUE," with spectrum slicing");
285:     EPSCheckUnsupportedCondition(eps,EPS_FEATURE_ARBITRARY | EPS_FEATURE_REGION | EPS_FEATURE_STOPPING,PETSC_TRUE," with spectrum slicing");
286:     EPSCheckIgnoredCondition(eps,EPS_FEATURE_BALANCE,PETSC_TRUE," with spectrum slicing");
287:     if (eps->tol==PETSC_DEFAULT) {
288:  #if defined(PETSC_USE_REAL_SINGLE)
289:       eps->tol = SLEPC_DEFAULT_TOL;
290: #else
291:       /* use tighter tolerance */
292:       eps->tol = SLEPC_DEFAULT_TOL*1e-2;
293: #endif
294:     }
295:     if (eps->max_it==PETSC_DEFAULT) eps->max_it = 100;
296:     if (ctx->nev==1) ctx->nev = PetscMin(40,eps->n);  /* nev not set, use default value */
298:   }
299:   eps->ops->backtransform = EPSBackTransform_Skip;

301:   /* create spectrum slicing context and initialize it */
302:   EPSSliceResetSR(eps);
303:   PetscNewLog(eps,&sr);
304:   ctx->sr = sr;
305:   sr->itsKs = 0;
306:   sr->nleap = 0;
307:   sr->nMAXCompl = eps->nev/4;
308:   sr->iterCompl = eps->max_it/4;
309:   sr->sPres = NULL;
310:   sr->nS = 0;

312:   if (ctx->npart==1 || ctx->global) {
313:     /* check presence of ends and finding direction */
314:     if ((eps->inta > PETSC_MIN_REAL && !(ctx->subintervals && ctx->subintervals[0]==ctx->subintervals[1])) || eps->intb >= PETSC_MAX_REAL) {
315:       sr->int0 = eps->inta;
316:       sr->int1 = eps->intb;
317:       sr->dir = 1;
318:       if (eps->intb >= PETSC_MAX_REAL) { /* Right-open interval */
319:         sr->hasEnd = PETSC_FALSE;
320:       } else sr->hasEnd = PETSC_TRUE;
321:     } else {
322:       sr->int0 = eps->intb;
323:       sr->int1 = eps->inta;
324:       sr->dir = -1;
325:       sr->hasEnd = PetscNot(eps->inta <= PETSC_MIN_REAL);
326:     }
327:   }
328:   if (ctx->global) {
329:     EPSSetDimensions_Default(eps,ctx->nev,&ctx->ncv,&ctx->mpd);
330:     /* create subintervals and initialize auxiliary eps for slicing runs */
331:     EPSKrylovSchurGetChildEPS(eps,&ctx->eps);
332:     /* prevent computation of factorization in global eps */
333:     STSetTransform(eps->st,PETSC_FALSE);
334:     EPSSliceGetEPS(eps);
335:     sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
336:     if (ctx->npart>1) {
337:       PetscSubcommGetChild(ctx->subc,&child);
338:       if ((sr->dir>0&&ctx->subc->color==0)||(sr->dir<0&&ctx->subc->color==ctx->npart-1)) sr->inertia0 = sr_loc->inertia0;
339:       MPI_Comm_rank(child,&rank);
340:       if (!rank) MPI_Bcast(&sr->inertia0,1,MPIU_INT,(sr->dir>0)?0:ctx->npart-1,ctx->commrank);
341:       MPI_Bcast(&sr->inertia0,1,MPIU_INT,0,child);
342:       PetscFree(ctx->nconv_loc);
343:       PetscMalloc1(ctx->npart,&ctx->nconv_loc);
344:       MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
345:       if (sr->dir<0) off = 1;
346:       if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
347:         PetscMPIIntCast(sr_loc->numEigs,&aux);
348:         MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
349:         MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
350:       } else {
351:         MPI_Comm_rank(child,&rank);
352:         if (!rank) {
353:           PetscMPIIntCast(sr_loc->numEigs,&aux);
354:           MPI_Allgather(&aux,1,MPI_INT,ctx->nconv_loc,1,MPI_INT,ctx->commrank);
355:           MPI_Allgather(sr_loc->dir==sr->dir?&sr_loc->int0:&sr_loc->int1,1,MPIU_REAL,ctx->subintervals+off,1,MPIU_REAL,ctx->commrank);
356:         }
357:         PetscMPIIntCast(ctx->npart,&aux);
358:         MPI_Bcast(ctx->nconv_loc,aux,MPI_INT,0,child);
359:         MPI_Bcast(ctx->subintervals+off,aux,MPIU_REAL,0,child);
360:       }
361:       nEigs = 0;
362:       for (i=0;i<ctx->npart;i++) nEigs += ctx->nconv_loc[i];
363:     } else {
364:       nEigs = sr_loc->numEigs;
365:       sr->inertia0 = sr_loc->inertia0;
366:       sr->dir = sr_loc->dir;
367:     }
368:     sr->inertia1 = sr->inertia0+sr->dir*nEigs;
369:     sr->numEigs = nEigs;
370:     eps->nev = nEigs;
371:     eps->ncv = nEigs;
372:     eps->mpd = nEigs;
373:   } else {
374:     ctx_glob = (EPS_KRYLOVSCHUR*)ctx->eps->data;
375:     sr_glob = ctx_glob->sr;
376:     if (ctx->npart>1) {
377:       sr->dir = sr_glob->dir;
378:       sr->int0 = (sr->dir==1)?eps->inta:eps->intb;
379:       sr->int1 = (sr->dir==1)?eps->intb:eps->inta;
380:       if ((sr->dir>0&&ctx->subc->color==ctx->npart-1)||(sr->dir<0&&ctx->subc->color==0)) sr->hasEnd = sr_glob->hasEnd;
381:       else sr->hasEnd = PETSC_TRUE;
382:     }
383:     /* sets first shift */
384:     STSetShift(eps->st,(sr->int0==0.0)?10.0/PETSC_MAX_REAL:sr->int0);
385:     STSetUp(eps->st);

387:     /* compute inertia0 */
388:     EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,ctx->detect?&zeros:NULL);
389:     /* undocumented option to control what to do when an eigenvalue is found:
390:        - error out if it's the endpoint of the user-provided interval (or sub-interval)
391:        - if it's an endpoint computed internally:
392:           + if hiteig=0 error out
393:           + else if hiteig=1 the subgroup that hit the eigenvalue does nothing
394:           + otherwise the subgroup that hit the eigenvalue perturbs the shift and recomputes inertia
395:     */
396:     PetscOptionsGetInt(NULL,NULL,"-eps_krylovschur_hiteigenvalue",&hiteig,NULL);
397:     if (zeros) { /* error in factorization */
400:       if (hiteig==1) { /* idle subgroup */
401:         sr->inertia0 = -1;
402:       } else { /* perturb shift */
403:         sr->int0 *= (1.0+SLICE_PTOL);
404:         EPSSliceGetInertia(eps,sr->int0,&sr->inertia0,&zeros);
406:       }
407:     }
408:     if (ctx->npart>1) {
409:       PetscSubcommGetChild(ctx->subc,&child);
410:       /* inertia1 is received from neighbour */
411:       MPI_Comm_rank(child,&rank);
412:       if (!rank) {
413:         if (sr->inertia0!=-1 && ((sr->dir>0 && ctx->subc->color>0) || (sr->dir<0 && ctx->subc->color<ctx->npart-1))) { /* send inertia0 to neighbour0 */
414:           MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
415:           MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
416:         }
417:         if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)|| (sr->dir<0 && ctx->subc->color>0)) { /* receive inertia1 from neighbour1 */
418:           MPI_Recv(&(sr->inertia1),1,MPIU_INT,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
419:           MPI_Recv(&(sr->int1),1,MPIU_REAL,ctx->subc->color+sr->dir,0,ctx->commrank,MPI_STATUS_IGNORE);
420:         }
421:         if (sr->inertia0==-1 && !(sr->dir>0 && ctx->subc->color==ctx->npart-1) && !(sr->dir<0 && ctx->subc->color==0)) {
422:           sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
423:           MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
424:           MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
425:         }
426:       }
427:       if ((sr->dir>0 && ctx->subc->color<ctx->npart-1)||(sr->dir<0 && ctx->subc->color>0)) {
428:         MPI_Bcast(&sr->inertia1,1,MPIU_INT,0,child);
429:         MPI_Bcast(&sr->int1,1,MPIU_REAL,0,child);
430:       } else sr_glob->inertia1 = sr->inertia1;
431:     }

433:     /* last process in eps comm computes inertia1 */
434:     if (ctx->npart==1 || ((sr->dir>0 && ctx->subc->color==ctx->npart-1) || (sr->dir<0 && ctx->subc->color==0))) {
435:       EPSSliceGetInertia(eps,sr->int1,&sr->inertia1,ctx->detect?&zeros:NULL);
437:       if (!rank && sr->inertia0==-1) {
438:         sr->inertia0 = sr->inertia1; sr->int0 = sr->int1;
439:         MPI_Isend(&(sr->inertia0),1,MPIU_INT,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
440:         MPI_Isend(&(sr->int0),1,MPIU_REAL,ctx->subc->color-sr->dir,0,ctx->commrank,&req);
441:       }
442:       if (sr->hasEnd) {
443:         sr->dir = -sr->dir; r = sr->int0; sr->int0 = sr->int1; sr->int1 = r;
444:         i = sr->inertia0; sr->inertia0 = sr->inertia1; sr->inertia1 = i;
445:       }
446:     }

448:     /* number of eigenvalues in interval */
449:     sr->numEigs = (sr->dir)*(sr->inertia1 - sr->inertia0);
450:     if (ctx->npart>1) {
451:       /* memory allocate for subinterval eigenpairs */
452:       EPSSliceAllocateSolution(eps,1);
453:     }
454:     dssz = eps->ncv+1;
455:     DSGetParallel(ctx->eps->ds,&ptype);
456:     DSSetParallel(eps->ds,ptype);
457:     DSGetMethod(ctx->eps->ds,&method);
458:     DSSetMethod(eps->ds,method);
459:   }
460:   DSSetType(eps->ds,DSHEP);
461:   DSSetCompact(eps->ds,PETSC_TRUE);
462:   DSAllocate(eps->ds,dssz);
463:   /* keep state of subcomm matrices to check that the user does not modify them */
464:   EPSGetOperators(eps,&A,&B);
465:   PetscObjectStateGet((PetscObject)A,&ctx->Astate);
466:   PetscObjectGetId((PetscObject)A,&ctx->Aid);
467:   if (B) {
468:     PetscObjectStateGet((PetscObject)B,&ctx->Bstate);
469:     PetscObjectGetId((PetscObject)B,&ctx->Bid);
470:   } else {
471:     ctx->Bstate=0;
472:     ctx->Bid=0;
473:   }
474:   PetscFunctionReturn(0);
475: }

477: static PetscErrorCode EPSSliceGatherEigenVectors(EPS eps)
478: {
479:   Vec             v,vg,v_loc;
480:   IS              is1,is2;
481:   VecScatter      vec_sc;
482:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
483:   PetscInt        nloc,m0,n0,i,si,idx,*idx1,*idx2,j;
484:   PetscScalar     *array;
485:   EPS_SR          sr_loc;
486:   BV              V_loc;

488:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;
489:   V_loc = sr_loc->V;

491:   /* Gather parallel eigenvectors */
492:   BVGetColumn(eps->V,0,&v);
493:   VecGetOwnershipRange(v,&n0,&m0);
494:   BVRestoreColumn(eps->V,0,&v);
495:   BVGetColumn(ctx->eps->V,0,&v);
496:   VecGetLocalSize(v,&nloc);
497:   BVRestoreColumn(ctx->eps->V,0,&v);
498:   PetscMalloc2(m0-n0,&idx1,m0-n0,&idx2);
499:   VecCreateMPI(PetscObjectComm((PetscObject)eps),nloc,PETSC_DECIDE,&vg);
500:   idx = -1;
501:   for (si=0;si<ctx->npart;si++) {
502:     j = 0;
503:     for (i=n0;i<m0;i++) {
504:       idx1[j]   = i;
505:       idx2[j++] = i+eps->n*si;
506:     }
507:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx1,PETSC_COPY_VALUES,&is1);
508:     ISCreateGeneral(PetscObjectComm((PetscObject)eps),(m0-n0),idx2,PETSC_COPY_VALUES,&is2);
509:     BVGetColumn(eps->V,0,&v);
510:     VecScatterCreate(v,is1,vg,is2,&vec_sc);
511:     BVRestoreColumn(eps->V,0,&v);
512:     ISDestroy(&is1);
513:     ISDestroy(&is2);
514:     for (i=0;i<ctx->nconv_loc[si];i++) {
515:       BVGetColumn(eps->V,++idx,&v);
516:       if (ctx->subc->color==si) {
517:         BVGetColumn(V_loc,i,&v_loc);
518:         VecGetArray(v_loc,&array);
519:         VecPlaceArray(vg,array);
520:       }
521:       VecScatterBegin(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
522:       VecScatterEnd(vec_sc,vg,v,INSERT_VALUES,SCATTER_REVERSE);
523:       if (ctx->subc->color==si) {
524:         VecResetArray(vg);
525:         VecRestoreArray(v_loc,&array);
526:         BVRestoreColumn(V_loc,i,&v_loc);
527:       }
528:       BVRestoreColumn(eps->V,idx,&v);
529:     }
530:     VecScatterDestroy(&vec_sc);
531:   }
532:   PetscFree2(idx1,idx2);
533:   VecDestroy(&vg);
534:   PetscFunctionReturn(0);
535: }

537: /*
538:   EPSComputeVectors_Slice - Recover Eigenvectors from subcomunicators
539:  */
540: PetscErrorCode EPSComputeVectors_Slice(EPS eps)
541: {
542:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

544:   if (ctx->global && ctx->npart>1) {
545:     EPSComputeVectors(ctx->eps);
546:     EPSSliceGatherEigenVectors(eps);
547:   }
548:   PetscFunctionReturn(0);
549: }

551: #define SWAP(a,b,t) {t=a;a=b;b=t;}

553: static PetscErrorCode EPSSliceGetInertias(EPS eps,PetscInt *n,PetscReal **shifts,PetscInt **inertias)
554: {
555:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
556:   PetscInt        i=0,j,tmpi;
557:   PetscReal       v,tmpr;
558:   EPS_shift       s;

562:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
563:     *n = 2;
564:   } else {
565:     *n = 1;
566:     s = ctx->sr->s0;
567:     while (s) {
568:       (*n)++;
569:       s = s->neighb[1];
570:     }
571:   }
572:   PetscMalloc1(*n,shifts);
573:   PetscMalloc1(*n,inertias);
574:   if (!ctx->sr->s0) {  /* EPSSolve not called yet */
575:     (*shifts)[0]   = ctx->sr->int0;
576:     (*shifts)[1]   = ctx->sr->int1;
577:     (*inertias)[0] = ctx->sr->inertia0;
578:     (*inertias)[1] = ctx->sr->inertia1;
579:   } else {
580:     s = ctx->sr->s0;
581:     while (s) {
582:       (*shifts)[i]     = s->value;
583:       (*inertias)[i++] = s->inertia;
584:       s = s->neighb[1];
585:     }
586:     (*shifts)[i]   = ctx->sr->int1;
587:     (*inertias)[i] = ctx->sr->inertia1;
588:   }
589:   /* remove possible duplicate in last position */
590:   if ((*shifts)[(*n)-1]==(*shifts)[(*n)-2]) (*n)--;
591:   /* sort result */
592:   for (i=0;i<*n;i++) {
593:     v = (*shifts)[i];
594:     for (j=i+1;j<*n;j++) {
595:       if (v > (*shifts)[j]) {
596:         SWAP((*shifts)[i],(*shifts)[j],tmpr);
597:         SWAP((*inertias)[i],(*inertias)[j],tmpi);
598:         v = (*shifts)[i];
599:       }
600:     }
601:   }
602:   PetscFunctionReturn(0);
603: }

605: static PetscErrorCode EPSSliceGatherSolution(EPS eps)
606: {
607:   PetscMPIInt     rank,nproc,*disp,*ns_loc,aux;
608:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
609:   PetscInt        i,idx,j,*perm_loc,off=0,*inertias_loc,ns;
610:   PetscScalar     *eigr_loc;
611:   EPS_SR          sr_loc;
612:   PetscReal       *shifts_loc;
613:   MPI_Comm        child;

615:   eps->nconv = 0;
616:   for (i=0;i<ctx->npart;i++) eps->nconv += ctx->nconv_loc[i];
617:   sr_loc = ((EPS_KRYLOVSCHUR*)ctx->eps->data)->sr;

619:   /* Gather the shifts used and the inertias computed */
620:   EPSSliceGetInertias(ctx->eps,&ns,&shifts_loc,&inertias_loc);
621:   if (ctx->sr->dir>0 && shifts_loc[ns-1]==sr_loc->int1 && ctx->subc->color<ctx->npart-1) ns--;
622:   if (ctx->sr->dir<0 && shifts_loc[ns-1]==sr_loc->int0 && ctx->subc->color>0) {
623:     ns--;
624:     for (i=0;i<ns;i++) {
625:       inertias_loc[i] = inertias_loc[i+1];
626:       shifts_loc[i] = shifts_loc[i+1];
627:     }
628:   }
629:   PetscMalloc1(ctx->npart,&ns_loc);
630:   PetscSubcommGetChild(ctx->subc,&child);
631:   MPI_Comm_rank(child,&rank);
632:   PetscMPIIntCast(ns,&aux);
633:   if (!rank) MPI_Allgather(&aux,1,MPI_INT,ns_loc,1,MPI_INT,ctx->commrank);
634:   PetscMPIIntCast(ctx->npart,&aux);
635:   MPI_Bcast(ns_loc,aux,MPI_INT,0,child);
636:   ctx->nshifts = 0;
637:   for (i=0;i<ctx->npart;i++) ctx->nshifts += ns_loc[i];
638:   PetscFree(ctx->inertias);
639:   PetscFree(ctx->shifts);
640:   PetscMalloc1(ctx->nshifts,&ctx->inertias);
641:   PetscMalloc1(ctx->nshifts,&ctx->shifts);

643:   /* Gather eigenvalues (same ranks have fully set of eigenvalues)*/
644:   eigr_loc = sr_loc->eigr;
645:   perm_loc = sr_loc->perm;
646:   MPI_Comm_size(((PetscObject)eps)->comm,&nproc);
647:   PetscMalloc1(ctx->npart,&disp);
648:   disp[0] = 0;
649:   for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ctx->nconv_loc[i-1];
650:   if (nproc%ctx->npart==0) { /* subcommunicators with the same size */
651:     PetscMPIIntCast(sr_loc->numEigs,&aux);
652:     MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
653:     MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
654:     for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
655:     PetscMPIIntCast(ns,&aux);
656:     MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
657:     MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
658:     MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
659:   } else { /* subcommunicators with different size */
660:     if (!rank) {
661:       PetscMPIIntCast(sr_loc->numEigs,&aux);
662:       MPI_Allgatherv(eigr_loc,aux,MPIU_SCALAR,eps->eigr,ctx->nconv_loc,disp,MPIU_SCALAR,ctx->commrank); /* eigenvalues */
663:       MPI_Allgatherv(perm_loc,aux,MPIU_INT,eps->perm,ctx->nconv_loc,disp,MPIU_INT,ctx->commrank); /* perm */
664:       for (i=1;i<ctx->npart;i++) disp[i] = disp[i-1]+ns_loc[i-1];
665:       PetscMPIIntCast(ns,&aux);
666:       MPI_Allgatherv(shifts_loc,aux,MPIU_REAL,ctx->shifts,ns_loc,disp,MPIU_REAL,ctx->commrank); /* shifts */
667:       MPI_Allgatherv(inertias_loc,aux,MPIU_INT,ctx->inertias,ns_loc,disp,MPIU_INT,ctx->commrank); /* inertias */
668:       MPIU_Allreduce(&sr_loc->itsKs,&eps->its,1,MPIU_INT,MPI_SUM,ctx->commrank);
669:     }
670:     PetscMPIIntCast(eps->nconv,&aux);
671:     MPI_Bcast(eps->eigr,aux,MPIU_SCALAR,0,child);
672:     MPI_Bcast(eps->perm,aux,MPIU_INT,0,child);
673:     MPI_Bcast(ctx->shifts,ctx->nshifts,MPIU_REAL,0,child);
674:     PetscMPIIntCast(ctx->nshifts,&aux);
675:     MPI_Bcast(ctx->inertias,aux,MPIU_INT,0,child);
676:     MPI_Bcast(&eps->its,1,MPIU_INT,0,child);
677:   }
678:   /* Update global array eps->perm */
679:   idx = ctx->nconv_loc[0];
680:   for (i=1;i<ctx->npart;i++) {
681:     off += ctx->nconv_loc[i-1];
682:     for (j=0;j<ctx->nconv_loc[i];j++) eps->perm[idx++] += off;
683:   }

685:   /* Gather parallel eigenvectors */
686:   PetscFree(ns_loc);
687:   PetscFree(disp);
688:   PetscFree(shifts_loc);
689:   PetscFree(inertias_loc);
690:   PetscFunctionReturn(0);
691: }

693: /*
694:    Fills the fields of a shift structure
695: */
696: static PetscErrorCode EPSCreateShift(EPS eps,PetscReal val,EPS_shift neighb0,EPS_shift neighb1)
697: {
698:   EPS_shift       s,*pending2;
699:   PetscInt        i;
700:   EPS_SR          sr;
701:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

703:   sr = ctx->sr;
704:   if ((neighb0 && val==neighb0->value) || (neighb1 && val==neighb1->value)) {
705:     sr->nPend++;
706:     PetscFunctionReturn(0);
707:   }
708:   PetscNewLog(eps,&s);
709:   s->value = val;
710:   s->neighb[0] = neighb0;
711:   if (neighb0) neighb0->neighb[1] = s;
712:   s->neighb[1] = neighb1;
713:   if (neighb1) neighb1->neighb[0] = s;
714:   s->comp[0] = PETSC_FALSE;
715:   s->comp[1] = PETSC_FALSE;
716:   s->index = -1;
717:   s->neigs = 0;
718:   s->nconv[0] = s->nconv[1] = 0;
719:   s->nsch[0] = s->nsch[1]=0;
720:   /* Inserts in the stack of pending shifts */
721:   /* If needed, the array is resized */
722:   if (sr->nPend >= sr->maxPend) {
723:     sr->maxPend *= 2;
724:     PetscMalloc1(sr->maxPend,&pending2);
725:     PetscLogObjectMemory((PetscObject)eps,sr->maxPend*sizeof(EPS_shift*));
726:     for (i=0;i<sr->nPend;i++) pending2[i] = sr->pending[i];
727:     PetscFree(sr->pending);
728:     sr->pending = pending2;
729:   }
730:   sr->pending[sr->nPend++]=s;
731:   PetscFunctionReturn(0);
732: }

734: /* Prepare for Rational Krylov update */
735: static PetscErrorCode EPSPrepareRational(EPS eps)
736: {
737:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
738:   PetscInt        dir,i,k,ld,nv;
739:   PetscScalar     *A;
740:   EPS_SR          sr = ctx->sr;
741:   Vec             v;

743:   DSGetLeadingDimension(eps->ds,&ld);
744:   dir = (sr->sPres->neighb[0] == sr->sPrev)?1:-1;
745:   dir*=sr->dir;
746:   k = 0;
747:   for (i=0;i<sr->nS;i++) {
748:     if (dir*PetscRealPart(sr->S[i])>0.0) {
749:       sr->S[k] = sr->S[i];
750:       sr->S[sr->nS+k] = sr->S[sr->nS+i];
751:       BVGetColumn(sr->Vnext,k,&v);
752:       BVCopyVec(eps->V,eps->nconv+i,v);
753:       BVRestoreColumn(sr->Vnext,k,&v);
754:       k++;
755:       if (k>=sr->nS/2)break;
756:     }
757:   }
758:   /* Copy to DS */
759:   DSGetArray(eps->ds,DS_MAT_A,&A);
760:   PetscArrayzero(A,ld*ld);
761:   for (i=0;i<k;i++) {
762:     A[i*(1+ld)] = sr->S[i];
763:     A[k+i*ld] = sr->S[sr->nS+i];
764:   }
765:   sr->nS = k;
766:   DSRestoreArray(eps->ds,DS_MAT_A,&A);
767:   DSGetDimensions(eps->ds,&nv,NULL,NULL,NULL);
768:   DSSetDimensions(eps->ds,nv,0,k);
769:   /* Append u to V */
770:   BVGetColumn(sr->Vnext,sr->nS,&v);
771:   BVCopyVec(eps->V,sr->nv,v);
772:   BVRestoreColumn(sr->Vnext,sr->nS,&v);
773:   PetscFunctionReturn(0);
774: }

776: /* Provides next shift to be computed */
777: static PetscErrorCode EPSExtractShift(EPS eps)
778: {
779:   PetscInt        iner,zeros=0;
780:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
781:   EPS_SR          sr;
782:   PetscReal       newShift,diam,ptol;
783:   EPS_shift       sPres;

785:   sr = ctx->sr;
786:   if (sr->nPend > 0) {
787:     if (sr->sPres==sr->pending[sr->nPend-1]) {
788:       eps->reason = EPS_CONVERGED_ITERATING;
789:       eps->its = 0;
790:       sr->nPend--;
791:       sr->sPres->rep = PETSC_TRUE;
792:       PetscFunctionReturn(0);
793:     }
794:     sr->sPrev = sr->sPres;
795:     sr->sPres = sr->pending[--sr->nPend];
796:     sPres = sr->sPres;
797:     EPSSliceGetInertia(eps,sPres->value,&iner,ctx->detect?&zeros:NULL);
798:     if (zeros) {
799:       diam = PetscMin(PetscAbsReal(sPres->neighb[0]->value-sPres->value),PetscAbsReal(sPres->neighb[1]->value-sPres->value));
800:       ptol = PetscMin(SLICE_PTOL,diam/2);
801:       newShift = sPres->value*(1.0+ptol);
802:       if (sr->dir*(sPres->neighb[0] && newShift-sPres->neighb[0]->value) < 0) newShift = (sPres->value+sPres->neighb[0]->value)/2;
803:       else if (sPres->neighb[1] && sr->dir*(sPres->neighb[1]->value-newShift) < 0) newShift = (sPres->value+sPres->neighb[1]->value)/2;
804:       EPSSliceGetInertia(eps,newShift,&iner,&zeros);
806:       sPres->value = newShift;
807:     }
808:     sr->sPres->inertia = iner;
809:     eps->target = sr->sPres->value;
810:     eps->reason = EPS_CONVERGED_ITERATING;
811:     eps->its = 0;
812:   } else sr->sPres = NULL;
813:   PetscFunctionReturn(0);
814: }

816: /*
817:    Symmetric KrylovSchur adapted to spectrum slicing:
818:    Allows searching an specific amount of eigenvalues in the subintervals left and right.
819:    Returns whether the search has succeeded
820: */
821: static PetscErrorCode EPSKrylovSchur_Slice(EPS eps)
822: {
823:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
824:   PetscInt        i,k,l,ld,nv,*iwork,j,count0,count1,iterCompl=0,n0,n1;
825:   Mat             U,Op;
826:   PetscScalar     *Q,*A;
827:   PetscReal       *a,*b,beta,lambda;
828:   EPS_shift       sPres;
829:   PetscBool       breakdown,complIterating,sch0,sch1;
830:   EPS_SR          sr = ctx->sr;

832:   /* Spectrum slicing data */
833:   sPres = sr->sPres;
834:   complIterating =PETSC_FALSE;
835:   sch1 = sch0 = PETSC_TRUE;
836:   DSGetLeadingDimension(eps->ds,&ld);
837:   PetscMalloc1(2*ld,&iwork);
838:   count0=0;count1=0; /* Found on both sides */
839:   if (!sPres->rep && sr->nS > 0 && (sPres->neighb[0] == sr->sPrev || sPres->neighb[1] == sr->sPrev)) {
840:     /* Rational Krylov */
841:     DSTranslateRKS(eps->ds,sr->sPrev->value-sPres->value);
842:     DSGetDimensions(eps->ds,NULL,NULL,&l,NULL);
843:     DSSetDimensions(eps->ds,l+1,0,0);
844:     BVSetActiveColumns(eps->V,0,l+1);
845:     DSGetMat(eps->ds,DS_MAT_Q,&U);
846:     BVMultInPlace(eps->V,U,0,l+1);
847:     MatDestroy(&U);
848:   } else {
849:     /* Get the starting Lanczos vector */
850:     EPSGetStartVector(eps,0,NULL);
851:     l = 0;
852:   }
853:   /* Restart loop */
854:   while (eps->reason == EPS_CONVERGED_ITERATING) {
855:     eps->its++; sr->itsKs++;
856:     /* Compute an nv-step Lanczos factorization */
857:     nv = PetscMin(eps->nconv+eps->mpd,eps->ncv);
858:     DSGetArrayReal(eps->ds,DS_MAT_T,&a);
859:     b = a + ld;
860:     STGetOperator(eps->st,&Op);
861:     BVMatLanczos(eps->V,Op,a,b,eps->nconv+l,&nv,&breakdown);
862:     STRestoreOperator(eps->st,&Op);
863:     sr->nv = nv;
864:     beta = b[nv-1];
865:     DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
866:     DSSetDimensions(eps->ds,nv,eps->nconv,eps->nconv+l);
867:     if (l==0) DSSetState(eps->ds,DS_STATE_INTERMEDIATE);
868:     else DSSetState(eps->ds,DS_STATE_RAW);
869:     BVSetActiveColumns(eps->V,eps->nconv,nv);

871:     /* Solve projected problem and compute residual norm estimates */
872:     if (eps->its == 1 && l > 0) {/* After rational update */
873:       DSGetArray(eps->ds,DS_MAT_A,&A);
874:       DSGetArrayReal(eps->ds,DS_MAT_T,&a);
875:       b = a + ld;
876:       k = eps->nconv+l;
877:       A[k*ld+k-1] = A[(k-1)*ld+k];
878:       A[k*ld+k] = a[k];
879:       for (j=k+1; j< nv; j++) {
880:         A[j*ld+j] = a[j];
881:         A[j*ld+j-1] = b[j-1] ;
882:         A[(j-1)*ld+j] = b[j-1];
883:       }
884:       DSRestoreArray(eps->ds,DS_MAT_A,&A);
885:       DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
886:       DSSolve(eps->ds,eps->eigr,NULL);
887:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
888:       DSSetCompact(eps->ds,PETSC_TRUE);
889:     } else { /* Restart */
890:       DSSolve(eps->ds,eps->eigr,NULL);
891:       DSSort(eps->ds,eps->eigr,NULL,NULL,NULL,NULL);
892:     }
893:     DSSynchronize(eps->ds,eps->eigr,NULL);

895:     /* Residual */
896:     EPSKrylovConvergence(eps,PETSC_TRUE,eps->nconv,nv-eps->nconv,beta,0.0,1.0,&k);
897:     /* Checking values obtained for completing */
898:     for (i=0;i<k;i++) {
899:       sr->back[i]=eps->eigr[i];
900:     }
901:     STBackTransform(eps->st,k,sr->back,eps->eigi);
902:     count0=count1=0;
903:     for (i=0;i<k;i++) {
904:       lambda = PetscRealPart(sr->back[i]);
905:       if (((sr->dir)*(sPres->value - lambda) > 0) && ((sr->dir)*(lambda - sPres->ext[0]) > 0)) count0++;
906:       if (((sr->dir)*(lambda - sPres->value) > 0) && ((sr->dir)*(sPres->ext[1] - lambda) > 0)) count1++;
907:     }
908:     if (k>eps->nev && eps->ncv-k<5) eps->reason = EPS_CONVERGED_TOL;
909:     else {
910:       /* Checks completion */
911:       if ((!sch0||count0 >= sPres->nsch[0]) && (!sch1 ||count1 >= sPres->nsch[1])) {
912:         eps->reason = EPS_CONVERGED_TOL;
913:       } else {
914:         if (!complIterating && eps->its >= eps->max_it) eps->reason = EPS_DIVERGED_ITS;
915:         if (complIterating) {
916:           if (--iterCompl <= 0) eps->reason = EPS_DIVERGED_ITS;
917:         } else if (k >= eps->nev) {
918:           n0 = sPres->nsch[0]-count0;
919:           n1 = sPres->nsch[1]-count1;
920:           if (sr->iterCompl>0 && ((n0>0 && n0<= sr->nMAXCompl)||(n1>0&&n1<=sr->nMAXCompl))) {
921:             /* Iterating for completion*/
922:             complIterating = PETSC_TRUE;
923:             if (n0 >sr->nMAXCompl)sch0 = PETSC_FALSE;
924:             if (n1 >sr->nMAXCompl)sch1 = PETSC_FALSE;
925:             iterCompl = sr->iterCompl;
926:           } else eps->reason = EPS_CONVERGED_TOL;
927:         }
928:       }
929:     }
930:     /* Update l */
931:     if (eps->reason == EPS_CONVERGED_ITERATING) l = PetscMax(1,(PetscInt)((nv-k)*ctx->keep));
932:     else l = nv-k;
933:     if (breakdown) l=0;
934:     if (!ctx->lock && l>0 && eps->reason == EPS_CONVERGED_ITERATING) { l += k; k = 0; } /* non-locking variant: reset no. of converged pairs */

936:     if (eps->reason == EPS_CONVERGED_ITERATING) {
937:       if (breakdown) {
938:         /* Start a new Lanczos factorization */
939:         PetscInfo(eps,"Breakdown in Krylov-Schur method (it=%" PetscInt_FMT " norm=%g)\n",eps->its,(double)beta);
940:         EPSGetStartVector(eps,k,&breakdown);
941:         if (breakdown) {
942:           eps->reason = EPS_DIVERGED_BREAKDOWN;
943:           PetscInfo(eps,"Unable to generate more start vectors\n");
944:         }
945:       } else {
946:         /* Prepare the Rayleigh quotient for restart */
947:         DSGetArrayReal(eps->ds,DS_MAT_T,&a);
948:         DSGetArray(eps->ds,DS_MAT_Q,&Q);
949:         b = a + ld;
950:         for (i=k;i<k+l;i++) {
951:           a[i] = PetscRealPart(eps->eigr[i]);
952:           b[i] = PetscRealPart(Q[nv-1+i*ld]*beta);
953:         }
954:         DSRestoreArrayReal(eps->ds,DS_MAT_T,&a);
955:         DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
956:       }
957:     }
958:     /* Update the corresponding vectors V(:,idx) = V*Q(:,idx) */
959:     DSGetMat(eps->ds,DS_MAT_Q,&U);
960:     BVMultInPlace(eps->V,U,eps->nconv,k+l);
961:     MatDestroy(&U);

963:     /* Normalize u and append it to V */
964:     if (eps->reason == EPS_CONVERGED_ITERATING && !breakdown) BVCopyColumn(eps->V,nv,k+l);
965:     eps->nconv = k;
966:     if (eps->reason != EPS_CONVERGED_ITERATING) {
967:       /* Store approximated values for next shift */
968:       DSGetArray(eps->ds,DS_MAT_Q,&Q);
969:       sr->nS = l;
970:       for (i=0;i<l;i++) {
971:         sr->S[i] = eps->eigr[i+k];/* Diagonal elements */
972:         sr->S[i+l] = Q[nv-1+(i+k)*ld]*beta; /* Out of diagonal elements */
973:       }
974:       DSRestoreArray(eps->ds,DS_MAT_Q,&Q);
975:     }
976:   }
977:   /* Check for completion */
978:   for (i=0;i< eps->nconv; i++) {
979:     if ((sr->dir)*PetscRealPart(eps->eigr[i])>0) sPres->nconv[1]++;
980:     else sPres->nconv[0]++;
981:   }
982:   sPres->comp[0] = PetscNot(count0 < sPres->nsch[0]);
983:   sPres->comp[1] = PetscNot(count1 < sPres->nsch[1]);
984:   PetscInfo(eps,"Lanczos: %" PetscInt_FMT " evals in [%g,%g]%s and %" PetscInt_FMT " evals in [%g,%g]%s\n",count0,(double)(sr->dir==1?sPres->ext[0]:sPres->value),(double)(sr->dir==1?sPres->value:sPres->ext[0]),sPres->comp[0]?"*":"",count1,(double)(sr->dir==1?sPres->value:sPres->ext[1]),(double)(sr->dir==1?sPres->ext[1]:sPres->value),sPres->comp[1]?"*":"");
986:   PetscFree(iwork);
987:   PetscFunctionReturn(0);
988: }

990: /*
991:   Obtains value of subsequent shift
992: */
993: static PetscErrorCode EPSGetNewShiftValue(EPS eps,PetscInt side,PetscReal *newS)
994: {
995:   PetscReal       lambda,d_prev;
996:   PetscInt        i,idxP;
997:   EPS_SR          sr;
998:   EPS_shift       sPres,s;
999:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

1001:   sr = ctx->sr;
1002:   sPres = sr->sPres;
1003:   if (sPres->neighb[side]) {
1004:     /* Completing a previous interval */
1005:     *newS = (sPres->value + sPres->neighb[side]->value)/2;
1006:     if (PetscAbsReal(sPres->value - *newS)/PetscAbsReal(sPres->value)<=100*PETSC_SQRT_MACHINE_EPSILON) *newS = sPres->value;
1007:   } else { /* (Only for side=1). Creating a new interval. */
1008:     if (sPres->neigs==0) {/* No value has been accepted*/
1009:       if (sPres->neighb[0]) {
1010:         /* Multiplying by 10 the previous distance */
1011:         *newS = sPres->value + 10*(sr->dir)*PetscAbsReal(sPres->value - sPres->neighb[0]->value);
1012:         sr->nleap++;
1013:         /* Stops when the interval is open and no values are found in the last 5 shifts (there might be infinite eigenvalues) */
1015:       } else { /* First shift */
1017:         /* Unaccepted values give information for next shift */
1018:         idxP=0;/* Number of values left from shift */
1019:         for (i=0;i<eps->nconv;i++) {
1020:           lambda = PetscRealPart(eps->eigr[i]);
1021:           if ((sr->dir)*(lambda - sPres->value) <0) idxP++;
1022:           else break;
1023:         }
1024:         /* Avoiding subtraction of eigenvalues (might be the same).*/
1025:         if (idxP>0) {
1026:           d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[0]))/(idxP+0.3);
1027:         } else {
1028:           d_prev = PetscAbsReal(sPres->value - PetscRealPart(eps->eigr[eps->nconv-1]))/(eps->nconv+0.3);
1029:         }
1030:         *newS = sPres->value + ((sr->dir)*d_prev*eps->nev)/2;
1031:       }
1032:     } else { /* Accepted values found */
1033:       sr->nleap = 0;
1034:       /* Average distance of values in previous subinterval */
1035:       s = sPres->neighb[0];
1036:       while (s && PetscAbs(s->inertia - sPres->inertia)==0) {
1037:         s = s->neighb[0];/* Looking for previous shifts with eigenvalues within */
1038:       }
1039:       if (s) {
1040:         d_prev = PetscAbsReal((sPres->value - s->value)/(sPres->inertia - s->inertia));
1041:       } else { /* First shift. Average distance obtained with values in this shift */
1042:         /* first shift might be too far from first wanted eigenvalue (no values found outside the interval)*/
1043:         if ((sr->dir)*(PetscRealPart(sr->eigr[0])-sPres->value)>0 && PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0]))/PetscRealPart(sr->eigr[0])) > PetscSqrtReal(eps->tol)) {
1044:           d_prev =  PetscAbsReal((PetscRealPart(sr->eigr[sr->indexEig-1]) - PetscRealPart(sr->eigr[0])))/(sPres->neigs+0.3);
1045:         } else {
1046:           d_prev = PetscAbsReal(PetscRealPart(sr->eigr[sr->indexEig-1]) - sPres->value)/(sPres->neigs+0.3);
1047:         }
1048:       }
1049:       /* Average distance is used for next shift by adding it to value on the right or to shift */
1050:       if ((sr->dir)*(PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1]) - sPres->value)>0) {
1051:         *newS = PetscRealPart(sr->eigr[sPres->index + sPres->neigs -1])+ ((sr->dir)*d_prev*(eps->nev))/2;
1052:       } else { /* Last accepted value is on the left of shift. Adding to shift */
1053:         *newS = sPres->value + ((sr->dir)*d_prev*(eps->nev))/2;
1054:       }
1055:     }
1056:     /* End of interval can not be surpassed */
1057:     if ((sr->dir)*(sr->int1 - *newS) < 0) *newS = sr->int1;
1058:   }/* of neighb[side]==null */
1059:   PetscFunctionReturn(0);
1060: }

1062: /*
1063:   Function for sorting an array of real values
1064: */
1065: static PetscErrorCode sortRealEigenvalues(PetscScalar *r,PetscInt *perm,PetscInt nr,PetscBool prev,PetscInt dir)
1066: {
1067:   PetscReal re;
1068:   PetscInt  i,j,tmp;

1070:   if (!prev) for (i=0;i<nr;i++) perm[i] = i;
1071:   /* Insertion sort */
1072:   for (i=1;i<nr;i++) {
1073:     re = PetscRealPart(r[perm[i]]);
1074:     j = i-1;
1075:     while (j>=0 && dir*(re - PetscRealPart(r[perm[j]])) <= 0) {
1076:       tmp = perm[j]; perm[j] = perm[j+1]; perm[j+1] = tmp; j--;
1077:     }
1078:   }
1079:   PetscFunctionReturn(0);
1080: }

1082: /* Stores the pairs obtained since the last shift in the global arrays */
1083: static PetscErrorCode EPSStoreEigenpairs(EPS eps)
1084: {
1085:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1086:   PetscReal       lambda,err,norm;
1087:   PetscInt        i,count;
1088:   PetscBool       iscayley;
1089:   EPS_SR          sr = ctx->sr;
1090:   EPS_shift       sPres;
1091:   Vec             v,w;

1093:   sPres = sr->sPres;
1094:   sPres->index = sr->indexEig;
1095:   count = sr->indexEig;
1096:   /* Back-transform */
1097:   STBackTransform(eps->st,eps->nconv,eps->eigr,eps->eigi);
1098:   PetscObjectTypeCompare((PetscObject)eps->st,STCAYLEY,&iscayley);
1099:   /* Sort eigenvalues */
1100:   sortRealEigenvalues(eps->eigr,eps->perm,eps->nconv,PETSC_FALSE,sr->dir);
1101:   /* Values stored in global array */
1102:   for (i=0;i<eps->nconv;i++) {
1103:     lambda = PetscRealPart(eps->eigr[eps->perm[i]]);
1104:     err = eps->errest[eps->perm[i]];

1106:     if ((sr->dir)*(lambda - sPres->ext[0]) > 0 && (sr->dir)*(sPres->ext[1] - lambda) > 0) {/* Valid value */
1108:       sr->eigr[count] = lambda;
1109:       sr->errest[count] = err;
1110:       /* Explicit purification */
1111:       BVGetColumn(eps->V,eps->perm[i],&w);
1112:       if (eps->purify) {
1113:         BVGetColumn(sr->V,count,&v);
1114:         STApply(eps->st,w,v);
1115:         BVRestoreColumn(sr->V,count,&v);
1116:       } else BVInsertVec(sr->V,count,w);
1117:       BVRestoreColumn(eps->V,eps->perm[i],&w);
1118:       BVNormColumn(sr->V,count,NORM_2,&norm);
1119:       BVScaleColumn(sr->V,count,1.0/norm);
1120:       count++;
1121:     }
1122:   }
1123:   sPres->neigs = count - sr->indexEig;
1124:   sr->indexEig = count;
1125:   /* Global ordering array updating */
1126:   sortRealEigenvalues(sr->eigr,sr->perm,count,PETSC_TRUE,sr->dir);
1127:   PetscFunctionReturn(0);
1128: }

1130: static PetscErrorCode EPSLookForDeflation(EPS eps)
1131: {
1132:   PetscReal       val;
1133:   PetscInt        i,count0=0,count1=0;
1134:   EPS_shift       sPres;
1135:   PetscInt        ini,fin,k,idx0,idx1;
1136:   EPS_SR          sr;
1137:   Vec             v;
1138:   EPS_KRYLOVSCHUR *ctx=(EPS_KRYLOVSCHUR*)eps->data;

1140:   sr = ctx->sr;
1141:   sPres = sr->sPres;

1143:   if (sPres->neighb[0]) ini = (sr->dir)*(sPres->neighb[0]->inertia - sr->inertia0);
1144:   else ini = 0;
1145:   fin = sr->indexEig;
1146:   /* Selection of ends for searching new values */
1147:   if (!sPres->neighb[0]) sPres->ext[0] = sr->int0;/* First shift */
1148:   else sPres->ext[0] = sPres->neighb[0]->value;
1149:   if (!sPres->neighb[1]) {
1150:     if (sr->hasEnd) sPres->ext[1] = sr->int1;
1151:     else sPres->ext[1] = (sr->dir > 0)?PETSC_MAX_REAL:PETSC_MIN_REAL;
1152:   } else sPres->ext[1] = sPres->neighb[1]->value;
1153:   /* Selection of values between right and left ends */
1154:   for (i=ini;i<fin;i++) {
1155:     val=PetscRealPart(sr->eigr[sr->perm[i]]);
1156:     /* Values to the right of left shift */
1157:     if ((sr->dir)*(val - sPres->ext[1]) < 0) {
1158:       if ((sr->dir)*(val - sPres->value) < 0) count0++;
1159:       else count1++;
1160:     } else break;
1161:   }
1162:   /* The number of values on each side are found */
1163:   if (sPres->neighb[0]) {
1164:     sPres->nsch[0] = (sr->dir)*(sPres->inertia - sPres->neighb[0]->inertia)-count0;
1166:   } else sPres->nsch[0] = 0;

1168:   if (sPres->neighb[1]) {
1169:     sPres->nsch[1] = (sr->dir)*(sPres->neighb[1]->inertia - sPres->inertia) - count1;
1171:   } else sPres->nsch[1] = (sr->dir)*(sr->inertia1 - sPres->inertia);

1173:   /* Completing vector of indexes for deflation */
1174:   idx0 = ini;
1175:   idx1 = ini+count0+count1;
1176:   k=0;
1177:   for (i=idx0;i<idx1;i++) sr->idxDef[k++]=sr->perm[i];
1178:   BVDuplicateResize(eps->V,k+eps->ncv+1,&sr->Vnext);
1179:   BVSetNumConstraints(sr->Vnext,k);
1180:   for (i=0;i<k;i++) {
1181:     BVGetColumn(sr->Vnext,-i-1,&v);
1182:     BVCopyVec(sr->V,sr->idxDef[i],v);
1183:     BVRestoreColumn(sr->Vnext,-i-1,&v);
1184:   }

1186:   /* For rational Krylov */
1187:   if (sr->nS>0 && (sr->sPrev == sr->sPres->neighb[0] || sr->sPrev == sr->sPres->neighb[1])) EPSPrepareRational(eps);
1188:   eps->nconv = 0;
1189:   /* Get rid of temporary Vnext */
1190:   BVDestroy(&eps->V);
1191:   eps->V = sr->Vnext;
1192:   sr->Vnext = NULL;
1193:   PetscFunctionReturn(0);
1194: }

1196: PetscErrorCode EPSSolve_KrylovSchur_Slice(EPS eps)
1197: {
1198:   PetscInt         i,lds,ti;
1199:   PetscReal        newS;
1200:   EPS_KRYLOVSCHUR  *ctx=(EPS_KRYLOVSCHUR*)eps->data;
1201:   EPS_SR           sr=ctx->sr;
1202:   Mat              A,B=NULL;
1203:   PetscObjectState Astate,Bstate=0;
1204:   PetscObjectId    Aid,Bid=0;

1206:   PetscCitationsRegister(citation,&cited);
1207:   if (ctx->global) {
1208:     EPSSolve_KrylovSchur_Slice(ctx->eps);
1209:     ctx->eps->state = EPS_STATE_SOLVED;
1210:     eps->reason = EPS_CONVERGED_TOL;
1211:     if (ctx->npart>1) {
1212:       /* Gather solution from subsolvers */
1213:       EPSSliceGatherSolution(eps);
1214:     } else {
1215:       eps->nconv = sr->numEigs;
1216:       eps->its   = ctx->eps->its;
1217:       PetscFree(ctx->inertias);
1218:       PetscFree(ctx->shifts);
1219:       EPSSliceGetInertias(ctx->eps,&ctx->nshifts,&ctx->shifts,&ctx->inertias);
1220:     }
1221:   } else {
1222:     if (ctx->npart==1) {
1223:       sr->eigr   = ctx->eps->eigr;
1224:       sr->eigi   = ctx->eps->eigi;
1225:       sr->perm   = ctx->eps->perm;
1226:       sr->errest = ctx->eps->errest;
1227:       sr->V      = ctx->eps->V;
1228:     }
1229:     /* Check that the user did not modify subcomm matrices */
1230:     EPSGetOperators(eps,&A,&B);
1231:     PetscObjectStateGet((PetscObject)A,&Astate);
1232:     PetscObjectGetId((PetscObject)A,&Aid);
1233:     if (B) {
1234:       PetscObjectStateGet((PetscObject)B,&Bstate);
1235:       PetscObjectGetId((PetscObject)B,&Bid);
1236:     }
1238:     /* Only with eigenvalues present in the interval ...*/
1239:     if (sr->numEigs==0) {
1240:       eps->reason = EPS_CONVERGED_TOL;
1241:       PetscFunctionReturn(0);
1242:     }
1243:     /* Array of pending shifts */
1244:     sr->maxPend = 100; /* Initial size */
1245:     sr->nPend = 0;
1246:     PetscMalloc1(sr->maxPend,&sr->pending);
1247:     PetscLogObjectMemory((PetscObject)eps,sr->maxPend*sizeof(EPS_shift*));
1248:     EPSCreateShift(eps,sr->int0,NULL,NULL);
1249:     /* extract first shift */
1250:     sr->sPrev = NULL;
1251:     sr->sPres = sr->pending[--sr->nPend];
1252:     sr->sPres->inertia = sr->inertia0;
1253:     eps->target = sr->sPres->value;
1254:     sr->s0 = sr->sPres;
1255:     sr->indexEig = 0;
1256:     /* Memory reservation for auxiliary variables */
1257:     lds = PetscMin(eps->mpd,eps->ncv);
1258:     PetscCalloc1(lds*lds,&sr->S);
1259:     PetscMalloc1(eps->ncv,&sr->back);
1260:     PetscLogObjectMemory((PetscObject)eps,(lds*lds+eps->ncv)*sizeof(PetscScalar));
1261:     for (i=0;i<sr->numEigs;i++) {
1262:       sr->eigr[i]   = 0.0;
1263:       sr->eigi[i]   = 0.0;
1264:       sr->errest[i] = 0.0;
1265:       sr->perm[i]   = i;
1266:     }
1267:     /* Vectors for deflation */
1268:     PetscMalloc1(sr->numEigs,&sr->idxDef);
1269:     PetscLogObjectMemory((PetscObject)eps,sr->numEigs*sizeof(PetscInt));
1270:     sr->indexEig = 0;
1271:     /* Main loop */
1272:     while (sr->sPres) {
1273:       /* Search for deflation */
1274:       EPSLookForDeflation(eps);
1275:       /* KrylovSchur */
1276:       EPSKrylovSchur_Slice(eps);

1278:       EPSStoreEigenpairs(eps);
1279:       /* Select new shift */
1280:       if (!sr->sPres->comp[1]) {
1281:         EPSGetNewShiftValue(eps,1,&newS);
1282:         EPSCreateShift(eps,newS,sr->sPres,sr->sPres->neighb[1]);
1283:       }
1284:       if (!sr->sPres->comp[0]) {
1285:         /* Completing earlier interval */
1286:         EPSGetNewShiftValue(eps,0,&newS);
1287:         EPSCreateShift(eps,newS,sr->sPres->neighb[0],sr->sPres);
1288:       }
1289:       /* Preparing for a new search of values */
1290:       EPSExtractShift(eps);
1291:     }

1293:     /* Updating eps values prior to exit */
1294:     PetscFree(sr->S);
1295:     PetscFree(sr->idxDef);
1296:     PetscFree(sr->pending);
1297:     PetscFree(sr->back);
1298:     BVDuplicateResize(eps->V,eps->ncv+1,&sr->Vnext);
1299:     BVSetNumConstraints(sr->Vnext,0);
1300:     BVDestroy(&eps->V);
1301:     eps->V      = sr->Vnext;
1302:     eps->nconv  = sr->indexEig;
1303:     eps->reason = EPS_CONVERGED_TOL;
1304:     eps->its    = sr->itsKs;
1305:     eps->nds    = 0;
1306:     if (sr->dir<0) {
1307:       for (i=0;i<eps->nconv/2;i++) {
1308:         ti = sr->perm[i]; sr->perm[i] = sr->perm[eps->nconv-1-i]; sr->perm[eps->nconv-1-i] = ti;
1309:       }
1310:     }
1311:   }
1312:   PetscFunctionReturn(0);
1313: }