Actual source code: dshep.c
slepc-3.17.0 2022-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #include <slepc/private/dsimpl.h>
12: #include <slepcblaslapack.h>
14: PetscErrorCode DSAllocate_HEP(DS ds,PetscInt ld)
15: {
16: DSAllocateMat_Private(ds,DS_MAT_A);
17: DSAllocateMat_Private(ds,DS_MAT_Q);
18: DSAllocateMatReal_Private(ds,DS_MAT_T);
19: PetscFree(ds->perm);
20: PetscMalloc1(ld,&ds->perm);
21: PetscLogObjectMemory((PetscObject)ds,ld*sizeof(PetscInt));
22: PetscFunctionReturn(0);
23: }
25: /* 0 l k n-1
26: -----------------------------------------
27: |* . . |
28: | * . . |
29: | * . . |
30: | * . . |
31: |. . . . o o |
32: | o o |
33: | o o |
34: | o o |
35: | o o |
36: | o o |
37: |. . . . o o o o o o o x |
38: | x x x |
39: | x x x |
40: | x x x |
41: | x x x |
42: | x x x |
43: | x x x |
44: | x x x |
45: | x x x|
46: | x x|
47: -----------------------------------------
48: */
50: static PetscErrorCode DSSwitchFormat_HEP(DS ds)
51: {
52: PetscReal *T = ds->rmat[DS_MAT_T];
53: PetscScalar *A = ds->mat[DS_MAT_A];
54: PetscInt i,n=ds->n,k=ds->k,ld=ds->ld;
56: /* switch from compact (arrow) to dense storage */
57: PetscArrayzero(A,ld*ld);
58: for (i=0;i<k;i++) {
59: A[i+i*ld] = T[i];
60: A[k+i*ld] = T[i+ld];
61: A[i+k*ld] = T[i+ld];
62: }
63: A[k+k*ld] = T[k];
64: for (i=k+1;i<n;i++) {
65: A[i+i*ld] = T[i];
66: A[i-1+i*ld] = T[i-1+ld];
67: A[i+(i-1)*ld] = T[i-1+ld];
68: }
69: if (ds->extrarow) A[n+(n-1)*ld] = T[n-1+ld];
70: PetscFunctionReturn(0);
71: }
73: PetscErrorCode DSView_HEP(DS ds,PetscViewer viewer)
74: {
75: PetscViewerFormat format;
76: PetscInt i,j,r,c,rows;
77: PetscReal value;
78: const char *methodname[] = {
79: "Implicit QR method (_steqr)",
80: "Relatively Robust Representations (_stevr)",
81: "Divide and Conquer method (_stedc)",
82: "Block Divide and Conquer method (dsbtdc)"
83: };
84: const int nmeth=sizeof(methodname)/sizeof(methodname[0]);
86: PetscViewerGetFormat(viewer,&format);
87: if (format == PETSC_VIEWER_ASCII_INFO || format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
88: if (ds->bs>1) PetscViewerASCIIPrintf(viewer,"block size: %" PetscInt_FMT "\n",ds->bs);
89: if (ds->method<nmeth) PetscViewerASCIIPrintf(viewer,"solving the problem with: %s\n",methodname[ds->method]);
90: PetscFunctionReturn(0);
91: }
92: if (ds->compact) {
93: PetscViewerASCIIUseTabs(viewer,PETSC_FALSE);
94: rows = ds->extrarow? ds->n+1: ds->n;
95: if (format == PETSC_VIEWER_ASCII_MATLAB) {
96: PetscViewerASCIIPrintf(viewer,"%% Size = %" PetscInt_FMT " %" PetscInt_FMT "\n",rows,ds->n);
97: PetscViewerASCIIPrintf(viewer,"zzz = zeros(%" PetscInt_FMT ",3);\n",3*ds->n);
98: PetscViewerASCIIPrintf(viewer,"zzz = [\n");
99: for (i=0;i<ds->n;i++) PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",i+1,i+1,(double)*(ds->rmat[DS_MAT_T]+i));
100: for (i=0;i<rows-1;i++) {
101: r = PetscMax(i+2,ds->k+1);
102: c = i+1;
103: PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",r,c,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
104: if (i<ds->n-1 && ds->k<ds->n) { /* do not print vertical arrow when k=n */
105: PetscViewerASCIIPrintf(viewer,"%" PetscInt_FMT " %" PetscInt_FMT " %18.16e\n",c,r,(double)*(ds->rmat[DS_MAT_T]+ds->ld+i));
106: }
107: }
108: PetscViewerASCIIPrintf(viewer,"];\n%s = spconvert(zzz);\n",DSMatName[DS_MAT_T]);
109: } else {
110: for (i=0;i<rows;i++) {
111: for (j=0;j<ds->n;j++) {
112: if (i==j) value = *(ds->rmat[DS_MAT_T]+i);
113: else if ((i<ds->k && j==ds->k) || (i==ds->k && j<ds->k)) value = *(ds->rmat[DS_MAT_T]+ds->ld+PetscMin(i,j));
114: else if (i==j+1 && i>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+i-1);
115: else if (i+1==j && j>ds->k) value = *(ds->rmat[DS_MAT_T]+ds->ld+j-1);
116: else value = 0.0;
117: PetscViewerASCIIPrintf(viewer," %18.16e ",(double)value);
118: }
119: PetscViewerASCIIPrintf(viewer,"\n");
120: }
121: }
122: PetscViewerASCIIUseTabs(viewer,PETSC_TRUE);
123: PetscViewerFlush(viewer);
124: } else DSViewMat(ds,viewer,DS_MAT_A);
125: if (ds->state>DS_STATE_INTERMEDIATE) DSViewMat(ds,viewer,DS_MAT_Q);
126: PetscFunctionReturn(0);
127: }
129: PetscErrorCode DSVectors_HEP(DS ds,DSMatType mat,PetscInt *j,PetscReal *rnorm)
130: {
131: PetscScalar *Q = ds->mat[DS_MAT_Q];
132: PetscInt ld = ds->ld;
134: switch (mat) {
135: case DS_MAT_X:
136: case DS_MAT_Y:
137: if (j) {
138: if (ds->state>=DS_STATE_CONDENSED) PetscArraycpy(ds->mat[mat]+(*j)*ld,Q+(*j)*ld,ld);
139: else {
140: PetscArrayzero(ds->mat[mat]+(*j)*ld,ld);
141: *(ds->mat[mat]+(*j)+(*j)*ld) = 1.0;
142: }
143: } else {
144: if (ds->state>=DS_STATE_CONDENSED) PetscArraycpy(ds->mat[mat],Q,ld*ld);
145: else DSSetIdentity(ds,mat);
146: }
147: if (rnorm && j) *rnorm = PetscAbsScalar(Q[ds->n-1+(*j)*ld]);
148: break;
149: case DS_MAT_U:
150: case DS_MAT_V:
151: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_SUP,"Not implemented yet");
152: default:
153: SETERRQ(PetscObjectComm((PetscObject)ds),PETSC_ERR_ARG_OUTOFRANGE,"Invalid mat parameter");
154: }
155: PetscFunctionReturn(0);
156: }
158: /*
159: ARROWTRIDIAG reduces a symmetric arrowhead matrix of the form
161: [ d 0 0 0 e ]
162: [ 0 d 0 0 e ]
163: A = [ 0 0 d 0 e ]
164: [ 0 0 0 d e ]
165: [ e e e e d ]
167: to tridiagonal form
169: [ d e 0 0 0 ]
170: [ e d e 0 0 ]
171: T = Q'*A*Q = [ 0 e d e 0 ],
172: [ 0 0 e d e ]
173: [ 0 0 0 e d ]
175: where Q is an orthogonal matrix. Rutishauser's algorithm is used to
176: perform the reduction, which requires O(n**2) flops. The accumulation
177: of the orthogonal factor Q, however, requires O(n**3) flops.
179: Arguments
180: =========
182: N (input) INTEGER
183: The order of the matrix A. N >= 0.
185: D (input/output) DOUBLE PRECISION array, dimension (N)
186: On entry, the diagonal entries of the matrix A to be
187: reduced.
188: On exit, the diagonal entries of the reduced matrix T.
190: E (input/output) DOUBLE PRECISION array, dimension (N-1)
191: On entry, the off-diagonal entries of the matrix A to be
192: reduced.
193: On exit, the subdiagonal entries of the reduced matrix T.
195: Q (input/output) DOUBLE PRECISION array, dimension (LDQ, N)
196: On exit, the orthogonal matrix Q.
198: LDQ (input) INTEGER
199: The leading dimension of the array Q.
201: Note
202: ====
203: Based on Fortran code contributed by Daniel Kressner
204: */
205: static PetscErrorCode ArrowTridiag(PetscBLASInt n,PetscReal *d,PetscReal *e,PetscScalar *Q,PetscBLASInt ld)
206: {
207: PetscBLASInt i,j,j2,one=1;
208: PetscReal c,s,p,off,temp;
210: if (n<=2) PetscFunctionReturn(0);
212: for (j=0;j<n-2;j++) {
214: /* Eliminate entry e(j) by a rotation in the planes (j,j+1) */
215: temp = e[j+1];
216: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&e[j],&c,&s,&e[j+1]));
217: s = -s;
219: /* Apply rotation to diagonal elements */
220: temp = d[j+1];
221: e[j] = c*s*(temp-d[j]);
222: d[j+1] = s*s*d[j] + c*c*temp;
223: d[j] = c*c*d[j] + s*s*temp;
225: /* Apply rotation to Q */
226: j2 = j+2;
227: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+j*ld,&one,Q+(j+1)*ld,&one,&c,&s));
229: /* Chase newly introduced off-diagonal entry to the top left corner */
230: for (i=j-1;i>=0;i--) {
231: off = -s*e[i];
232: e[i] = c*e[i];
233: temp = e[i+1];
234: PetscStackCallBLAS("LAPACKlartg",LAPACKREALlartg_(&temp,&off,&c,&s,&e[i+1]));
235: s = -s;
236: temp = (d[i]-d[i+1])*s - 2.0*c*e[i];
237: p = s*temp;
238: d[i+1] += p;
239: d[i] -= p;
240: e[i] = -e[i] - c*temp;
241: j2 = j+2;
242: PetscStackCallBLAS("BLASrot",BLASMIXEDrot_(&j2,Q+i*ld,&one,Q+(i+1)*ld,&one,&c,&s));
243: }
244: }
245: PetscFunctionReturn(0);
246: }
248: /*
249: Reduce to tridiagonal form by means of ArrowTridiag.
250: */
251: static PetscErrorCode DSIntermediate_HEP(DS ds)
252: {
253: PetscInt i;
254: PetscBLASInt n1 = 0,n2,lwork,info,l = 0,n = 0,ld,off;
255: PetscScalar *A,*Q,*work,*tau;
256: PetscReal *d,*e;
258: PetscBLASIntCast(ds->n,&n);
259: PetscBLASIntCast(ds->l,&l);
260: PetscBLASIntCast(ds->ld,&ld);
261: PetscBLASIntCast(PetscMax(0,ds->k-l+1),&n1); /* size of leading block, excl. locked */
262: n2 = n-l; /* n2 = n1 + size of trailing block */
263: off = l+l*ld;
264: A = ds->mat[DS_MAT_A];
265: Q = ds->mat[DS_MAT_Q];
266: d = ds->rmat[DS_MAT_T];
267: e = ds->rmat[DS_MAT_T]+ld;
268: PetscArrayzero(Q,ld*ld);
269: for (i=0;i<n;i++) Q[i+i*ld] = 1.0;
271: if (ds->compact) {
273: if (ds->state<DS_STATE_INTERMEDIATE) ArrowTridiag(n1,d+l,e+l,Q+off,ld);
275: } else {
277: for (i=0;i<l;i++) { d[i] = PetscRealPart(A[i+i*ld]); e[i] = 0.0; }
279: if (ds->state<DS_STATE_INTERMEDIATE) {
280: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
281: DSAllocateWork_Private(ds,ld+ld*ld,0,0);
282: tau = ds->work;
283: work = ds->work+ld;
284: lwork = ld*ld;
285: PetscStackCallBLAS("LAPACKsytrd",LAPACKsytrd_("L",&n2,Q+off,&ld,d+l,e+l,tau,work,&lwork,&info));
286: SlepcCheckLapackInfo("sytrd",info);
287: PetscStackCallBLAS("LAPACKorgtr",LAPACKorgtr_("L",&n2,Q+off,&ld,tau,work,&lwork,&info));
288: SlepcCheckLapackInfo("orgtr",info);
289: } else {
290: /* copy tridiagonal to d,e */
291: for (i=l;i<n;i++) d[i] = PetscRealPart(A[i+i*ld]);
292: for (i=l;i<n-1;i++) e[i] = PetscRealPart(A[(i+1)+i*ld]);
293: }
294: }
295: PetscFunctionReturn(0);
296: }
298: PetscErrorCode DSSort_HEP(DS ds,PetscScalar *wr,PetscScalar *wi,PetscScalar *rr,PetscScalar *ri,PetscInt *k)
299: {
300: PetscInt n,l,i,*perm,ld=ds->ld;
301: PetscScalar *A;
302: PetscReal *d;
304: if (!ds->sc) PetscFunctionReturn(0);
305: n = ds->n;
306: l = ds->l;
307: A = ds->mat[DS_MAT_A];
308: d = ds->rmat[DS_MAT_T];
309: perm = ds->perm;
310: if (!rr) DSSortEigenvaluesReal_Private(ds,d,perm);
311: else DSSortEigenvalues_Private(ds,rr,ri,perm,PETSC_FALSE);
312: for (i=l;i<n;i++) wr[i] = d[perm[i]];
313: DSPermuteColumns_Private(ds,l,n,n,DS_MAT_Q,perm);
314: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
315: if (!ds->compact) {
316: for (i=l;i<n;i++) A[i+i*ld] = wr[i];
317: }
318: PetscFunctionReturn(0);
319: }
321: PetscErrorCode DSUpdateExtraRow_HEP(DS ds)
322: {
323: PetscInt i;
324: PetscBLASInt n,ld,incx=1;
325: PetscScalar *A,*Q,*x,*y,one=1.0,zero=0.0;
326: PetscReal *e,beta;
328: PetscBLASIntCast(ds->n,&n);
329: PetscBLASIntCast(ds->ld,&ld);
330: A = ds->mat[DS_MAT_A];
331: Q = ds->mat[DS_MAT_Q];
332: e = ds->rmat[DS_MAT_T]+ld;
334: if (ds->compact) {
335: beta = e[n-1]; /* in compact, we assume all entries are zero except the last one */
336: for (i=0;i<n;i++) e[i] = PetscRealPart(beta*Q[n-1+i*ld]);
337: ds->k = n;
338: } else {
339: DSAllocateWork_Private(ds,2*ld,0,0);
340: x = ds->work;
341: y = ds->work+ld;
342: for (i=0;i<n;i++) x[i] = PetscConj(A[n+i*ld]);
343: PetscStackCallBLAS("BLASgemv",BLASgemv_("C",&n,&n,&one,Q,&ld,x,&incx,&zero,y,&incx));
344: for (i=0;i<n;i++) A[n+i*ld] = PetscConj(y[i]);
345: ds->k = n;
346: }
347: PetscFunctionReturn(0);
348: }
350: PetscErrorCode DSSolve_HEP_QR(DS ds,PetscScalar *wr,PetscScalar *wi)
351: {
352: PetscInt i;
353: PetscBLASInt n1,info,l = 0,n = 0,ld,off;
354: PetscScalar *Q,*A;
355: PetscReal *d,*e;
358: PetscBLASIntCast(ds->n,&n);
359: PetscBLASIntCast(ds->l,&l);
360: PetscBLASIntCast(ds->ld,&ld);
361: n1 = n-l; /* n1 = size of leading block, excl. locked + size of trailing block */
362: off = l+l*ld;
363: Q = ds->mat[DS_MAT_Q];
364: A = ds->mat[DS_MAT_A];
365: d = ds->rmat[DS_MAT_T];
366: e = ds->rmat[DS_MAT_T]+ld;
368: /* Reduce to tridiagonal form */
369: DSIntermediate_HEP(ds);
371: /* Solve the tridiagonal eigenproblem */
372: for (i=0;i<l;i++) wr[i] = d[i];
374: DSAllocateWork_Private(ds,0,2*ld,0);
375: PetscStackCallBLAS("LAPACKsteqr",LAPACKsteqr_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&info));
376: SlepcCheckLapackInfo("steqr",info);
377: for (i=l;i<n;i++) wr[i] = d[i];
379: /* Create diagonal matrix as a result */
380: if (ds->compact) PetscArrayzero(e,n-1);
381: else {
382: for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
383: for (i=l;i<n;i++) A[i+i*ld] = d[i];
384: }
386: /* Set zero wi */
387: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
388: PetscFunctionReturn(0);
389: }
391: PetscErrorCode DSSolve_HEP_MRRR(DS ds,PetscScalar *wr,PetscScalar *wi)
392: {
393: PetscInt i;
394: PetscBLASInt n1 = 0,n2 = 0,n3,lwork,liwork,info,l = 0,n = 0,m = 0,ld,off,il,iu,*isuppz;
395: PetscScalar *A,*Q,*W=NULL,one=1.0,zero=0.0;
396: PetscReal *d,*e,abstol=0.0,vl,vu;
397: #if defined(PETSC_USE_COMPLEX)
398: PetscInt j;
399: PetscReal *ritz;
400: #endif
403: PetscBLASIntCast(ds->n,&n);
404: PetscBLASIntCast(ds->l,&l);
405: PetscBLASIntCast(ds->ld,&ld);
406: PetscBLASIntCast(ds->k-l+1,&n1); /* size of leading block, excl. locked */
407: PetscBLASIntCast(n-ds->k-1,&n2); /* size of trailing block */
408: n3 = n1+n2;
409: off = l+l*ld;
410: A = ds->mat[DS_MAT_A];
411: Q = ds->mat[DS_MAT_Q];
412: d = ds->rmat[DS_MAT_T];
413: e = ds->rmat[DS_MAT_T]+ld;
415: /* Reduce to tridiagonal form */
416: DSIntermediate_HEP(ds);
418: /* Solve the tridiagonal eigenproblem */
419: for (i=0;i<l;i++) wr[i] = d[i];
421: if (ds->state<DS_STATE_INTERMEDIATE) { /* Q contains useful info */
422: DSAllocateMat_Private(ds,DS_MAT_W);
423: DSCopyMatrix_Private(ds,DS_MAT_W,DS_MAT_Q);
424: W = ds->mat[DS_MAT_W];
425: }
426: #if defined(PETSC_USE_COMPLEX)
427: DSAllocateMatReal_Private(ds,DS_MAT_Q);
428: #endif
429: lwork = 20*ld;
430: liwork = 10*ld;
431: DSAllocateWork_Private(ds,0,lwork+ld,liwork+2*ld);
432: isuppz = ds->iwork+liwork;
433: #if defined(PETSC_USE_COMPLEX)
434: ritz = ds->rwork+lwork;
435: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,ritz+l,ds->rmat[DS_MAT_Q]+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
436: for (i=l;i<n;i++) wr[i] = ritz[i];
437: #else
438: PetscStackCallBLAS("LAPACKstevr",LAPACKstevr_("V","A",&n3,d+l,e+l,&vl,&vu,&il,&iu,&abstol,&m,wr+l,Q+off,&ld,isuppz,ds->rwork,&lwork,ds->iwork,&liwork,&info));
439: #endif
440: SlepcCheckLapackInfo("stevr",info);
441: #if defined(PETSC_USE_COMPLEX)
442: for (i=l;i<n;i++)
443: for (j=l;j<n;j++)
444: Q[i+j*ld] = (ds->rmat[DS_MAT_Q])[i+j*ld];
445: #endif
446: if (ds->state<DS_STATE_INTERMEDIATE) { /* accumulate previous Q */
447: PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&n3,&n3,&n3,&one,W+off,&ld,Q+off,&ld,&zero,A+off,&ld));
448: DSCopyMatrix_Private(ds,DS_MAT_Q,DS_MAT_A);
449: }
450: for (i=l;i<n;i++) d[i] = PetscRealPart(wr[i]);
452: /* Create diagonal matrix as a result */
453: if (ds->compact) PetscArrayzero(e,n-1);
454: else {
455: for (i=l;i<n;i++) PetscArrayzero(A+l+i*ld,n-l);
456: for (i=l;i<n;i++) A[i+i*ld] = d[i];
457: }
459: /* Set zero wi */
460: if (wi) for (i=l;i<n;i++) wi[i] = 0.0;
461: PetscFunctionReturn(0);
462: }
464: PetscErrorCode DSSolve_HEP_DC(DS ds,PetscScalar *wr,PetscScalar *wi)
465: {
466: PetscInt i;
467: PetscBLASInt n1,info,l = 0,ld,off,lrwork,liwork;
468: PetscScalar *Q,*A;
469: PetscReal *d,*e;
470: #if defined(PETSC_USE_COMPLEX)
471: PetscBLASInt lwork;
472: PetscInt j;
473: #endif
476: PetscBLASIntCast(ds->l,&l);
477: PetscBLASIntCast(ds->ld,&ld);
478: PetscBLASIntCast(ds->n-ds->l,&n1);
479: off = l+l*ld;
480: Q = ds->mat[DS_MAT_Q];
481: A = ds->mat[DS_MAT_A];
482: d = ds->rmat[DS_MAT_T];
483: e = ds->rmat[DS_MAT_T]+ld;
485: /* Reduce to tridiagonal form */
486: DSIntermediate_HEP(ds);
488: /* Solve the tridiagonal eigenproblem */
489: for (i=0;i<l;i++) wr[i] = d[i];
491: lrwork = 5*n1*n1+3*n1+1;
492: liwork = 5*n1*n1+6*n1+6;
493: #if !defined(PETSC_USE_COMPLEX)
494: DSAllocateWork_Private(ds,0,lrwork,liwork);
495: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
496: #else
497: lwork = ld*ld;
498: DSAllocateWork_Private(ds,lwork,lrwork,liwork);
499: PetscStackCallBLAS("LAPACKstedc",LAPACKstedc_("V",&n1,d+l,e+l,Q+off,&ld,ds->work,&lwork,ds->rwork,&lrwork,ds->iwork,&liwork,&info));
500: /* Fixing Lapack bug*/
501: for (j=ds->l;j<ds->n;j++)
502: for (i=0;i<ds->l;i++) Q[i+j*ld] = 0.0;
503: #endif
504: SlepcCheckLapackInfo("stedc",info);
505: for (i=l;i<ds->n;i++) wr[i] = d[i];
507: /* Create diagonal matrix as a result */
508: if (ds->compact) PetscArrayzero(e,ds->n-1);
509: else {
510: for (i=l;i<ds->n;i++) PetscArrayzero(A+l+i*ld,ds->n-l);
511: for (i=l;i<ds->n;i++) A[i+i*ld] = d[i];
512: }
514: /* Set zero wi */
515: if (wi) for (i=l;i<ds->n;i++) wi[i] = 0.0;
516: PetscFunctionReturn(0);
517: }
519: #if !defined(PETSC_USE_COMPLEX)
520: PetscErrorCode DSSolve_HEP_BDC(DS ds,PetscScalar *wr,PetscScalar *wi)
521: {
522: PetscBLASInt i,j,k,m,n = 0,info,nblks,bs = 0,ld = 0,lde,lrwork,liwork,*ksizes,*iwork,mingapi;
523: PetscScalar *Q,*A;
524: PetscReal *D,*E,*d,*e,tol=PETSC_MACHINE_EPSILON/2,tau1=1e-16,tau2=1e-18,*rwork,mingap;
528: PetscBLASIntCast(ds->ld,&ld);
529: PetscBLASIntCast(ds->bs,&bs);
530: PetscBLASIntCast(ds->n,&n);
531: nblks = n/bs;
532: Q = ds->mat[DS_MAT_Q];
533: A = ds->mat[DS_MAT_A];
534: d = ds->rmat[DS_MAT_T];
535: e = ds->rmat[DS_MAT_T]+ld;
536: lrwork = 4*n*n+60*n+1;
537: liwork = 5*n+5*nblks-1;
538: lde = 2*bs+1;
539: DSAllocateWork_Private(ds,bs*n+lde*lde*(nblks-1),lrwork,nblks+liwork);
540: D = ds->work;
541: E = ds->work+bs*n;
542: rwork = ds->rwork;
543: ksizes = ds->iwork;
544: iwork = ds->iwork+nblks;
545: PetscArrayzero(iwork,liwork);
547: /* Copy matrix to block tridiagonal format */
548: j=0;
549: for (i=0;i<nblks;i++) {
550: ksizes[i]=bs;
551: for (k=0;k<bs;k++)
552: for (m=0;m<bs;m++)
553: D[k+m*bs+i*bs*bs] = PetscRealPart(A[j+k+(j+m)*n]);
554: j = j + bs;
555: }
556: j=0;
557: for (i=0;i<nblks-1;i++) {
558: for (k=0;k<bs;k++)
559: for (m=0;m<bs;m++)
560: E[k+m*lde+i*lde*lde] = PetscRealPart(A[j+bs+k+(j+m)*n]);
561: j = j + bs;
562: }
564: /* Solve the block tridiagonal eigenproblem */
565: BDC_dsbtdc_("D","A",n,nblks,ksizes,D,bs,bs,E,lde,lde,tol,tau1,tau2,d,
566: Q,n,rwork,lrwork,iwork,liwork,&mingap,&mingapi,&info,1,1);
567: for (i=0;i<ds->n;i++) wr[i] = d[i];
569: /* Create diagonal matrix as a result */
570: if (ds->compact) PetscArrayzero(e,ds->n-1);
571: else {
572: for (i=0;i<ds->n;i++) PetscArrayzero(A+i*ld,ds->n);
573: for (i=0;i<ds->n;i++) A[i+i*ld] = wr[i];
574: }
576: /* Set zero wi */
577: if (wi) for (i=0;i<ds->n;i++) wi[i] = 0.0;
578: PetscFunctionReturn(0);
579: }
580: #endif
582: PetscErrorCode DSTruncate_HEP(DS ds,PetscInt n,PetscBool trim)
583: {
584: PetscInt i,ld=ds->ld,l=ds->l;
585: PetscScalar *A = ds->mat[DS_MAT_A];
587: if (trim) {
588: if (!ds->compact && ds->extrarow) { /* clean extra row */
589: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
590: }
591: ds->l = 0;
592: ds->k = 0;
593: ds->n = n;
594: ds->t = ds->n; /* truncated length equal to the new dimension */
595: } else {
596: if (!ds->compact && ds->extrarow && ds->k==ds->n) {
597: /* copy entries of extra row to the new position, then clean last row */
598: for (i=l;i<n;i++) A[n+i*ld] = A[ds->n+i*ld];
599: for (i=l;i<ds->n;i++) A[ds->n+i*ld] = 0.0;
600: }
601: ds->k = (ds->extrarow)? n: 0;
602: ds->t = ds->n; /* truncated length equal to previous dimension */
603: ds->n = n;
604: }
605: PetscFunctionReturn(0);
606: }
608: PetscErrorCode DSSynchronize_HEP(DS ds,PetscScalar eigr[],PetscScalar eigi[])
609: {
610: PetscInt ld=ds->ld,l=ds->l,k=0,kr=0;
611: PetscMPIInt n,rank,off=0,size,ldn,ld3;
613: if (ds->compact) kr = 3*ld;
614: else k = (ds->n-l)*ld;
615: if (ds->state>DS_STATE_RAW) k += (ds->n-l)*ld;
616: if (eigr) k += (ds->n-l);
617: DSAllocateWork_Private(ds,k+kr,0,0);
618: PetscMPIIntCast(k*sizeof(PetscScalar)+kr*sizeof(PetscReal),&size);
619: PetscMPIIntCast(ds->n-l,&n);
620: PetscMPIIntCast(ld*(ds->n-l),&ldn);
621: PetscMPIIntCast(ld*3,&ld3);
622: MPI_Comm_rank(PetscObjectComm((PetscObject)ds),&rank);
623: if (!rank) {
624: if (ds->compact) MPI_Pack(ds->rmat[DS_MAT_T],ld3,MPIU_REAL,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
625: else MPI_Pack(ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
626: if (ds->state>DS_STATE_RAW) MPI_Pack(ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
627: if (eigr) MPI_Pack(eigr+l,n,MPIU_SCALAR,ds->work,size,&off,PetscObjectComm((PetscObject)ds));
628: }
629: MPI_Bcast(ds->work,size,MPI_BYTE,0,PetscObjectComm((PetscObject)ds));
630: if (rank) {
631: if (ds->compact) MPI_Unpack(ds->work,size,&off,ds->rmat[DS_MAT_T],ld3,MPIU_REAL,PetscObjectComm((PetscObject)ds));
632: else MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_A]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
633: if (ds->state>DS_STATE_RAW) MPI_Unpack(ds->work,size,&off,ds->mat[DS_MAT_Q]+l*ld,ldn,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
634: if (eigr) MPI_Unpack(ds->work,size,&off,eigr+l,n,MPIU_SCALAR,PetscObjectComm((PetscObject)ds));
635: }
636: PetscFunctionReturn(0);
637: }
639: PetscErrorCode DSCond_HEP(DS ds,PetscReal *cond)
640: {
641: PetscScalar *work;
642: PetscReal *rwork;
643: PetscBLASInt *ipiv;
644: PetscBLASInt lwork,info,n,ld;
645: PetscReal hn,hin;
646: PetscScalar *A;
648: PetscBLASIntCast(ds->n,&n);
649: PetscBLASIntCast(ds->ld,&ld);
650: lwork = 8*ld;
651: DSAllocateWork_Private(ds,lwork,ld,ld);
652: work = ds->work;
653: rwork = ds->rwork;
654: ipiv = ds->iwork;
655: DSSwitchFormat_HEP(ds);
657: /* use workspace matrix W to avoid overwriting A */
658: DSAllocateMat_Private(ds,DS_MAT_W);
659: A = ds->mat[DS_MAT_W];
660: PetscArraycpy(A,ds->mat[DS_MAT_A],ds->ld*ds->ld);
662: /* norm of A */
663: hn = LAPACKlange_("I",&n,&n,A,&ld,rwork);
665: /* norm of inv(A) */
666: PetscStackCallBLAS("LAPACKgetrf",LAPACKgetrf_(&n,&n,A,&ld,ipiv,&info));
667: SlepcCheckLapackInfo("getrf",info);
668: PetscStackCallBLAS("LAPACKgetri",LAPACKgetri_(&n,A,&ld,ipiv,work,&lwork,&info));
669: SlepcCheckLapackInfo("getri",info);
670: hin = LAPACKlange_("I",&n,&n,A,&ld,rwork);
672: *cond = hn*hin;
673: PetscFunctionReturn(0);
674: }
676: PetscErrorCode DSTranslateRKS_HEP(DS ds,PetscScalar alpha)
677: {
678: PetscInt i,j,k=ds->k;
679: PetscScalar *Q,*A,*R,*tau,*work;
680: PetscBLASInt ld,n1,n0,lwork,info;
682: PetscBLASIntCast(ds->ld,&ld);
683: DSAllocateWork_Private(ds,ld*ld,0,0);
684: tau = ds->work;
685: work = ds->work+ld;
686: PetscBLASIntCast(ld*(ld-1),&lwork);
687: DSAllocateMat_Private(ds,DS_MAT_W);
688: A = ds->mat[DS_MAT_A];
689: Q = ds->mat[DS_MAT_Q];
690: R = ds->mat[DS_MAT_W];
692: /* copy I+alpha*A */
693: PetscArrayzero(Q,ld*ld);
694: PetscArrayzero(R,ld*ld);
695: for (i=0;i<k;i++) {
696: Q[i+i*ld] = 1.0 + alpha*A[i+i*ld];
697: Q[k+i*ld] = alpha*A[k+i*ld];
698: }
700: /* compute qr */
701: PetscBLASIntCast(k+1,&n1);
702: PetscBLASIntCast(k,&n0);
703: PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&n1,&n0,Q,&ld,tau,work,&lwork,&info));
704: SlepcCheckLapackInfo("geqrf",info);
706: /* copy R from Q */
707: for (j=0;j<k;j++)
708: for (i=0;i<=j;i++)
709: R[i+j*ld] = Q[i+j*ld];
711: /* compute orthogonal matrix in Q */
712: PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&n1,&n1,&n0,Q,&ld,tau,work,&lwork,&info));
713: SlepcCheckLapackInfo("orgqr",info);
715: /* compute the updated matrix of projected problem */
716: for (j=0;j<k;j++)
717: for (i=0;i<k+1;i++)
718: A[j*ld+i] = Q[i*ld+j];
719: alpha = -1.0/alpha;
720: PetscStackCallBLAS("BLAStrsm",BLAStrsm_("R","U","N","N",&n1,&n0,&alpha,R,&ld,A,&ld));
721: for (i=0;i<k;i++)
722: A[ld*i+i] -= alpha;
723: PetscFunctionReturn(0);
724: }
726: PetscErrorCode DSHermitian_HEP(DS ds,DSMatType m,PetscBool *flg)
727: {
728: if (m==DS_MAT_A && !ds->extrarow) *flg = PETSC_TRUE;
729: else *flg = PETSC_FALSE;
730: PetscFunctionReturn(0);
731: }
733: /*MC
734: DSHEP - Dense Hermitian Eigenvalue Problem.
736: Level: beginner
738: Notes:
739: The problem is expressed as A*X = X*Lambda, where A is real symmetric
740: (or complex Hermitian). Lambda is a diagonal matrix whose diagonal
741: elements are the arguments of DSSolve(). After solve, A is overwritten
742: with Lambda.
744: In the intermediate state A is reduced to tridiagonal form. In compact
745: storage format, the symmetric tridiagonal matrix is stored in T.
747: Used DS matrices:
748: + DS_MAT_A - problem matrix
749: . DS_MAT_T - symmetric tridiagonal matrix
750: - DS_MAT_Q - orthogonal/unitary transformation that reduces to tridiagonal form
751: (intermediate step) or matrix of orthogonal eigenvectors, which is equal to X
753: Implemented methods:
754: + 0 - Implicit QR (_steqr)
755: . 1 - Multiple Relatively Robust Representations (_stevr)
756: . 2 - Divide and Conquer (_stedc)
757: - 3 - Block Divide and Conquer (real scalars only)
759: .seealso: DSCreate(), DSSetType(), DSType
760: M*/
761: SLEPC_EXTERN PetscErrorCode DSCreate_HEP(DS ds)
762: {
763: ds->ops->allocate = DSAllocate_HEP;
764: ds->ops->view = DSView_HEP;
765: ds->ops->vectors = DSVectors_HEP;
766: ds->ops->solve[0] = DSSolve_HEP_QR;
767: ds->ops->solve[1] = DSSolve_HEP_MRRR;
768: ds->ops->solve[2] = DSSolve_HEP_DC;
769: #if !defined(PETSC_USE_COMPLEX)
770: ds->ops->solve[3] = DSSolve_HEP_BDC;
771: #endif
772: ds->ops->sort = DSSort_HEP;
773: ds->ops->synchronize = DSSynchronize_HEP;
774: ds->ops->truncate = DSTruncate_HEP;
775: ds->ops->update = DSUpdateExtraRow_HEP;
776: ds->ops->cond = DSCond_HEP;
777: ds->ops->transrks = DSTranslateRKS_HEP;
778: ds->ops->hermitian = DSHermitian_HEP;
779: PetscFunctionReturn(0);
780: }