Actual source code: fnimpl.h
slepc-3.17.0 2022-03-31
1: /*
2: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3: SLEPc - Scalable Library for Eigenvalue Problem Computations
4: Copyright (c) 2002-, Universitat Politecnica de Valencia, Spain
6: This file is part of SLEPc.
7: SLEPc is distributed under a 2-clause BSD license (see LICENSE).
8: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
9: */
11: #if !defined(SLEPCFNIMPL_H)
12: #define SLEPCFNIMPL_H
14: #include <slepcfn.h>
15: #include <slepc/private/slepcimpl.h>
17: SLEPC_EXTERN PetscBool FNRegisterAllCalled;
18: SLEPC_EXTERN PetscErrorCode FNRegisterAll(void);
19: SLEPC_EXTERN PetscLogEvent FN_Evaluate;
21: typedef struct _FNOps *FNOps;
23: struct _FNOps {
24: PetscErrorCode (*evaluatefunction)(FN,PetscScalar,PetscScalar*);
25: PetscErrorCode (*evaluatederivative)(FN,PetscScalar,PetscScalar*);
26: PetscErrorCode (*evaluatefunctionmat[FN_MAX_SOLVE])(FN,Mat,Mat);
27: PetscErrorCode (*evaluatefunctionmatvec[FN_MAX_SOLVE])(FN,Mat,Vec);
28: PetscErrorCode (*setfromoptions)(PetscOptionItems*,FN);
29: PetscErrorCode (*view)(FN,PetscViewer);
30: PetscErrorCode (*duplicate)(FN,MPI_Comm,FN*);
31: PetscErrorCode (*destroy)(FN);
32: };
34: #define FN_MAX_W 6
36: struct _p_FN {
37: PETSCHEADER(struct _FNOps);
38: /*------------------------- User parameters --------------------------*/
39: PetscScalar alpha; /* inner scaling (argument) */
40: PetscScalar beta; /* outer scaling (result) */
41: PetscInt method; /* the method to compute matrix functions */
42: FNParallelType pmode; /* parallel mode (redundant or synchronized) */
44: /*---------------------- Cached data and workspace -------------------*/
45: Mat W[FN_MAX_W]; /* workspace matrices */
46: PetscInt nw; /* number of allocated W matrices */
47: PetscInt cw; /* current W matrix */
48: void *data;
49: };
51: /*
52: FN_AllocateWorkMat - Allocate a work Mat of the same dimension of A and copy
53: its contents. The work matrix is returned in M and should be freed with
54: FN_FreeWorkMat().
55: */
56: static inline PetscErrorCode FN_AllocateWorkMat(FN fn,Mat A,Mat *M)
57: {
58: PetscInt n,na;
59: PetscBool create=PETSC_FALSE;
61: *M = NULL;
63: if (fn->nw<=fn->cw) {
64: create=PETSC_TRUE;
65: fn->nw++;
66: } else {
67: MatGetSize(fn->W[fn->cw],&n,NULL);
68: MatGetSize(A,&na,NULL);
69: if (n!=na) {
70: MatDestroy(&fn->W[fn->cw]);
71: create=PETSC_TRUE;
72: }
73: }
74: if (create) {
75: MatDuplicate(A,MAT_COPY_VALUES,&fn->W[fn->cw]);
76: PetscLogObjectParent((PetscObject)fn,(PetscObject)fn->W[fn->cw]);
77: } else MatCopy(A,fn->W[fn->cw],SAME_NONZERO_PATTERN);
78: *M = fn->W[fn->cw];
79: fn->cw++;
80: PetscFunctionReturn(0);
81: }
83: /*
84: FN_FreeWorkMat - Release a work matrix created with FN_AllocateWorkMat().
85: */
86: static inline PetscErrorCode FN_FreeWorkMat(FN fn,Mat *M)
87: {
89: fn->cw--;
91: *M = NULL;
92: PetscFunctionReturn(0);
93: }
95: SLEPC_INTERN PetscErrorCode FNSqrtmSchur(FN,PetscBLASInt,PetscScalar*,PetscBLASInt,PetscBool);
96: SLEPC_INTERN PetscErrorCode FNSqrtmDenmanBeavers(FN,PetscBLASInt,PetscScalar*,PetscBLASInt,PetscBool);
97: SLEPC_INTERN PetscErrorCode FNSqrtmNewtonSchulz(FN,PetscBLASInt,PetscScalar*,PetscBLASInt,PetscBool);
98: SLEPC_INTERN PetscErrorCode FNSqrtmSadeghi(FN,PetscBLASInt,PetscScalar*,PetscBLASInt);
99: SLEPC_INTERN PetscErrorCode SlepcNormAm(PetscBLASInt,PetscScalar*,PetscInt,PetscScalar*,PetscRandom,PetscReal*);
100: SLEPC_INTERN PetscErrorCode FNEvaluateFunctionMat_Private(FN,Mat,Mat,PetscBool);
101: SLEPC_INTERN PetscErrorCode FNEvaluateFunctionMatVec_Private(FN,Mat,Vec,PetscBool);
102: SLEPC_INTERN PetscErrorCode FNEvaluateFunctionMat_Exp_Higham(FN,Mat,Mat); /* used in FNPHI */
103: #if defined(PETSC_HAVE_CUDA)
104: SLEPC_INTERN PetscErrorCode FNSqrtmDenmanBeavers_CUDAm(FN,PetscBLASInt,PetscScalar*,PetscBLASInt,PetscBool);
105: SLEPC_INTERN PetscErrorCode FNSqrtmNewtonSchulz_CUDA(FN,PetscBLASInt,PetscScalar*,PetscBLASInt,PetscBool);
106: SLEPC_INTERN PetscErrorCode FNSqrtmSadeghi_CUDAm(FN,PetscBLASInt,PetscScalar*,PetscBLASInt);
107: #endif
109: #endif