42 int ChebSI(
int nev,
double *intv,
int maxit,
43 double tol,
double *vinit,
polparams *pol,
int *nevo,
44 double **lamo,
double **Yo,
double **reso, FILE *fstats) {
46 double tm, tall=0.0, tmv=0.0;
48 double tolP = tol*0.01;
59 double done=1.0,dmone=-1.0,dzero=0.0;
60 if (check_intv(intv, fstats) < 0) {
74 double gamB=pol->
gam, bar=pol->
bar;
75 fprintf(fstats,
"Cheb Poly- deg = %d, gam = %.15e, bar: %.15e\n",
78 if (gamB > 1.0 || gamB < -1.0) {
79 fprintf(stdout,
"gamB error %.15e\n", gamB);
83 fprintf(fstats,
"Step 2: ChebSI, block size = %d\n", nev);
87 double *T, *evecT, *evalT;
89 Malloc(evecT, nev2,
double);
90 Malloc(evalT, nev,
double);
97 double *V, *Lam, *PV, *R, *res;
98 double *V_out, *Lam_out, *res_out;
106 int nlock, nact, nlock_ab;
107 int *act_idx, *lock_idx;
108 Malloc(act_idx, nev,
int);
109 Malloc(lock_idx, nev,
int);
117 Malloc(work, work_size,
double);
118 Malloc(buf, nnev,
double);
121 orth(vinit,n,nev,V,work);
124 for (icol = 0; icol<nev; icol++) {
125 ChebAv(pol, V+icol*n, PV+icol*n, work);
132 while ( (it < maxit) && (find_more) ) {
135 int nnlock = n*nlock;
137 DCOPY(&nnact, PV+nnlock, &one, V+nnlock, &one);
140 DGEMM(&cT, &cN, &nlock, &nact, &n, &done, V, &n, V+nnlock, &n, &dzero, T, &nev);
141 DGEMM(&cN, &cN, &n, &nact, &nlock, &dmone, V, &n, T, &nev, &done, V+nnlock, &n);
144 orth(V+nnlock, n, nact, buf, work);
145 DCOPY(&nnact, buf, &one, V+nnlock, &one);
149 for (icol = nlock; icol<nlock+nact; icol++)
150 ChebAv(pol, V+icol*n, PV+icol*n, work);
155 DGEMM(&cT, &cN, &nlock, &nact, &n, &done, V, &n, PV+nnlock, &n, &dzero, T, &nev);
156 DGEMM(&cN, &cN, &n, &nact, &nlock, &dmone, V, &n, T, &nev, &done, PV+nnlock, &n);
160 if ( ((it+1)%cvcheck == 0) || ((it+1)==maxit) ) {
163 DGEMM(&cT,&cN,&nact,&nact,&n,&done,V+nnlock,&n,PV+nnlock,&n,&dzero,T,&nev);
166 DGEMM(&cN,&cN,&n,&nact,&nact,&done,V+nnlock,&n,evecT,&nev,&dzero,buf,&n);
167 DCOPY(&nnact, buf, &one, V+nnlock, &one);
168 DGEMM(&cN,&cN,&n,&nact,&nact,&done,PV+nnlock,&n,evecT,&nev,&dzero,buf,&n);
169 DCOPY(&nnact, buf, &one, PV+nnlock, &one);
171 for (i=nlock; i<nev; i++) {
172 double t = -evalT[i];
174 for (j=0; j<n; j++) {
175 R[i*n+j] = PV[i*n+j]+t*V[i*n+j];
180 int nlock_ab_new = 0;
182 for (i=nlock; i<nev; i++) {
184 double resP = sqrt(
DDOT(&n, R+i*n, &one, R+i*n, &one));
188 matvec_A(V+i*n, buf);
191 double rq =
DDOT(&n, V+i*n, &one, buf, &one);
192 for (j=0; j < n; j++) {
193 R[i*n+j] = buf[j] - rq*V[i*n+j];
195 double resA = sqrt(
DDOT(&n, R+i*n, &one, R+i*n, &one));
197 lock_idx[nlock_new] = i;
198 res[nlock+nlock_new] = resA;
199 Lam[nlock+nlock_new] = rq;
202 if ( rq >= aa - DBL_EPSILON && rq <= bb + DBL_EPSILON ) {
216 for (i = 0; i<nlock_new; i++) {
217 DCOPY(&n, V+lock_idx[i]*n, &one, buf+nnlock+i*n, &one);
218 DCOPY(&n, PV+lock_idx[i]*n, &one, R+nnlock+i*n, &one);
221 for (i = 0; i<nact; i++) {
222 DCOPY(&n, V+act_idx[i]*n, &one, buf+nnlock+(nlock_new+i)*n, &one);
223 DCOPY(&n, PV+act_idx[i]*n, &one, R+nnlock+(nlock_new+i)*n, &one);
225 DCOPY(&nnact, buf+nnlock, &one, V+nnlock, &one);
226 DCOPY(&nnact, R+nnlock, &one, PV+nnlock, &one);
229 nlock_ab += nlock_ab_new;
231 fprintf(fstats,
"it %4d: nMV %7d, nlock %3d, nlock_ab %3d\n",
232 it+1, nmv, nlock, nlock_ab);
237 if ( ((nlock-nlock_ab)>=
NBUF) || (nlock == nev) ) {
239 fprintf(fstats,
"-------------------------------------\n");
240 fprintf(fstats,
" No ev.s left to be computed\n");
241 fprintf(fstats,
" Number of evals found = %d\n", nlock_ab);
242 fprintf(fstats,
"-------------------------------------------------------------------\n");
249 Malloc(Lam_out, nev,
double);
250 Malloc(res_out, nev,
double);
251 Malloc(V_out, nnev,
double);
253 for (i=0; i<nlock;i++) {
255 if ( t >= aa - DBL_EPSILON && t <= bb + DBL_EPSILON ) {
257 res_out[idx] = res[i];
258 DCOPY(&n, V+i*n, &one, V_out+idx*n, &one);
284 fprintf(fstats,
"------This slice consumed: \n");
285 fprintf(fstats,
"Matvecs : %d\n", nmv);
286 fprintf(fstats,
"total time : %.2f\n", tall);
287 fprintf(fstats,
"matvec time : %.2f\n", tmv);
288 fprintf(fstats,
"======================================================\n");
void SymEigenSolver(int n, double *A, int lda, double *Q, int ldq, double *lam)
interface to LAPACK SYMMETRIC EIGEN-SOLVER
void DGEMM(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
double DDOT(int *n, double *x, int *incx, double *y, int *incy)
int ChebSI(int nev, double *intv, int maxit, double tol, double *vinit, polparams *pol, int *nevo, double **lamo, double **Yo, double **reso, FILE *fstats)
Chebyshev polynomial filtering Subspace Iteration.
This file contains function prototypes and constant definitions internally used in EVSL...
void orth(double *V, int n, int k, double *Vo, double *work)
Orthogonalize columns of n-by-k matrix V.
#define Malloc(base, nmem, type)
Defs for blaslapack routines.
int ChebAv(polparams *pol, double *v, double *y, double *w)
Computes y=P(A) v, where pn is a Cheb. polynomial expansion
evslData evsldata
global variable of EVSL
parameters for polynomial filter
double evsl_timer()
evsl timer for mac
void DCOPY(int *n, double *dx, int *incx, double *dy, int *incy)