18 #define FILTER_VINIT 1
62 int ChebLanTr(
int lanm,
int nev,
double *intv,
int maxit,
63 double tol,
double *vinit,
polparams *pol,
int *nev2,
64 double **vals,
double **W,
double **resW, FILE *fstats) {
82 size_t lanm1_l = lanm1;
85 maxit =
min(maxit, n);
97 double done=1.0, dmone=-1.0, dzero=0.0;
99 int Ntest =
min(lanm, nev+50);
102 int i, ll, jl, last_jl;
106 if (check_intv(intv, fstats) < 0) {
108 *vals = NULL; *W = NULL; *resW = NULL;
117 double gamB=pol->
gam, bar=pol->
bar;
119 if (gamB > 1.0 || gamB < -1.0) {
121 fprintf(fstats,
"gamB error %.15e\n", gamB);
129 fprintf(fstats,
" Cheb-LanTR, dim %d cycle %d \n",lanm, cycle);
134 int min_inner_step = 5;
139 Malloc(V, n_l*lanm1,
double);
143 Malloc(Z, n_l*lanm1,
double);
148 Calloc(T, lanm1_l*lanm1_l,
double);
151 double *Y, *Lam, *res;
152 Malloc(Y, n_l*nev,
double);
158 Malloc(BY, n_l*nev,
double);
163 int trlen = 0, prtrlen=-1;
165 double *Rval, *Rvec, *resi, *BRvec = NULL;
166 Malloc(Rval, lanm,
double);
167 Malloc(resi, lanm,
double);
168 Malloc(Rvec, n_l*lanm,
double);
170 Malloc(BRvec, n_l*lanm,
double);
174 Malloc(EvecT, lanm1_l*lanm1_l,
double);
179 double *work, *vrand = NULL;
180 size_t work_size = ifGenEv ? 4*n_l : 3*n_l;
181 Malloc(work, work_size,
double);
184 ChebAv(pol, vinit, V, work);
188 DCOPY(&n, vinit, &one, V, &one);
195 t = 1.0 / sqrt(
DDOT(&n, V, &one, Z, &one));
197 DSCAL(&n, &t, Z, &one);
200 t = 1.0 /
DNRM2(&n, V, &one);
203 DSCAL(&n, &t, V, &one);
210 double beta = 0.0, r, res0;
212 int k = trlen > 0 ? trlen + 1 : 0;
215 if (k+min_inner_step > lanm1) {
217 fprintf(fstats,
"Krylov dim too small for this problem. Try a larger dim\n");
225 double *v = &V[k1*n_l];
226 double *z = &Z[k1*n_l];
228 double *vnew = v + n;
229 double *znew = z + n;
232 ChebAv(pol, z, znew, work);
245 for (i=0; i<trlen; i++) {
246 T[i*lanm1_l+i] = Rval[i];
250 s[k1] =
DDOT(&n, v, &one, znew, &one);
252 DGEMV(&cN, &n, &k, &dmone, Z, &n, s, &one, &done, znew, &one);
255 for (i=0; i<k1; i++) {
256 T[trlen*lanm1_l+i] = s[i];
257 T[i*lanm1_l+trlen] = s[i];
258 wn += 2.0 * fabs(s[i]);
260 T[trlen*lanm1_l+trlen] = s[k1];
266 beta = sqrt(
DDOT(&n, vnew, &one, znew, &one));
269 beta =
DNRM2(&n, vnew, &one);
276 fprintf(fstats,
"it %4d: Lucky breakdown, beta = %.15e\n", k, beta);
281 ChebAv(pol, vrand, vnew, work);
290 matvec_B(vnew, znew);
291 beta = sqrt(
DDOT(&n, vnew, &one, znew, &one));
292 double ibeta = 1.0 / beta;
293 DSCAL(&n, &ibeta, vnew, &one);
294 DSCAL(&n, &ibeta, znew, &one);
302 double ibeta = 1.0 / beta;
303 DSCAL(&n, &ibeta, vnew, &one);
308 double ibeta = 1.0 / beta;
309 DSCAL(&n, &ibeta, vnew, &one);
311 DSCAL(&n, &ibeta, znew, &one);
315 T[k1*lanm1_l+k] = beta;
316 T[k*lanm1_l+k1] = beta;
320 Ntest =
max(20,nev-lock+10);
321 last_jl = 0; last_tr = 0.0;
325 double *zold = k > 0 ? Z+(k-1)*n_l : NULL;
329 while (k < lanm && it < maxit) {
332 double *v = &V[(k-1)*n_l];
333 double *z = &Z[(k-1)*n_l];
335 double *vnew = v + n;
336 double *znew = z + n;
339 ChebAv(pol, z, znew, work);
353 double nbeta = -beta;
354 DAXPY(&n, &nbeta, zold, &one, znew, &one);
357 double alpha =
DDOT(&n, v, &one, znew, &one);
359 T[(k-1)*lanm1_l+(k-1)] = alpha;
362 double nalpha = -alpha;
363 DAXPY(&n, &nalpha, z, &one, znew, &one);
371 beta = sqrt(
DDOT(&n, vnew, &one, znew, &one));
384 fprintf(fstats,
"it %4d: Lucky breakdown, beta = %.15e\n", it, beta);
389 ChebAv(pol, vrand, vnew, work);
399 matvec_B(vnew, znew);
400 beta = sqrt(
DDOT(&n, vnew, &one, znew, &one));
401 double ibeta = 1.0 / beta;
402 DSCAL(&n, &ibeta, vnew, &one);
403 DSCAL(&n, &ibeta, znew, &one);
411 double ibeta = 1.0 / beta;
412 DSCAL(&n, &ibeta, vnew, &one);
417 double ibeta = 1.0 / beta;
418 DSCAL(&n, &ibeta, vnew, &one);
421 DSCAL(&n, &ibeta, znew, &one);
425 T[k*lanm1_l+(k-1)] = beta;
426 T[(k-1)*lanm1_l+k] = beta;
428 int k1 = k-trlen-Ntest;
429 if ( ((k1>=0) && (k1 % cycle == 0)) || (k == lanm) || it == maxit) {
435 if (k == lanm || it == maxit) {
443 for (i=0; i<k; i++) {
444 if (Rval[i] + DBL_EPSILON >= bar) {
449 if (fabs(tr-last_tr) <= 2e-12 && jl == last_jl) {
451 fprintf(fstats,
"break: [it %d, k %d]: last_tr %.15e, tr %.15e, last_jl %d jl %d\n",
452 it, k, last_tr, tr, last_jl, jl);
457 fprintf(fstats,
"[it %d, k %d] testing: last_tr %.15e, tr %.15e, last_jl %d jl %d\n",
458 it, k, last_tr, tr, last_jl, jl);
468 for (i=0; i<k; i++) {
473 r = fabs(beta*EvecT[i*lanm1+(k-1)]);
482 fprintf(fstats,
"inner: testing conv k %4d, it %4d, count %3d jl %3d trlen %3d\n",
483 k, it, count, jl, trlen);
487 if (count*evFrac >= nev-lock) {
488 fprintf(fstats,
"count * evFrac >= nev-lock: %d * %d >= %d - %d\n", count, evFrac, nev, lock);
492 if ((count<=last_count) && (jl<=last_jl)) {
493 fprintf(fstats,
"inner: count <= last_count && jl <= last_jl: %d <= %d && %d <= %d\n", count, last_count, jl, last_jl);
511 for (i=0; i<k; i++) {
514 if (Rval[i] >= bar) {
517 DCOPY(&k, EvecT+i*lanm1_l, &one, EvecT+jl*lanm1_l, &one);
521 resi[jl] = fabs(beta*EvecT[i*lanm1_l+(k-1)]);
530 DGEMM(&cN, &cN, &n, &jl, &k, &done, V, &n, EvecT, &lanm1, &dzero, Rvec, &n);
532 DGEMM(&cN, &cN, &n, &jl, &k, &done, Z, &n, EvecT, &lanm1, &dzero, BRvec, &n);
540 for (i=0; i<jl; i++) {
541 double *y = Rvec + i*n_l;
552 t = sqrt(
DDOT(&n, y, &one, w2, &one));
555 t =
DNRM2(&n, y, &one);
563 DSCAL(&n, &t, y, &one);
566 DSCAL(&n, &t, w2, &one);
573 double t3 =
DDOT(&n, y, &one, w, &one);
575 if (t3 >= aa - DBL_EPSILON && t3 <= bb + DBL_EPSILON) {
581 DAXPY(&n, &nt3, w2, &one, w, &one);
584 DAXPY(&n, &nt3, y, &one, w, &one);
587 res0 =
DNRM2(&n, w, &one);
594 nev += 1 + (int) (nev*nevInc);
596 fprintf(fstats,
"-- More eigval found: realloc space for %d evs\n", nev);
606 DCOPY(&n, y, &one, Y+lock*n_l, &one);
608 DCOPY(&n, By, &one, BY+lock*n_l, &one);
615 Rval[trlen] = Rval[i];
616 DCOPY(&n, y, &one, V+trlen*n_l, &one);
618 DCOPY(&n, By, &one, Z+trlen*n_l, &one);
622 s[trlen] = beta * EvecT[i*lanm1_l+(k-1)];
637 fprintf(fstats,
"it %4d: k %3d, jl %3d, ll %3d, lock %3d, trlen %3d\n",
638 it, k, jl, ll, lock, trlen);
641 if ((prtrlen == 0) && (ll==0)) {
644 fprintf(fstats,
"--------------------------------------------------\n");
645 fprintf(fstats,
" --> No ev.s left to be computed\n");
651 fprintf(fstats,
"--------------------------------------------------\n");
652 fprintf(fstats,
" --> Max its reached without convergence\n");
657 memset(T, 0, lanm1_l*lanm1_l*
sizeof(
double));
659 DCOPY(&n, V+k*n_l, &one, V+trlen*n_l, &one);
661 DCOPY(&n, Z+k*n_l, &one, Z+trlen*n_l, &one);
666 fprintf(fstats,
" Number of evals found = %d\n", lock);
667 fprintf(fstats,
"--------------------------------------------------\n");
void SymEigenSolver(int n, double *A, int lda, double *Q, int ldq, double *lam)
interface to LAPACK SYMMETRIC EIGEN-SOLVER
int ChebLanTr(int lanm, int nev, double *intv, int maxit, double tol, double *vinit, polparams *pol, int *nev2, double **vals, double **W, double **resW, FILE *fstats)
Chebyshev polynomial filtering Lanczos process [Thick restart version].
void DGEMM(char *transa, char *transb, int *m, int *n, int *k, double *alpha, double *a, int *lda, double *b, int *ldb, double *beta, double *c, int *ldc)
double DDOT(int *n, double *x, int *incx, double *y, int *incy)
void rand_double(int n, double *v)
void CGS_DGKS(int n, int k, int i_max, double *Q, double *v, double *nrmv, double *w)
Classical GS reortho with Daniel, Gragg, Kaufman, Stewart test.
void DSCAL(int *n, double *a, double *x, int *incx)
#define Calloc(base, nmem, type)
This file contains function prototypes and constant definitions internally used in EVSL...
#define Malloc(base, nmem, type)
void DGEMV(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy)
evslStat evslstat
global statistics of EVSL
void CGS_DGKS2(int n, int k, int i_max, double *Z, double *Q, double *v, double *w)
Classical GS reortho. No test. just do i_max times used in generalized ev problems.
void DAXPY(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
double DNRM2(int *n, double *x, int *incx)
Defs for blaslapack routines.
int ChebAv(polparams *pol, double *v, double *y, double *w)
Computes y=P(A) v, where pn is a Cheb. polynomial expansion
evslData evsldata
global variable of EVSL
parameters for polynomial filter
#define Realloc(base, nmem, type)
double evsl_timer()
evsl timer for mac
void DCOPY(int *n, double *dx, int *incx, double *dy, int *incy)