EVSL  1.1.0 EigenValues Slicing Library
cheblanNr.c
Go to the documentation of this file.
1 #include <stdlib.h>
2 #include <stdio.h>
3 #include <math.h>
4 #include <string.h>
5 #include <float.h>
6 #include "def.h"
7 #include "blaslapack.h"
8 #include "struct.h"
9 #include "internal_proto.h"
18 #define FILTER_VINIT 1
19
20 /* -----------------------------------------------------------------------
21  * @brief Chebyshev polynomial filtering Lanczos process [NON-restarted version]
22  *
23  * @param[in] intv An array of length 4 \n
24  * [intv[0], intv[1]] is the interval of desired eigenvalues \n
25  * [intv[2], intv[3]] is the global interval of all eigenvalues \n
26  * Must contain all eigenvalues of A
27  *
28  * @param[in] maxit Max number of outer Lanczos steps allowed --[max dim of Krylov
29  * subspace]
30  *
31  * @param[in] tol
32  * Tolerance for convergence. The code uses a stopping criterion based
33  * on the convergence of the restricted trace. i.e., the sum of the
34  * eigenvalues of T_k that are in the desired interval. This test is
35  * rather simple since these eigenvalues are between `bar' and 1.0.
36  * We want the relative error on this restricted trace to be less
37  * than tol. Note that the test performed on filtered matrix only
38  * - *but* the actual residual norm associated with the original
39  * matrix A is returned
40  *
41  * @param[in] vinit initial vector for Lanczos -- [optional]
42  *
43  * @param[in] pol A struct containing the parameters of the polynomial..
44  * This is set up by a call to find_deg prior to calling chenlanNr
45  *
46  * @b Modifies:
47  * @param[out] nevOut Number of eigenvalues/vectors computed
48  * @param[out] Wo A set of eigenvectors [n x nevOut matrix]
49  * @param[out] reso Associated residual norms [nev x 1 vector]
50  * @param[out] lamo Lambda computed
51  * @param[out] fstats File stream which stats are printed to
52  *
53  * @return Returns 0 on success (or if check_intv() is non-positive), -1
54  * if |gamB| < 1
55  *
56  *
57  * @warning memory allocation for Wo/lamo/reso within this function
58  *
59  * ------------------------------------------------------------ */
60 int ChebLanNr(double *intv, int maxit, double tol, double *vinit,
61  polparams *pol, int *nevOut, double **lamo, double **Wo,
62  double **reso, FILE *fstats) {
63  //-------------------- to report timings/
64  double tall, tm1 = 0.0, tt;
65  tall = evsl_timer();
66  const int ifGenEv = evsldata.ifGenEv;
67  double tr0, tr1;
68  double *y, flami;
69  int i, k, kdim;
70  /* handle case where fstats is NULL. Then no output. Needed for openMP */
71  int do_print = 1;
72  if (fstats == NULL) {
73  do_print = 0;
74  }
75  /*-------------------- frequently used constants */
76  char cN = 'N';
77  int one = 1;
78  double done = 1.0, dzero = 0.0;
79  /*-------------------- Ntest = when to start testing convergence */
80  int Ntest = 30;
81  /*-------------------- how often to test */
82  int cycle = 20;
83  /* size of the matrix */
84  int n = evsldata.n;
85  /* max num of its */
86  maxit = min(n, maxit);
87  /*-------------------- Caveat !!!: To prevent integer overflow, we save
88  * another size_t version of n
89  * Note n itself may not overflow but something like
90  * (maxit * n) is much more likely
91  * When using size_t n, in all the multiplications
92  * integer is promoted to size_t */
93  size_t n_l = n;
94  /*-------------------- polynomial filter approximates the delta
95  function centered at 'gamB'.
96  bar: a bar value to threshold Ritz values of p(A) */
97  double bar = pol->bar;
98  double gamB = pol->gam;
99  /*-------------------- interval [aa, bb], used for testing only at end */
100  if (check_intv(intv, fstats) < 0) {
101  *nevOut = 0;
102  *lamo = NULL; *Wo = NULL; *reso = NULL;
103  return 0;
104  }
105  double aa = intv[0];
106  double bb = intv[1];
107  int deg = pol->deg;
108  if (do_print) {
109  fprintf(fstats, " ** Cheb Poly of deg = %d, gam = %.15e, bar: %.15e\n",
110  deg, gamB, bar);
111  }
112  /*-------------------- gamB must be within [-1, 1] */
113  if (gamB > 1.0 || gamB < -1.0) {
114  fprintf(stdout, "gamB error %.15e\n", gamB);
115  return -1;
116  }
117  /*-----------------------------------------------------------------------*
118  * *Non-restarted* Lanczos iteration
119  *-----------------------------------------------------------------------*/
120  if (do_print) {
121  fprintf(fstats, " ** Cheb-LanNr \n");
122  }
123  /*-------------------- Lanczos vectors V_m and tridiagonal matrix T_m */
124  double *V, *dT, *eT, *Z;
125  Malloc(V, n_l*(maxit+1), double);
126  if (ifGenEv) {
127  /* storage for Z = B * V */
128  Malloc(Z, n_l*(maxit+1), double);
129  } else {
130  /* Z and V are the same */
131  Z = V;
132  }
133  /*-------------------- diag. subdiag of Tridiagional matrix */
134  Malloc(dT, maxit, double);
135  Malloc(eT, maxit, double);
136  double *Rvec, *Lam, *res, *EvalT, *EvecT;
137  /*-------------------- Lam, Rvec: the converged (locked) Ritz values vecs*/
138  Malloc(Lam, maxit, double); // holds computed Ritz values
139  Malloc(res, maxit, double); // residual norms (w.r.t. ro(A))
140  Malloc(EvalT, maxit, double); // eigenvalues of tridiag matrix T
141  /*-------------------- nev = current number of converged e-pairs
142  nconv = converged eigenpairs from looking at Tk alone */
143  int nev, nconv = 0;
144  /*-------------------- u is just a pointer. wk == work space */
145  double *u, *wk, *w2, *vrand = NULL;
146  size_t wk_size = ifGenEv ? 4*n_l : 3*n_l;
147  Malloc(wk, wk_size, double);
148  w2 = wk + n;
149  /*-------------------- copy initial vector to Z(:,1) */
150 #if FILTER_VINIT
151  /*-------------------- compute w = p[(A-cc)/dd] * v */
152  /*-------------------- Filter the initial vector */
153  ChebAv(pol, vinit, V, wk);
154  Malloc(vrand, n, double);
155 #else
156  /*-------------------- copy initial vector to V(:,1) */
157  DCOPY(&n, vinit, &one, V, &one);
158 #endif
159  /*-------------------- normalize it */
160  double t, nt, res0;
161  if (ifGenEv) {
162  /* B norm */
163  matvec_B(V, Z);
164  t = 1.0 / sqrt(DDOT(&n, V, &one, Z, &one));
165  DSCAL(&n, &t, Z, &one);
166  } else {
167  /* 2-norm */
168  t = 1.0 / DNRM2(&n, V, &one); // add a test here.
169  }
170  /* unit B-norm or 2-norm */
171  DSCAL(&n, &t, V, &one);
172  /*-------------------- for ortho test */
173  double wn = 0.0;
174  int nwn = 0;
175  /*-------------------- for stopping test [restricted trace]*/
176  tr0 = 0;
177  /*-------------------- lanczos vectors updated by rotating pointer*/
178  /*-------------------- pointers to Lanczos vectors */
179  double *zold, *z, *znew;
180  double *v, *vnew;
181  /*-------------------- Lanczos recurrence coefficients */
182  double alpha, nalpha, beta=0.0, nbeta;
183  int count = 0;
184  // ---------------- main Lanczos loop
185  for (k=0; k<maxit; k++) {
186  /*-------------------- quick reference to Z(:,k-1) when k>0 */
187  zold = k > 0 ? Z+(k-1)*n_l : NULL;
188  /*-------------------- a quick reference to V(:,k) */
189  v = &V[k*n_l];
190  /*-------------------- a quick reference to Z(:,k) */
191  z = &Z[k*n_l];
192  /*-------------------- next Lanczos vector V(:,k+1)*/
193  vnew = v + n;
194  /*-------------------- next Lanczos vector Z(:,k+1)*/
195  znew = z + n;
196  /*-------------------- compute w = p[(A-cc)/dd] * v */
197  /*-------------------- NOTE: z is used!!! [TODO: FIX ME] */
198  ChebAv(pol, z, znew, wk);
199  /*------------------ znew = znew - beta*zold */
200  if (zold) {
201  nbeta = -beta;
202  DAXPY(&n, &nbeta, zold, &one, znew, &one);
203  }
204  /*-------------------- alpha = znew'*v */
205  alpha = DDOT(&n, v, &one, znew, &one);
206  /*-------------------- T(k,k) = alpha */
207  dT[k] = alpha;
208  wn += fabs(alpha);
209  /*-------------------- znew = znew - alpha*z */
210  nalpha = -alpha;
211  DAXPY(&n, &nalpha, z, &one, znew, &one);
212  /*-------------------- FULL reortho to all previous Lan vectors */
213  if (ifGenEv) {
214  /* znew = znew - Z(:,1:k)*V(:,1:k)'*znew */
215  CGS_DGKS2(n, k+1, NGS_MAX, Z, V, znew, wk);
216  /* vnew = B \ znew */
217  solve_B(znew, vnew);
218  /*-------------------- beta = (vnew, znew)^{1/2} */
219  beta = sqrt(DDOT(&n, vnew, &one, znew, &one));
220  } else {
221  /* vnew = vnew - V(:,1:k)*V(:,1:k)'*vnew */
222  /* beta = norm(vnew) */
223  CGS_DGKS(n, k+1, NGS_MAX, V, vnew, &beta, wk);
224  }
225  wn += 2.0 * beta;
226  nwn += 3;
227  /*-------------------- lucky breakdown test */
228  if (beta*nwn < orthTol*wn) {
229  if (do_print) {
230  fprintf(fstats, "it %4d: Lucky breakdown, beta = %.15e\n", k, beta);
231  }
232 #if FILTER_VINIT
233  /*------------------ generate a new init vector */
234  rand_double(n, vrand);
235  /*------------------ Filter the initial vector*/
236  ChebAv(pol, vrand, vnew, wk);
237 #else
238  rand_double(n, vnew);
239 #endif
240  if (ifGenEv) {
241  /* vnew = vnew - V(:,1:k)*Z(:,1:k)'*vnew */
242  CGS_DGKS2(n, k+1, NGS_MAX, V, Z, vnew, wk);
243  matvec_B(vnew, znew);
244  beta = sqrt(DDOT(&n, vnew, &one, znew, &one));
245  /*-------------------- vnew = vnew / beta */
246  t = 1.0 / beta;
247  DSCAL(&n, &t, vnew, &one);
248  /*-------------------- znew = znew / beta */
249  DSCAL(&n, &t, znew, &one);
250  beta = 0.0;
251  } else {
252  /* vnew = vnew - V(:,1:k)*V(:,1:k)'*vnew */
253  /* beta = norm(vnew) */
254  CGS_DGKS(n, k+1, NGS_MAX, V, vnew, &beta, wk);
255  /*-------------------- vnew = vnew / beta */
256  t = 1.0 / beta;
257  DSCAL(&n, &t, vnew, &one);
258  beta = 0.0;
259  }
260  } else {
261  /*-------------------- vnew = vnew / beta */
262  t = 1.0 / beta;
263  DSCAL(&n, &t, vnew, &one);
264  if (ifGenEv) {
265  /*-------------------- znew = znew / beta */
266  DSCAL(&n, &t, znew, &one);
267  }
268  }
269  /*-------------------- T(k+1,k) = beta */
270  eT[k] = beta;
271 #if 0
272  /*-------------------- Reallocate memory if maxit is smaller than # of eigs */
273  if (k == maxit-1) {
274  maxit = 1 + (int) (maxit * 1.5);
275  Realloc(V, (maxit+1)*n_l, double);
276  if (ifGenEv) {
277  Realloc(Z, (maxit+1)*n_l, double);
278  } else {
279  /* make sure Z == V since V may be changed in the Realloc above */
280  Z = V;
281  }
282  Realloc(dT, maxit, double);
283  Realloc(eT, maxit, double);
284  Realloc(Lam, maxit, double);
285  Realloc(res, maxit, double);
286  Realloc(EvalT, maxit, double);
287  }
288 #endif
289  /*---------------------- test for Ritz vectors */
290  if ( (k < Ntest || (k-Ntest) % cycle != 0) && k != maxit-1 ) {
291  continue;
292  }
293  /*---------------------- diagonalize T(1:k,1:k)
294  vals in EvalT, vecs in EvecT */
295  kdim = k+1;
296 #if 1
297  /*-------------------- THIS uses dsetv, do not need eig vector */
298  SymmTridEig(EvalT, NULL, kdim, dT, eT);
299  count = kdim;
300 #else
301  /*-------------------- THIS uses dstemr */
302  double vl = bar - DBL_EPSILON, vu = 2.0; /* needed by SymmTridEigS */
303  SymmTridEigS(EvalT, EvecT, kdim, vl, vu, &count, dT, eT);
304 #endif
305  /*-------------------- restricted trace: used for convergence test */
306  tr1 = 0;
307  /*-------------------- get residual norms and check acceptance of Ritz
308  * values for p(A). nconv records number of
309  * eigenvalues whose residual for p(A) is smaller
310  * than tol. */
311  nconv = 0;
312  for (i=0; i<count; i++) {
313  flami = EvalT[i];
314  if (flami + DBL_EPSILON >= bar) {
315  tr1+= flami;
316  nconv++;
317  }
318  }
319
320  if (do_print) {
321  fprintf(fstats, "k %4d: nconv %4d tr1 %21.15e\n",
322  k, nconv,tr1);
323  }
324  /* -------------------- simple test because all eigenvalues
325  are between gamB and ~1. */
326  if ( (fabs(tr1-tr0) < 2e-12) || (fabs(tr1)+fabs(tr0) < 1e-10) ) {
327  break;
328  }
329  tr0 = tr1;
330  } /* end of the main loop */
331
332  if (k >= maxit) {
333  fprintf(fstats, "The max number of iterations [%d] has been reached. The eigenvalues computed may not have converged\n", maxit);
334  }
335
336  /*-------------------- compute eig vals and vector */
337  size_t kdim_l = kdim; /* just in case if kdim is > 65K */
338  Malloc(EvecT, kdim_l*kdim_l, double); // Eigen vectors of T
339  SymmTridEig(EvalT, EvecT, kdim, dT, eT);
340
341  tt = evsl_timer();
342  /*-------------------- done == compute Ritz vectors */
343  Malloc(Rvec, nconv*n_l, double); // holds computed Ritz vectors
344
345  nev = 0;
346  for (i=0; i<count; i++) {
347  flami = EvalT[i];
348  //-------------------- reject eigenvalue if rho(lam)<bar
349  if (flami < bar) {
350  continue;
351  }
352  y = &EvecT[i*kdim_l];
353  /*-------------------- make sure to normalize */
354  /*
355  t = DNRM2(&kdim, y, &one);
356  t = 1.0 / t;
357  DSCAL(&kdim, &t, y, &one);
358  */
359  /*-------------------- compute Ritz vectors */
360  u = &Rvec[nev*n_l];
361  DGEMV(&cN, &n, &kdim, &done, V, &n, y, &one, &dzero, u, &one);
362  /*-------------------- normalize u */
363  if (ifGenEv) {
364  /* B-norm, w2 = B*u */
365  matvec_B(u, w2);
366  t = sqrt(DDOT(&n, u, &one, w2, &one)); /* should be one */
367  } else {
368  /* 2-norm */
369  t = DNRM2(&n, u, &one); /* should be one */
370  }
371  /*-------------------- return code 2 --> zero eigenvector found */
372  if (t == 0.0) {
373  return 2;
374  }
375  /*-------------------- scal u */
376  t = 1.0 / t;
377  DSCAL(&n, &t, u, &one);
378  /*-------------------- scal B*u */
379  if (ifGenEv) {
380  /*------------------ w2 = B*u */
381  DSCAL(&n, &t, w2, &one);
382  }
383  /*-------------------- w = A*u */
384  matvec_A(u, wk);
385  /*-------------------- Ritz val: t = (u'*w)/(u'*u)
386  t = (u'*w)/(u'*B*u) */
387  t = DDOT(&n, wk, &one, u, &one);
388  /*-------------------- if lambda (==t) is in [a,b] */
389  if (t < aa - DBL_EPSILON || t > bb + DBL_EPSILON) {
390  continue;
391  }
392  /*-------------------- compute residual wrt A for this pair */
393  nt = -t;
394  if (ifGenEv) {
395  /*-------------------- w = w - t*B*u */
396  DAXPY(&n, &nt, w2, &one, wk, &one);
397  } else {
398  /*-------------------- w = w - t*u */
399  DAXPY(&n, &nt, u, &one, wk, &one);
400  }
401  /*-------------------- res0 = 2-norm(wk) */
402  res0 = DNRM2(&n, wk, &one);
403  /*-------------------- accept (t, y) */
404  if (res0 < tol) {
405  Lam[nev] = t;
406  res[nev] = res0;
407  nev++;
408  }
409  }
410  tm1 = evsl_timer() - tt;
411
412  /*-------------------- Done. output : */
413  *nevOut = nev;
414  *lamo = Lam;
415  *Wo = Rvec;
416  *reso = res;
417  /*-------------------- free arrays */
418  free(V);
419  free(dT);
420  free(eT);
421  free(EvalT);
422  free(EvecT);
423  free(wk);
424  if (vrand) {
425  free(vrand);
426  }
427  if (ifGenEv) {
428  free(Z);
429  }
430  /*-------------------- record stats */
431  tall = evsl_timer() - tall;
432
433  evslstat.t_iter = tall;
434  evslstat.t_ritz = tm1;
435
436  return 0;
437 }
438
int deg
Definition: struct.h:70
defs in EVSL
int ifGenEv
Definition: struct.h:166
#define orthTol
Definition: def.h:17
double DDOT(int *n, double *x, int *incx, double *y, int *incy)
void rand_double(int n, double *v)
Definition: vect.c:11
void CGS_DGKS(int n, int k, int i_max, double *Q, double *v, double *nrmv, double *w)
Classical GS reortho with Daniel, Gragg, Kaufman, Stewart test.
Definition: misc_la.c:195
double gam
Definition: struct.h:64
#define NGS_MAX
Definition: misc_la.c:260
int n
Definition: struct.h:165
void DSCAL(int *n, double *a, double *x, int *incx)
int SymmTridEig(double *eigVal, double *eigVec, int n, const double *diag, const double *sdiag)
compute all eigenvalues and eigenvectors of a symmetric tridiagonal matrix
Definition: misc_la.c:36
This file contains function prototypes and constant definitions internally used in EVSL...
#define min(a, b)
Definition: def.h:62
#define Malloc(base, nmem, type)
Definition: def.h:22
void DGEMV(char *trans, int *m, int *n, double *alpha, double *a, int *lda, double *x, int *incx, double *beta, double *y, int *incy)
evslStat evslstat
global statistics of EVSL
Definition: evsl.c:21
void CGS_DGKS2(int n, int k, int i_max, double *Z, double *Q, double *v, double *w)
Classical GS reortho. No test. just do i_max times used in generalized ev problems.
Definition: misc_la.c:235
double t_ritz
Definition: struct.h:210
structs used in evsl
void DAXPY(int *n, double *alpha, double *x, int *incx, double *y, int *incy)
int ChebLanNr(double *intv, int maxit, double tol, double *vinit, polparams *pol, int *nevOut, double **lamo, double **Wo, double **reso, FILE *fstats)
Definition: cheblanNr.c:60
double DNRM2(int *n, double *x, int *incx)
Defs for blaslapack routines.
int SymmTridEigS(double *eigVal, double *eigVec, int n, double vl, double vu, int *nevO, const double *diag, const double *sdiag)
compute eigenvalues and eigenvectors of a symmetric tridiagonal matrix in a slice ...
Definition: misc_la.c:93
int ChebAv(polparams *pol, double *v, double *y, double *w)
Computes y=P(A) v, where pn is a Cheb. polynomial expansion
Definition: chebpoly.c:510
evslData evsldata
global variable of EVSL
Definition: evsl.c:15
parameters for polynomial filter
Definition: struct.h:45
#define Realloc(base, nmem, type)
Definition: def.h:42
double evsl_timer()
evsl timer for mac
Definition: mactime.c:14
double bar
Definition: struct.h:65
double t_iter
Definition: struct.h:200
void DCOPY(int *n, double *dx, int *incx, double *dy, int *incy)