ergo
template_lapack_lar1v.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LAR1V_HEADER
36 #define TEMPLATE_LAPACK_LAR1V_HEADER
37 
38 template<class Treal>
39 int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal
40  *lambda, Treal *d__, Treal *l, Treal *ld, Treal *
41  lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical
42  *wantnc, integer *negcnt, Treal *ztz, Treal *mingma,
43  integer *r__, integer *isuppz, Treal *nrminv, Treal *resid,
44  Treal *rqcorr, Treal *work)
45 {
46  /* System generated locals */
47  integer i__1;
48  Treal d__1, d__2, d__3;
49 
50 
51  /* Local variables */
52  integer i__;
53  Treal s;
54  integer r1, r2;
55  Treal eps, tmp;
56  integer neg1, neg2, indp, inds;
57  Treal dplus;
58  integer indlpl, indumn;
59  Treal dminus;
60  logical sawnan1, sawnan2;
61 
62 
63 /* -- LAPACK auxiliary routine (version 3.2) -- */
64 /* Univ. of Tennessee, Univ. of California Berkeley and NAG Ltd.. */
65 /* November 2006 */
66 
67 /* .. Scalar Arguments .. */
68 /* .. */
69 /* .. Array Arguments .. */
70 /* .. */
71 
72 /* Purpose */
73 /* ======= */
74 
75 /* DLAR1V computes the (scaled) r-th column of the inverse of */
76 /* the sumbmatrix in rows B1 through BN of the tridiagonal matrix */
77 /* L D L^T - sigma I. When sigma is close to an eigenvalue, the */
78 /* computed vector is an accurate eigenvector. Usually, r corresponds */
79 /* to the index where the eigenvector is largest in magnitude. */
80 /* The following steps accomplish this computation : */
81 /* (a) Stationary qd transform, L D L^T - sigma I = L(+) D(+) L(+)^T, */
82 /* (b) Progressive qd transform, L D L^T - sigma I = U(-) D(-) U(-)^T, */
83 /* (c) Computation of the diagonal elements of the inverse of */
84 /* L D L^T - sigma I by combining the above transforms, and choosing */
85 /* r as the index where the diagonal of the inverse is (one of the) */
86 /* largest in magnitude. */
87 /* (d) Computation of the (scaled) r-th column of the inverse using the */
88 /* twisted factorization obtained by combining the top part of the */
89 /* the stationary and the bottom part of the progressive transform. */
90 
91 /* Arguments */
92 /* ========= */
93 
94 /* N (input) INTEGER */
95 /* The order of the matrix L D L^T. */
96 
97 /* B1 (input) INTEGER */
98 /* First index of the submatrix of L D L^T. */
99 
100 /* BN (input) INTEGER */
101 /* Last index of the submatrix of L D L^T. */
102 
103 /* LAMBDA (input) DOUBLE PRECISION */
104 /* The shift. In order to compute an accurate eigenvector, */
105 /* LAMBDA should be a good approximation to an eigenvalue */
106 /* of L D L^T. */
107 
108 /* L (input) DOUBLE PRECISION array, dimension (N-1) */
109 /* The (n-1) subdiagonal elements of the unit bidiagonal matrix */
110 /* L, in elements 1 to N-1. */
111 
112 /* D (input) DOUBLE PRECISION array, dimension (N) */
113 /* The n diagonal elements of the diagonal matrix D. */
114 
115 /* LD (input) DOUBLE PRECISION array, dimension (N-1) */
116 /* The n-1 elements L(i)*D(i). */
117 
118 /* LLD (input) DOUBLE PRECISION array, dimension (N-1) */
119 /* The n-1 elements L(i)*L(i)*D(i). */
120 
121 /* PIVMIN (input) DOUBLE PRECISION */
122 /* The minimum pivot in the Sturm sequence. */
123 
124 /* GAPTOL (input) DOUBLE PRECISION */
125 /* Tolerance that indicates when eigenvector entries are negligible */
126 /* w.r.t. their contribution to the residual. */
127 
128 /* Z (input/output) DOUBLE PRECISION array, dimension (N) */
129 /* On input, all entries of Z must be set to 0. */
130 /* On output, Z contains the (scaled) r-th column of the */
131 /* inverse. The scaling is such that Z(R) equals 1. */
132 
133 /* WANTNC (input) LOGICAL */
134 /* Specifies whether NEGCNT has to be computed. */
135 
136 /* NEGCNT (output) INTEGER */
137 /* If WANTNC is .TRUE. then NEGCNT = the number of pivots < pivmin */
138 /* in the matrix factorization L D L^T, and NEGCNT = -1 otherwise. */
139 
140 /* ZTZ (output) DOUBLE PRECISION */
141 /* The square of the 2-norm of Z. */
142 
143 /* MINGMA (output) DOUBLE PRECISION */
144 /* The reciprocal of the largest (in magnitude) diagonal */
145 /* element of the inverse of L D L^T - sigma I. */
146 
147 /* R (input/output) INTEGER */
148 /* The twist index for the twisted factorization used to */
149 /* compute Z. */
150 /* On input, 0 <= R <= N. If R is input as 0, R is set to */
151 /* the index where (L D L^T - sigma I)^{-1} is largest */
152 /* in magnitude. If 1 <= R <= N, R is unchanged. */
153 /* On output, R contains the twist index used to compute Z. */
154 /* Ideally, R designates the position of the maximum entry in the */
155 /* eigenvector. */
156 
157 /* ISUPPZ (output) INTEGER array, dimension (2) */
158 /* The support of the vector in Z, i.e., the vector Z is */
159 /* nonzero only in elements ISUPPZ(1) through ISUPPZ( 2 ). */
160 
161 /* NRMINV (output) DOUBLE PRECISION */
162 /* NRMINV = 1/SQRT( ZTZ ) */
163 
164 /* RESID (output) DOUBLE PRECISION */
165 /* The residual of the FP vector. */
166 /* RESID = ABS( MINGMA )/SQRT( ZTZ ) */
167 
168 /* RQCORR (output) DOUBLE PRECISION */
169 /* The Rayleigh Quotient correction to LAMBDA. */
170 /* RQCORR = MINGMA*TMP */
171 
172 /* WORK (workspace) DOUBLE PRECISION array, dimension (4*N) */
173 
174 /* Further Details */
175 /* =============== */
176 
177 /* Based on contributions by */
178 /* Beresford Parlett, University of California, Berkeley, USA */
179 /* Jim Demmel, University of California, Berkeley, USA */
180 /* Inderjit Dhillon, University of Texas, Austin, USA */
181 /* Osni Marques, LBNL/NERSC, USA */
182 /* Christof Voemel, University of California, Berkeley, USA */
183 
184 /* ===================================================================== */
185 
186 /* .. Parameters .. */
187 /* .. */
188 /* .. Local Scalars .. */
189 /* .. */
190 /* .. External Functions .. */
191 /* .. */
192 /* .. Intrinsic Functions .. */
193 /* .. */
194 /* .. Executable Statements .. */
195 
196  /* Parameter adjustments */
197  --work;
198  --isuppz;
199  --z__;
200  --lld;
201  --ld;
202  --l;
203  --d__;
204 
205  /* Function Body */
206  eps = template_lapack_lamch("Precision", (Treal)0);
207  if (*r__ == 0) {
208  r1 = *b1;
209  r2 = *bn;
210  } else {
211  r1 = *r__;
212  r2 = *r__;
213  }
214 /* Storage for LPLUS */
215  indlpl = 0;
216 /* Storage for UMINUS */
217  indumn = *n;
218  inds = (*n << 1) + 1;
219  indp = *n * 3 + 1;
220  if (*b1 == 1) {
221  work[inds] = 0.;
222  } else {
223  work[inds + *b1 - 1] = lld[*b1 - 1];
224  }
225 
226 /* Compute the stationary transform (using the differential form) */
227 /* until the index R2. */
228 
229  sawnan1 = FALSE_;
230  neg1 = 0;
231  s = work[inds + *b1 - 1] - *lambda;
232  i__1 = r1 - 1;
233  for (i__ = *b1; i__ <= i__1; ++i__) {
234  dplus = d__[i__] + s;
235  work[indlpl + i__] = ld[i__] / dplus;
236  if (dplus < 0.) {
237  ++neg1;
238  }
239  work[inds + i__] = s * work[indlpl + i__] * l[i__];
240  s = work[inds + i__] - *lambda;
241 /* L50: */
242  }
243  sawnan1 = template_lapack_isnan(&s);
244  if (sawnan1) {
245  goto L60;
246  }
247  i__1 = r2 - 1;
248  for (i__ = r1; i__ <= i__1; ++i__) {
249  dplus = d__[i__] + s;
250  work[indlpl + i__] = ld[i__] / dplus;
251  work[inds + i__] = s * work[indlpl + i__] * l[i__];
252  s = work[inds + i__] - *lambda;
253 /* L51: */
254  }
255  sawnan1 = template_lapack_isnan(&s);
256 
257 L60:
258  if (sawnan1) {
259 /* Runs a slower version of the above loop if a NaN is detected */
260  neg1 = 0;
261  s = work[inds + *b1 - 1] - *lambda;
262  i__1 = r1 - 1;
263  for (i__ = *b1; i__ <= i__1; ++i__) {
264  dplus = d__[i__] + s;
265  if (absMACRO(dplus) < *pivmin) {
266  dplus = -(*pivmin);
267  }
268  work[indlpl + i__] = ld[i__] / dplus;
269  if (dplus < 0.) {
270  ++neg1;
271  }
272  work[inds + i__] = s * work[indlpl + i__] * l[i__];
273  if (work[indlpl + i__] == 0.) {
274  work[inds + i__] = lld[i__];
275  }
276  s = work[inds + i__] - *lambda;
277 /* L70: */
278  }
279  i__1 = r2 - 1;
280  for (i__ = r1; i__ <= i__1; ++i__) {
281  dplus = d__[i__] + s;
282  if (absMACRO(dplus) < *pivmin) {
283  dplus = -(*pivmin);
284  }
285  work[indlpl + i__] = ld[i__] / dplus;
286  work[inds + i__] = s * work[indlpl + i__] * l[i__];
287  if (work[indlpl + i__] == 0.) {
288  work[inds + i__] = lld[i__];
289  }
290  s = work[inds + i__] - *lambda;
291 /* L71: */
292  }
293  }
294 
295 /* Compute the progressive transform (using the differential form) */
296 /* until the index R1 */
297 
298  sawnan2 = FALSE_;
299  neg2 = 0;
300  work[indp + *bn - 1] = d__[*bn] - *lambda;
301  i__1 = r1;
302  for (i__ = *bn - 1; i__ >= i__1; --i__) {
303  dminus = lld[i__] + work[indp + i__];
304  tmp = d__[i__] / dminus;
305  if (dminus < 0.) {
306  ++neg2;
307  }
308  work[indumn + i__] = l[i__] * tmp;
309  work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
310 /* L80: */
311  }
312  tmp = work[indp + r1 - 1];
313  sawnan2 = template_lapack_isnan(&tmp);
314  if (sawnan2) {
315 /* Runs a slower version of the above loop if a NaN is detected */
316  neg2 = 0;
317  i__1 = r1;
318  for (i__ = *bn - 1; i__ >= i__1; --i__) {
319  dminus = lld[i__] + work[indp + i__];
320  if (absMACRO(dminus) < *pivmin) {
321  dminus = -(*pivmin);
322  }
323  tmp = d__[i__] / dminus;
324  if (dminus < 0.) {
325  ++neg2;
326  }
327  work[indumn + i__] = l[i__] * tmp;
328  work[indp + i__ - 1] = work[indp + i__] * tmp - *lambda;
329  if (tmp == 0.) {
330  work[indp + i__ - 1] = d__[i__] - *lambda;
331  }
332 /* L100: */
333  }
334  }
335 
336 /* Find the index (from R1 to R2) of the largest (in magnitude) */
337 /* diagonal element of the inverse */
338 
339  *mingma = work[inds + r1 - 1] + work[indp + r1 - 1];
340  if (*mingma < 0.) {
341  ++neg1;
342  }
343  if (*wantnc) {
344  *negcnt = neg1 + neg2;
345  } else {
346  *negcnt = -1;
347  }
348  if (absMACRO(*mingma) == 0.) {
349  *mingma = eps * work[inds + r1 - 1];
350  }
351  *r__ = r1;
352  i__1 = r2 - 1;
353  for (i__ = r1; i__ <= i__1; ++i__) {
354  tmp = work[inds + i__] + work[indp + i__];
355  if (tmp == 0.) {
356  tmp = eps * work[inds + i__];
357  }
358  if (absMACRO(tmp) <= absMACRO(*mingma)) {
359  *mingma = tmp;
360  *r__ = i__ + 1;
361  }
362 /* L110: */
363  }
364 
365 /* Compute the FP vector: solve N^T v = e_r */
366 
367  isuppz[1] = *b1;
368  isuppz[2] = *bn;
369  z__[*r__] = 1.;
370  *ztz = 1.;
371 
372 /* Compute the FP vector upwards from R */
373 
374  if (! sawnan1 && ! sawnan2) {
375  i__1 = *b1;
376  for (i__ = *r__ - 1; i__ >= i__1; --i__) {
377  z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
378  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
379  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
380  z__[i__] = 0.;
381  isuppz[1] = i__ + 1;
382  goto L220;
383  }
384  *ztz += z__[i__] * z__[i__];
385 /* L210: */
386  }
387 L220:
388  ;
389  } else {
390 /* Run slower loop if NaN occurred. */
391  i__1 = *b1;
392  for (i__ = *r__ - 1; i__ >= i__1; --i__) {
393  if (z__[i__ + 1] == 0.) {
394  z__[i__] = -(ld[i__ + 1] / ld[i__]) * z__[i__ + 2];
395  } else {
396  z__[i__] = -(work[indlpl + i__] * z__[i__ + 1]);
397  }
398  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
399  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
400  z__[i__] = 0.;
401  isuppz[1] = i__ + 1;
402  goto L240;
403  }
404  *ztz += z__[i__] * z__[i__];
405 /* L230: */
406  }
407 L240:
408  ;
409  }
410 /* Compute the FP vector downwards from R in blocks of size BLKSIZ */
411  if (! sawnan1 && ! sawnan2) {
412  i__1 = *bn - 1;
413  for (i__ = *r__; i__ <= i__1; ++i__) {
414  z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
415  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
416  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
417  z__[i__ + 1] = 0.;
418  isuppz[2] = i__;
419  goto L260;
420  }
421  *ztz += z__[i__ + 1] * z__[i__ + 1];
422 /* L250: */
423  }
424 L260:
425  ;
426  } else {
427 /* Run slower loop if NaN occurred. */
428  i__1 = *bn - 1;
429  for (i__ = *r__; i__ <= i__1; ++i__) {
430  if (z__[i__] == 0.) {
431  z__[i__ + 1] = -(ld[i__ - 1] / ld[i__]) * z__[i__ - 1];
432  } else {
433  z__[i__ + 1] = -(work[indumn + i__] * z__[i__]);
434  }
435  if (((d__1 = z__[i__], absMACRO(d__1)) + (d__2 = z__[i__ + 1], absMACRO(
436  d__2))) * (d__3 = ld[i__], absMACRO(d__3)) < *gaptol) {
437  z__[i__ + 1] = 0.;
438  isuppz[2] = i__;
439  goto L280;
440  }
441  *ztz += z__[i__ + 1] * z__[i__ + 1];
442 /* L270: */
443  }
444 L280:
445  ;
446  }
447 
448 /* Compute quantities for convergence test */
449 
450  tmp = 1. / *ztz;
451  *nrminv = template_blas_sqrt(tmp);
452  *resid = absMACRO(*mingma) * *nrminv;
453  *rqcorr = *mingma * tmp;
454 
455 
456  return 0;
457 
458 /* End of DLAR1V */
459 
460 } /* dlar1v_ */
461 
462 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:43
int integer
Definition: template_blas_common.h:38
int template_lapack_lar1v(integer *n, integer *b1, integer *bn, Treal *lambda, Treal *d__, Treal *l, Treal *ld, Treal *lld, Treal *pivmin, Treal *gaptol, Treal *z__, logical *wantnc, integer *negcnt, Treal *ztz, Treal *mingma, integer *r__, integer *isuppz, Treal *nrminv, Treal *resid, Treal *rqcorr, Treal *work)
Definition: template_lapack_lar1v.h:39
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
#define FALSE_
Definition: template_lapack_common.h:41
Treal template_blas_sqrt(Treal x)