ergo
template_lapack_lasq3.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_LASQ3_HEADER
36 #define TEMPLATE_LAPACK_LASQ3_HEADER
37 
38 
39 #include "template_lapack_lasq4.h"
40 #include "template_lapack_lasq5.h"
41 #include "template_lapack_lasq6.h"
42 
43 
44 template<class Treal>
45 int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__,
46  integer *pp, Treal *dmin__, Treal *sigma, Treal *desig,
47  Treal *qmax, integer *nfail, integer *iter, integer *ndiv,
48  logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2,
49  Treal *dn, Treal *dn1, Treal *dn2, Treal *g,
50  Treal *tau)
51 {
52  /* System generated locals */
53  integer i__1;
54  Treal d__1, d__2;
55 
56 
57  /* Local variables */
58  Treal s, t;
59  integer j4, nn;
60  Treal eps, tol;
61  integer n0in, ipn4;
62  Treal tol2, temp;
63 
64 
65 /* -- LAPACK routine (version 3.2) -- */
66 
67 /* -- Contributed by Osni Marques of the Lawrence Berkeley National -- */
68 /* -- Laboratory and Beresford Parlett of the Univ. of California at -- */
69 /* -- Berkeley -- */
70 /* -- November 2008 -- */
71 
72 /* -- LAPACK is a software package provided by Univ. of Tennessee, -- */
73 /* -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..-- */
74 
75 /* .. Scalar Arguments .. */
76 /* .. */
77 /* .. Array Arguments .. */
78 /* .. */
79 
80 /* Purpose */
81 /* ======= */
82 
83 /* DLASQ3 checks for deflation, computes a shift (TAU) and calls dqds. */
84 /* In case of failure it changes shifts, and tries again until output */
85 /* is positive. */
86 
87 /* Arguments */
88 /* ========= */
89 
90 /* I0 (input) INTEGER */
91 /* First index. */
92 
93 /* N0 (input) INTEGER */
94 /* Last index. */
95 
96 /* Z (input) DOUBLE PRECISION array, dimension ( 4*N ) */
97 /* Z holds the qd array. */
98 
99 /* PP (input/output) INTEGER */
100 /* PP=0 for ping, PP=1 for pong. */
101 /* PP=2 indicates that flipping was applied to the Z array */
102 /* and that the initial tests for deflation should not be */
103 /* performed. */
104 
105 /* DMIN (output) DOUBLE PRECISION */
106 /* Minimum value of d. */
107 
108 /* SIGMA (output) DOUBLE PRECISION */
109 /* Sum of shifts used in current segment. */
110 
111 /* DESIG (input/output) DOUBLE PRECISION */
112 /* Lower order part of SIGMA */
113 
114 /* QMAX (input) DOUBLE PRECISION */
115 /* Maximum value of q. */
116 
117 /* NFAIL (output) INTEGER */
118 /* Number of times shift was too big. */
119 
120 /* ITER (output) INTEGER */
121 /* Number of iterations. */
122 
123 /* NDIV (output) INTEGER */
124 /* Number of divisions. */
125 
126 /* IEEE (input) LOGICAL */
127 /* Flag for IEEE or non IEEE arithmetic (passed to DLASQ5). */
128 
129 /* TTYPE (input/output) INTEGER */
130 /* Shift type. */
131 
132 /* DMIN1, DMIN2, DN, DN1, DN2, G, TAU (input/output) DOUBLE PRECISION */
133 /* These are passed as arguments in order to save their values */
134 /* between calls to DLASQ3. */
135 
136 /* ===================================================================== */
137 
138 /* .. Parameters .. */
139 /* .. */
140 /* .. Local Scalars .. */
141 /* .. */
142 /* .. External Subroutines .. */
143 /* .. */
144 /* .. External Function .. */
145 /* .. */
146 /* .. Intrinsic Functions .. */
147 /* .. */
148 /* .. Executable Statements .. */
149 
150  /* Parameter adjustments */
151  --z__;
152 
153  /* Function Body */
154  n0in = *n0;
155  eps = template_lapack_lamch("Precision", (Treal)0);
156  tol = eps * 100.;
157 /* Computing 2nd power */
158  d__1 = tol;
159  tol2 = d__1 * d__1;
160 
161 /* Check for deflation. */
162 
163 L10:
164 
165  if (*n0 < *i0) {
166  return 0;
167  }
168  if (*n0 == *i0) {
169  goto L20;
170  }
171  nn = (*n0 << 2) + *pp;
172  if (*n0 == *i0 + 1) {
173  goto L40;
174  }
175 
176 /* Check whether E(N0-1) is negligible, 1 eigenvalue. */
177 
178  if (z__[nn - 5] > tol2 * (*sigma + z__[nn - 3]) && z__[nn - (*pp << 1) -
179  4] > tol2 * z__[nn - 7]) {
180  goto L30;
181  }
182 
183 L20:
184 
185  z__[(*n0 << 2) - 3] = z__[(*n0 << 2) + *pp - 3] + *sigma;
186  --(*n0);
187  goto L10;
188 
189 /* Check whether E(N0-2) is negligible, 2 eigenvalues. */
190 
191 L30:
192 
193  if (z__[nn - 9] > tol2 * *sigma && z__[nn - (*pp << 1) - 8] > tol2 * z__[
194  nn - 11]) {
195  goto L50;
196  }
197 
198 L40:
199 
200  if (z__[nn - 3] > z__[nn - 7]) {
201  s = z__[nn - 3];
202  z__[nn - 3] = z__[nn - 7];
203  z__[nn - 7] = s;
204  }
205  if (z__[nn - 5] > z__[nn - 3] * tol2) {
206  t = (z__[nn - 7] - z__[nn - 3] + z__[nn - 5]) * .5;
207  s = z__[nn - 3] * (z__[nn - 5] / t);
208  if (s <= t) {
209  s = z__[nn - 3] * (z__[nn - 5] / (t * (template_blas_sqrt(s / t + 1.) + 1.)));
210  } else {
211  s = z__[nn - 3] * (z__[nn - 5] / (t + template_blas_sqrt(t) * template_blas_sqrt(t + s)));
212  }
213  t = z__[nn - 7] + (s + z__[nn - 5]);
214  z__[nn - 3] *= z__[nn - 7] / t;
215  z__[nn - 7] = t;
216  }
217  z__[(*n0 << 2) - 7] = z__[nn - 7] + *sigma;
218  z__[(*n0 << 2) - 3] = z__[nn - 3] + *sigma;
219  *n0 += -2;
220  goto L10;
221 
222 L50:
223  if (*pp == 2) {
224  *pp = 0;
225  }
226 
227 /* Reverse the qd-array, if warranted. */
228 
229  if (*dmin__ <= 0. || *n0 < n0in) {
230  if (z__[(*i0 << 2) + *pp - 3] * 1.5 < z__[(*n0 << 2) + *pp - 3]) {
231  ipn4 = ( *i0 + *n0 ) << 2;
232  i__1 = ( *i0 + *n0 - 1 ) << 1;
233  for (j4 = *i0 << 2; j4 <= i__1; j4 += 4) {
234  temp = z__[j4 - 3];
235  z__[j4 - 3] = z__[ipn4 - j4 - 3];
236  z__[ipn4 - j4 - 3] = temp;
237  temp = z__[j4 - 2];
238  z__[j4 - 2] = z__[ipn4 - j4 - 2];
239  z__[ipn4 - j4 - 2] = temp;
240  temp = z__[j4 - 1];
241  z__[j4 - 1] = z__[ipn4 - j4 - 5];
242  z__[ipn4 - j4 - 5] = temp;
243  temp = z__[j4];
244  z__[j4] = z__[ipn4 - j4 - 4];
245  z__[ipn4 - j4 - 4] = temp;
246 /* L60: */
247  }
248  if (*n0 - *i0 <= 4) {
249  z__[(*n0 << 2) + *pp - 1] = z__[(*i0 << 2) + *pp - 1];
250  z__[(*n0 << 2) - *pp] = z__[(*i0 << 2) - *pp];
251  }
252 /* Computing MIN */
253  d__1 = *dmin2, d__2 = z__[(*n0 << 2) + *pp - 1];
254  *dmin2 = minMACRO(d__1,d__2);
255 /* Computing MIN */
256  d__1 = z__[(*n0 << 2) + *pp - 1], d__2 = z__[(*i0 << 2) + *pp - 1]
257  , d__1 = minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) + *pp + 3];
258  z__[(*n0 << 2) + *pp - 1] = minMACRO(d__1,d__2);
259 /* Computing MIN */
260  d__1 = z__[(*n0 << 2) - *pp], d__2 = z__[(*i0 << 2) - *pp], d__1 =
261  minMACRO(d__1,d__2), d__2 = z__[(*i0 << 2) - *pp + 4];
262  z__[(*n0 << 2) - *pp] = minMACRO(d__1,d__2);
263 /* Computing MAX */
264  d__1 = *qmax, d__2 = z__[(*i0 << 2) + *pp - 3], d__1 = maxMACRO(d__1,
265  d__2), d__2 = z__[(*i0 << 2) + *pp + 1];
266  *qmax = maxMACRO(d__1,d__2);
267  *dmin__ = -0.;
268  }
269  }
270 
271 /* Choose a shift. */
272 
273  template_lapack_lasq4(i0, n0, &z__[1], pp, &n0in, dmin__, dmin1, dmin2, dn, dn1, dn2,
274  tau, ttype, g);
275 
276 /* Call dqds until DMIN > 0. */
277 
278 L70:
279 
280  template_lapack_lasq5(i0, n0, &z__[1], pp, tau, dmin__, dmin1, dmin2, dn, dn1, dn2,
281  ieee);
282 
283  *ndiv += *n0 - *i0 + 2;
284  ++(*iter);
285 
286 /* Check status. */
287 
288  if (*dmin__ >= 0. && *dmin1 > 0.) {
289 
290 /* Success. */
291 
292  goto L90;
293 
294  } else if (*dmin__ < 0. && *dmin1 > 0. && z__[( ( *n0 - 1 ) << 2) - *pp] < tol
295  * (*sigma + *dn1) && absMACRO(*dn) < tol * *sigma) {
296 
297 /* Convergence hidden by negative DN. */
298 
299  z__[( ( *n0 - 1 ) << 2) - *pp + 2] = 0.;
300  *dmin__ = 0.;
301  goto L90;
302  } else if (*dmin__ < 0.) {
303 
304 /* TAU too big. Select new TAU and try again. */
305 
306  ++(*nfail);
307  if (*ttype < -22) {
308 
309 /* Failed twice. Play it safe. */
310 
311  *tau = 0.;
312  } else if (*dmin1 > 0.) {
313 
314 /* Late failure. Gives excellent shift. */
315 
316  *tau = (*tau + *dmin__) * (1. - eps * 2.);
317  *ttype += -11;
318  } else {
319 
320 /* Early failure. Divide by 4. */
321 
322  *tau *= .25;
323  *ttype += -12;
324  }
325  goto L70;
326  } else if (template_lapack_isnan(dmin__)) {
327 
328 /* NaN. */
329 
330  if (*tau == 0.) {
331  goto L80;
332  } else {
333  *tau = 0.;
334  goto L70;
335  }
336  } else {
337 
338 /* Possible underflow. Play it safe. */
339 
340  goto L80;
341  }
342 
343 /* Risk of underflow. */
344 
345 L80:
346  template_lapack_lasq6(i0, n0, &z__[1], pp, dmin__, dmin1, dmin2, dn, dn1, dn2);
347  *ndiv += *n0 - *i0 + 2;
348  ++(*iter);
349  *tau = 0.;
350 
351 L90:
352  if (*tau < *sigma) {
353  *desig += *tau;
354  t = *sigma + *desig;
355  *desig -= t - *sigma;
356  } else {
357  t = *sigma + *tau;
358  *desig = *sigma - (t - *tau) + *desig;
359  }
360  *sigma = t;
361 
362  return 0;
363 
364 /* End of DLASQ3 */
365 
366 } /* dlasq3_ */
367 
368 #endif
#define absMACRO(x)
Definition: template_blas_common.h:45
logical template_lapack_isnan(Treal *din)
Definition: template_lapack_isnan.h:43
int integer
Definition: template_blas_common.h:38
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_lapack_lasq6(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2)
Definition: template_lapack_lasq6.h:39
#define minMACRO(a, b)
Definition: template_blas_common.h:44
int template_lapack_lasq5(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *tau, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dnm1, Treal *dnm2, logical *ieee)
Definition: template_lapack_lasq5.h:39
int template_lapack_lasq3(integer *i0, integer *n0, Treal *z__, integer *pp, Treal *dmin__, Treal *sigma, Treal *desig, Treal *qmax, integer *nfail, integer *iter, integer *ndiv, logical *ieee, integer *ttype, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *g, Treal *tau)
Definition: template_lapack_lasq3.h:45
Treal template_lapack_lamch(const char *cmach, Treal dummyReal)
Definition: template_lapack_lamch.h:199
bool logical
Definition: template_blas_common.h:39
Treal template_blas_sqrt(Treal x)
int template_lapack_lasq4(integer *i0, integer *n0, Treal *z__, integer *pp, integer *n0in, Treal *dmin__, Treal *dmin1, Treal *dmin2, Treal *dn, Treal *dn1, Treal *dn2, Treal *tau, integer *ttype, Treal *g)
Definition: template_lapack_lasq4.h:39