ergo
template_lapack_sytrd.h
Go to the documentation of this file.
1 /* Ergo, version 3.3, a program for linear scaling electronic structure
2  * calculations.
3  * Copyright (C) 2013 Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek.
4  *
5  * This program is free software: you can redistribute it and/or modify
6  * it under the terms of the GNU General Public License as published by
7  * the Free Software Foundation, either version 3 of the License, or
8  * (at your option) any later version.
9  *
10  * This program is distributed in the hope that it will be useful,
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13  * GNU General Public License for more details.
14  *
15  * You should have received a copy of the GNU General Public License
16  * along with this program. If not, see <http://www.gnu.org/licenses/>.
17  *
18  * Primary academic reference:
19  * Kohn−Sham Density Functional Theory Electronic Structure Calculations
20  * with Linearly Scaling Computational Time and Memory Usage,
21  * Elias Rudberg, Emanuel H. Rubensson, and Pawel Salek,
22  * J. Chem. Theory Comput. 7, 340 (2011),
23  * <http://dx.doi.org/10.1021/ct100611z>
24  *
25  * For further information about Ergo, see <http://www.ergoscf.org>.
26  */
27 
28  /* This file belongs to the template_lapack part of the Ergo source
29  * code. The source files in the template_lapack directory are modified
30  * versions of files originally distributed as CLAPACK, see the
31  * Copyright/license notice in the file template_lapack/COPYING.
32  */
33 
34 
35 #ifndef TEMPLATE_LAPACK_SYTRD_HEADER
36 #define TEMPLATE_LAPACK_SYTRD_HEADER
37 
38 #include "template_lapack_common.h"
39 
40 template<class Treal>
41 int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *
42  lda, Treal *d__, Treal *e, Treal *tau, Treal *
43  work, const integer *lwork, integer *info)
44 {
45 /* -- LAPACK routine (version 3.0) --
46  Univ. of Tennessee, Univ. of California Berkeley, NAG Ltd.,
47  Courant Institute, Argonne National Lab, and Rice University
48  June 30, 1999
49 
50 
51  Purpose
52  =======
53 
54  DSYTRD reduces a real symmetric matrix A to real symmetric
55  tridiagonal form T by an orthogonal similarity transformation:
56  Q**T * A * Q = T.
57 
58  Arguments
59  =========
60 
61  UPLO (input) CHARACTER*1
62  = 'U': Upper triangle of A is stored;
63  = 'L': Lower triangle of A is stored.
64 
65  N (input) INTEGER
66  The order of the matrix A. N >= 0.
67 
68  A (input/output) DOUBLE PRECISION array, dimension (LDA,N)
69  On entry, the symmetric matrix A. If UPLO = 'U', the leading
70  N-by-N upper triangular part of A contains the upper
71  triangular part of the matrix A, and the strictly lower
72  triangular part of A is not referenced. If UPLO = 'L', the
73  leading N-by-N lower triangular part of A contains the lower
74  triangular part of the matrix A, and the strictly upper
75  triangular part of A is not referenced.
76  On exit, if UPLO = 'U', the diagonal and first superdiagonal
77  of A are overwritten by the corresponding elements of the
78  tridiagonal matrix T, and the elements above the first
79  superdiagonal, with the array TAU, represent the orthogonal
80  matrix Q as a product of elementary reflectors; if UPLO
81  = 'L', the diagonal and first subdiagonal of A are over-
82  written by the corresponding elements of the tridiagonal
83  matrix T, and the elements below the first subdiagonal, with
84  the array TAU, represent the orthogonal matrix Q as a product
85  of elementary reflectors. See Further Details.
86 
87  LDA (input) INTEGER
88  The leading dimension of the array A. LDA >= max(1,N).
89 
90  D (output) DOUBLE PRECISION array, dimension (N)
91  The diagonal elements of the tridiagonal matrix T:
92  D(i) = A(i,i).
93 
94  E (output) DOUBLE PRECISION array, dimension (N-1)
95  The off-diagonal elements of the tridiagonal matrix T:
96  E(i) = A(i,i+1) if UPLO = 'U', E(i) = A(i+1,i) if UPLO = 'L'.
97 
98  TAU (output) DOUBLE PRECISION array, dimension (N-1)
99  The scalar factors of the elementary reflectors (see Further
100  Details).
101 
102  WORK (workspace/output) DOUBLE PRECISION array, dimension (LWORK)
103  On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
104 
105  LWORK (input) INTEGER
106  The dimension of the array WORK. LWORK >= 1.
107  For optimum performance LWORK >= N*NB, where NB is the
108  optimal blocksize.
109 
110  If LWORK = -1, then a workspace query is assumed; the routine
111  only calculates the optimal size of the WORK array, returns
112  this value as the first entry of the WORK array, and no error
113  message related to LWORK is issued by XERBLA.
114 
115  INFO (output) INTEGER
116  = 0: successful exit
117  < 0: if INFO = -i, the i-th argument had an illegal value
118 
119  Further Details
120  ===============
121 
122  If UPLO = 'U', the matrix Q is represented as a product of elementary
123  reflectors
124 
125  Q = H(n-1) . . . H(2) H(1).
126 
127  Each H(i) has the form
128 
129  H(i) = I - tau * v * v'
130 
131  where tau is a real scalar, and v is a real vector with
132  v(i+1:n) = 0 and v(i) = 1; v(1:i-1) is stored on exit in
133  A(1:i-1,i+1), and tau in TAU(i).
134 
135  If UPLO = 'L', the matrix Q is represented as a product of elementary
136  reflectors
137 
138  Q = H(1) H(2) . . . H(n-1).
139 
140  Each H(i) has the form
141 
142  H(i) = I - tau * v * v'
143 
144  where tau is a real scalar, and v is a real vector with
145  v(1:i) = 0 and v(i+1) = 1; v(i+2:n) is stored on exit in A(i+2:n,i),
146  and tau in TAU(i).
147 
148  The contents of A on exit are illustrated by the following examples
149  with n = 5:
150 
151  if UPLO = 'U': if UPLO = 'L':
152 
153  ( d e v2 v3 v4 ) ( d )
154  ( d e v3 v4 ) ( e d )
155  ( d e v4 ) ( v1 e d )
156  ( d e ) ( v1 v2 e d )
157  ( d ) ( v1 v2 v3 e d )
158 
159  where d and e denote diagonal and off-diagonal elements of T, and vi
160  denotes an element of the vector defining H(i).
161 
162  =====================================================================
163 
164 
165  Test the input parameters
166 
167  Parameter adjustments */
168  /* Table of constant values */
169  integer c__1 = 1;
170  integer c_n1 = -1;
171  integer c__3 = 3;
172  integer c__2 = 2;
173  Treal c_b22 = -1.;
174  Treal c_b23 = 1.;
175 
176  /* System generated locals */
177  integer a_dim1, a_offset, i__1, i__2, i__3;
178  /* Local variables */
179  integer i__, j;
180  integer nbmin, iinfo;
181  logical upper;
182  integer nb, kk, nx;
183  integer ldwork, lwkopt;
184  logical lquery;
185  integer iws;
186 #define a_ref(a_1,a_2) a[(a_2)*a_dim1 + a_1]
187 
188 
189  a_dim1 = *lda;
190  a_offset = 1 + a_dim1 * 1;
191  a -= a_offset;
192  --d__;
193  --e;
194  --tau;
195  --work;
196 
197  /* Initialization added by Elias to get rid of compiler warnings. */
198  lwkopt = 0;
199  /* Function Body */
200  *info = 0;
201  upper = template_blas_lsame(uplo, "U");
202  lquery = *lwork == -1;
203  if (! upper && ! template_blas_lsame(uplo, "L")) {
204  *info = -1;
205  } else if (*n < 0) {
206  *info = -2;
207  } else if (*lda < maxMACRO(1,*n)) {
208  *info = -4;
209  } else if (*lwork < 1 && ! lquery) {
210  *info = -9;
211  }
212 
213  if (*info == 0) {
214 
215 /* Determine the block size. */
216 
217  nb = template_lapack_ilaenv(&c__1, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1, (ftnlen)6,
218  (ftnlen)1);
219  lwkopt = *n * nb;
220  work[1] = (Treal) lwkopt;
221  }
222 
223  if (*info != 0) {
224  i__1 = -(*info);
225  template_blas_erbla("SYTRD ", &i__1);
226  return 0;
227  } else if (lquery) {
228  return 0;
229  }
230 
231 /* Quick return if possible */
232 
233  if (*n == 0) {
234  work[1] = 1.;
235  return 0;
236  }
237 
238  nx = *n;
239  iws = 1;
240  if (nb > 1 && nb < *n) {
241 
242 /* Determine when to cross over from blocked to unblocked code
243  (last block is always handled by unblocked code).
244 
245  Computing MAX */
246  i__1 = nb, i__2 = template_lapack_ilaenv(&c__3, "DSYTRD", uplo, n, &c_n1, &c_n1, &
247  c_n1, (ftnlen)6, (ftnlen)1);
248  nx = maxMACRO(i__1,i__2);
249  if (nx < *n) {
250 
251 /* Determine if workspace is large enough for blocked code. */
252 
253  ldwork = *n;
254  iws = ldwork * nb;
255  if (*lwork < iws) {
256 
257 /* Not enough workspace to use optimal NB: determine the
258  minimum value of NB, and reduce NB or force use of
259  unblocked code by setting NX = N.
260 
261  Computing MAX */
262  i__1 = *lwork / ldwork;
263  nb = maxMACRO(i__1,1);
264  nbmin = template_lapack_ilaenv(&c__2, "DSYTRD", uplo, n, &c_n1, &c_n1, &c_n1,
265  (ftnlen)6, (ftnlen)1);
266  if (nb < nbmin) {
267  nx = *n;
268  }
269  }
270  } else {
271  nx = *n;
272  }
273  } else {
274  nb = 1;
275  }
276 
277  if (upper) {
278 
279 /* Reduce the upper triangle of A.
280  Columns 1:kk are handled by the unblocked method. */
281 
282  kk = *n - (*n - nx + nb - 1) / nb * nb;
283  i__1 = kk + 1;
284  i__2 = -nb;
285  for (i__ = *n - nb + 1; i__2 < 0 ? i__ >= i__1 : i__ <= i__1; i__ +=
286  i__2) {
287 
288 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
289  matrix W which is needed to update the unreduced part of
290  the matrix */
291 
292  i__3 = i__ + nb - 1;
293  template_lapack_latrd(uplo, &i__3, &nb, &a[a_offset], lda, &e[1], &tau[1], &
294  work[1], &ldwork);
295 
296 /* Update the unreduced submatrix A(1:i-1,1:i-1), using an
297  update of the form: A := A - V*W' - W*V' */
298 
299  i__3 = i__ - 1;
300  template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(1, i__),
301  lda, &work[1], &ldwork, &c_b23, &a[a_offset], lda);
302 
303 /* Copy superdiagonal elements back into A, and diagonal
304  elements into D */
305 
306  i__3 = i__ + nb - 1;
307  for (j = i__; j <= i__3; ++j) {
308  a_ref(j - 1, j) = e[j - 1];
309  d__[j] = a_ref(j, j);
310 /* L10: */
311  }
312 /* L20: */
313  }
314 
315 /* Use unblocked code to reduce the last or only block */
316 
317  template_lapack_sytd2(uplo, &kk, &a[a_offset], lda, &d__[1], &e[1], &tau[1], &iinfo);
318  } else {
319 
320 /* Reduce the lower triangle of A */
321 
322  i__2 = *n - nx;
323  i__1 = nb;
324  for (i__ = 1; i__1 < 0 ? i__ >= i__2 : i__ <= i__2; i__ += i__1) {
325 
326 /* Reduce columns i:i+nb-1 to tridiagonal form and form the
327  matrix W which is needed to update the unreduced part of
328  the matrix */
329 
330  i__3 = *n - i__ + 1;
331  template_lapack_latrd(uplo, &i__3, &nb, &a_ref(i__, i__), lda, &e[i__], &tau[
332  i__], &work[1], &ldwork);
333 
334 /* Update the unreduced submatrix A(i+ib:n,i+ib:n), using
335  an update of the form: A := A - V*W' - W*V' */
336 
337  i__3 = *n - i__ - nb + 1;
338  template_blas_syr2k(uplo, "No transpose", &i__3, &nb, &c_b22, &a_ref(i__ + nb,
339  i__), lda, &work[nb + 1], &ldwork, &c_b23, &a_ref(i__ +
340  nb, i__ + nb), lda);
341 
342 /* Copy subdiagonal elements back into A, and diagonal
343  elements into D */
344 
345  i__3 = i__ + nb - 1;
346  for (j = i__; j <= i__3; ++j) {
347  a_ref(j + 1, j) = e[j];
348  d__[j] = a_ref(j, j);
349 /* L30: */
350  }
351 /* L40: */
352  }
353 
354 /* Use unblocked code to reduce the last or only block */
355 
356  i__1 = *n - i__ + 1;
357  template_lapack_sytd2(uplo, &i__1, &a_ref(i__, i__), lda, &d__[i__], &e[i__], &tau[
358  i__], &iinfo);
359  }
360 
361  work[1] = (Treal) lwkopt;
362  return 0;
363 
364 /* End of DSYTRD */
365 
366 } /* dsytrd_ */
367 
368 #undef a_ref
369 
370 
371 #endif
int template_lapack_sytd2(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, integer *info)
Definition: template_lapack_sytd2.h:41
int template_lapack_sytrd(const char *uplo, const integer *n, Treal *a, const integer *lda, Treal *d__, Treal *e, Treal *tau, Treal *work, const integer *lwork, integer *info)
Definition: template_lapack_sytrd.h:41
int integer
Definition: template_blas_common.h:38
integer template_lapack_ilaenv(const integer *ispec, const char *name__, const char *opts, const integer *n1, const integer *n2, const integer *n3, const integer *n4, ftnlen name_len, ftnlen opts_len)
Definition: template_lapack_common.cc:279
#define maxMACRO(a, b)
Definition: template_blas_common.h:43
int template_blas_erbla(const char *srname, integer *info)
Definition: template_blas_common.cc:144
int template_lapack_latrd(const char *uplo, const integer *n, const integer *nb, Treal *a, const integer *lda, Treal *e, Treal *tau, Treal *w, const integer *ldw)
Definition: template_lapack_latrd.h:40
#define a_ref(a_1, a_2)
bool logical
Definition: template_blas_common.h:39
int ftnlen
Definition: template_blas_common.h:40
int template_blas_syr2k(const char *uplo, const char *trans, const integer *n, const integer *k, const Treal *alpha, const Treal *a, const integer *lda, const Treal *b, const integer *ldb, const Treal *beta, Treal *c__, const integer *ldc)
Definition: template_blas_syr2k.h:40
logical template_blas_lsame(const char *ca, const char *cb)
Definition: template_blas_common.cc:44