CVM Class Library  8.1
This C++ class library encapsulates the concepts of vectors and of different kinds of matrices — including square, band, symmetric and Hermitian ones — in Euclidean space over the real and complex numbers.
 All Classes Files Functions Variables Typedefs Friends Macros Pages
rmatrix.cpp
Go to the documentation of this file.
1 // CVM Class Library
2 // http://cvmlib.com
3 //
4 // Copyright Sergei Nikolaev 1992-2014
5 // Distributed under the Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8 
9 #include "cvm.h"
10 #include "blas.h"
11 
13 
14 template<>
15 CVM_API void
16 __gemm<float, basic_rmatrix<float> >
17  (const basic_rmatrix<float>& ml, bool bTrans1,
18  const basic_rmatrix<float>& mr, bool bTrans2,
19  float dAlpha,
21  float dBeta)
22 {
23 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
24  SGEMM (bTrans1 ? Chars::pT() : Chars::pN(), 1, bTrans2 ? Chars::pT() : Chars::pN(), 1,
25 #else
26  SGEMM (bTrans1 ? Chars::pT() : Chars::pN(), bTrans2 ? Chars::pT() : Chars::pN(),
27 #endif
28  bTrans1 ? ml._pn() : ml._pm(),
29  bTrans2 ? mr._pm() : mr._pn(),
30  bTrans1 ? ml._pm() : ml._pn(),
31  &dAlpha,
32  ml._pd(), ml._pldm(),
33  mr._pd(), mr._pldm(),
34  &dBeta,
35  mRes, mRes._pld());
36 }
37 
38 template<>
39 CVM_API void
40 __gemm<double, basic_rmatrix<double> >
41  (const basic_rmatrix<double>& ml, bool bTrans1,
42  const basic_rmatrix<double>& mr, bool bTrans2,
43  double dAlpha,
45  double dBeta)
46 {
47 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
48  DGEMM (bTrans1 ? Chars::pT() : Chars::pN(), 1, bTrans2 ? Chars::pT() : Chars::pN(), 1,
49 #else
50  DGEMM (bTrans1 ? Chars::pT() : Chars::pN(), bTrans2 ? Chars::pT() : Chars::pN(),
51 #endif
52  bTrans1 ? ml._pn() : ml._pm(),
53  bTrans2 ? mr._pm() : mr._pn(),
54  bTrans1 ? ml._pm() : ml._pn(),
55  &dAlpha,
56  ml._pd(), ml._pldm(),
57  mr._pd(), mr._pldm(),
58  &dBeta,
59  mRes, mRes._pld());
60 }
61 
62 template<>
63 CVM_API void
64 __symm<float, basic_srsmatrix<float>, basic_rmatrix<float> >
65  (bool bLeft,
66  const basic_srsmatrix<float>& ml,
67  const basic_rmatrix<float>& mr,
68  float dAlpha,
70  float dBeta)
71 {
72  SSYMM (bLeft ? Chars::pL() : Chars::pR(),
73 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
74  1,
75 #endif
76  Chars::pU(),
77 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
78  1,
79 #endif
80  mRes._pm(), mRes._pn(),
81  &dAlpha,
82  ml._pd(), ml._pld(),
83  mr._pd(), mr._pld(),
84  &dBeta,
85  mRes, mRes._pld());
86 }
87 
88 template<>
89 CVM_API void
90 __symm<double, basic_srsmatrix<double>, basic_rmatrix<double> >
91  (bool bLeft,
92  const basic_srsmatrix<double>& ml,
93  const basic_rmatrix<double>& mr,
94  double dAlpha,
96  double dBeta)
97 {
98  DSYMM (bLeft ? Chars::pL() : Chars::pR(),
99 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
100  1,
101 #endif
102  Chars::pU(),
103 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
104  1,
105 #endif
106  mRes._pm(), mRes._pn(),
107  &dAlpha,
108  ml._pd(), ml._pld(),
109  mr._pd(), mr._pld(),
110  &dBeta,
111  mRes, mRes._pld());
112 }
113 
114 template <>
115 CVM_API void
116 __syrk<float, basic_srsmatrix<float> >
117  (bool bTransp,
118  float alpha, tint k,
119  const float* pA, tint ldA,
120  float beta, basic_srsmatrix<float>& m)
121 {
122  SSYRK (Chars::pU(),
123 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
124  1,
125 #endif
126  bTransp ? Chars::pT() : Chars::pN(),
127 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
128  1,
129 #endif
130  m._pm(), &k, &alpha, pA, &ldA, &beta, m, m._pld());
131 }
132 
133 template <>
134 CVM_API void
135 __syrk<double, basic_srsmatrix<double> >
136  (bool bTransp,
137  double alpha, tint k,
138  const double* pA, tint ldA,
139  double beta, basic_srsmatrix<double>& m)
140 {
141  DSYRK (Chars::pU(),
142 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
143  1,
144 #endif
145  bTransp ? Chars::pT() : Chars::pN(),
146 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
147  1,
148 #endif
149  m._pm(), &k, &alpha, pA, &ldA, &beta, m, m._pld());
150 }
151 
152 template <>
153 CVM_API void
154 __syr2k<float, basic_srsmatrix<float> >
155  (bool bTransp,
156  float alpha, tint k,
157  const float* pA, tint ldA,
158  const float* pB, tint ldB,
159  float beta, basic_srsmatrix<float>& m)
160 {
161  SSYR2K (Chars::pU(),
162 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
163  1,
164 #endif
165  bTransp ? Chars::pT() : Chars::pN(),
166 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
167  1,
168 #endif
169  m._pm(), &k, &alpha, pA, &ldA, pB, &ldB, &beta, m, m._pld());
170 }
171 
172 template <>
173 CVM_API void
174 __syr2k<double, basic_srsmatrix<double> >
175  (bool bTransp,
176  double alpha, tint k,
177  const double* pA, tint ldA,
178  const double* pB, tint ldB,
179  double beta, basic_srsmatrix<double>& m)
180 {
181  DSYR2K (Chars::pU(),
182 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
183  1,
184 #endif
185  bTransp ? Chars::pT() : Chars::pN(),
186 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
187  1,
188 #endif
189  m._pm(), &k, &alpha, pA, &ldA, pB, &ldB, &beta, m, m._pld());
190 }
191 
192 // QR Case 1: "economy" mode, A is (m x n) and Q is (m x n) and R is (n x n)
193 template <>
194 CVM_API void
195 __qre<basic_rmatrix<float>, basic_srmatrix<float> >
196  (const basic_rmatrix<float>& mArg,
199 {
200  const tint nM = mArg.msize();
201  const tint nN = mArg.nsize();
202  const tint nK = _cvm_min<tint>(nM, nN);
203 
204  // we will eventually overwrite mQ to be the output matrix
205  mQ = mArg;
206  basic_rvector<float> vTau (nK);
207 
208  tint lWork = -1;
209  tint nOutInfo = 0;
210  float dWork;
211 
212  // calculate size of workspace
213  SGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
214  lWork = static_cast<tint>(dWork);
215  basic_rvector<float> vWork(lWork);
216 
217  // now do most of hardwork, find R and TAU and Householderish bits of Q
218  SGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
219  _check_negative(CVM_WRONGMKLARG, nOutInfo);
220 
221  // get upper-triangular R from overwritten A
222  mR.vanish();
223  for (tint row = CVM0; row < nK + CVM0; ++row)
224  for (tint col = row; col < nN + CVM0; ++col)
225  mR(row,col) = mQ(row,col);
226 
227  // calculate size of workspace for finding Q
228  lWork = -1;
229  SORGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
230  lWork = static_cast<tint>(dWork);
231  if (lWork > vWork.size()) vWork.resize(lWork);
232 
233  // find Q
234  SORGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
235  _check_negative(CVM_WRONGMKLARG, nOutInfo);
236 }
237 
238 // QR Case 1: "economy" mode, A is (m x n) and Q is (m x n) and R is (n x n)
239 template <>
240 CVM_API void
241 __qre<basic_rmatrix<double>, basic_srmatrix<double> >
242  (const basic_rmatrix<double>& mArg,
245 {
246  const tint nM = mArg.msize();
247  const tint nN = mArg.nsize();
248  const tint nK = _cvm_min<tint>(nM, nN);
249 
250  // we will eventually overwrite mQ to be the output matrix
251  mQ = mArg;
252  basic_rvector<double> vTau (nK);
253 
254  tint lWork = -1;
255  tint nOutInfo = 0;
256  double dWork;
257 
258  // calculate size of workspace
259  DGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
260  lWork = static_cast<tint>(dWork);
261  basic_rvector<double> vWork(lWork);
262 
263  // now do most of hardwork, find R and TAU and Householderish bits of Q
264  DGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
265  _check_negative(CVM_WRONGMKLARG, nOutInfo);
266 
267  // get upper-triangular R from overwritten A
268  mR.vanish();
269  for (tint row = CVM0; row < nK + CVM0; ++row)
270  for (tint col = row; col < nN + CVM0; ++col)
271  mR(row,col) = mQ(row,col);
272 
273  // calculate size of workspace for finding Q
274  lWork = -1;
275  DORGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
276  lWork = static_cast<tint>(dWork);
277  if (lWork > vWork.size()) vWork.resize(lWork);
278 
279  // find Q
280  DORGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
281  _check_negative(CVM_WRONGMKLARG, nOutInfo);
282 }
283 
284 // QR Case 2: full mode, A is (m x n) and Q is (m x m) and R is (m x n)
285 template <>
286 CVM_API void
287 __qrf<basic_rmatrix<float>, basic_srmatrix<float> >
288  (const basic_rmatrix<float>& mArg,
291 {
292  const tint nM = mArg.msize();
293  const tint nN = mArg.nsize();
294  const tint nK = _cvm_min<tint>(nM, nN);
295 
296  // unlike "economy" mode, we need a copy here since Q will be m x m and this may bigger than original A
297  basic_rmatrix<float> mA (nM, nN <= nM ? nM : nN);
298 
299  // copy over argument matrix
300  mA.assign (CVM0, CVM0, mArg);
301 
302  basic_rvector<float> vTau (nK);
303 
304  tint row, col;
305  tint lWork = -1;
306  tint nOutInfo = 0;
307  float dWork;
308 
309  // calculate size of workspace
310  SGEQRF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
311  lWork = static_cast<tint>(dWork);
312  basic_rvector<float> vWork(lWork);
313 
314  // now do most of hardwork, find R and TAU and Householderish bits of Q
315  SGEQRF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
316  _check_negative(CVM_WRONGMKLARG, nOutInfo);
317 
318  // get upper-triangular R which is now m x n from overwritten A
319  mR.vanish();
320  for (row = CVM0; row < nK + CVM0; ++row)
321  for (col = row; col < nN + CVM0; ++col)
322  mR(row,col) = mA(row,col);
323 
324  // calculate size of workspace for finding Q that is m x m
325  lWork = -1;
326  SORGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
327  lWork = static_cast<tint>(dWork);
328  if (lWork > vWork.size()) vWork.resize(lWork);
329 
330  // find Q that is m x m
331  SORGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
332  _check_negative(CVM_WRONGMKLARG, nOutInfo);
333 
334  // is Q big enough to have all conents of mA ?
335  if (nN <= nM)
336  mQ.assign (CVM0, CVM0, mA);
337  else
338  for (row = CVM0; row < nM + CVM0; ++row)
339  for (col = CVM0; col < nM + CVM0; ++col)
340  mQ(row,col) = mA(row,col);
341 }
342 
343 // QR Case 2: full mode, A is (m x n) and Q is (m x m) and R is (m x n)
344 template <>
345 CVM_API void
346 __qrf<basic_rmatrix<double>, basic_srmatrix<double> >
347  (const basic_rmatrix<double>& mArg,
350 {
351  const tint nM = mArg.msize();
352  const tint nN = mArg.nsize();
353  const tint nK = _cvm_min<tint>(nM, nN);
354 
355  // unlike "economy" mode, we need a copy here since Q will be m x m and this may bigger than original A
356  basic_rmatrix<double> mA (nM, nN <= nM ? nM : nN);
357 
358  // copy over argument matrix
359  mA.assign (CVM0, CVM0, mArg);
360 
361  basic_rvector<double> vTau (nK);
362 
363  tint row, col;
364  tint lWork = -1;
365  tint nOutInfo = 0;
366  double dWork;
367 
368  // calculate size of workspace
369  DGEQRF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
370  lWork = static_cast<tint>(dWork);
371  basic_rvector<double> vWork(lWork);
372 
373  // now do most of hardwork, find R and TAU and Householderish bits of Q
374  DGEQRF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
375  _check_negative(CVM_WRONGMKLARG, nOutInfo);
376 
377  // get upper-triangular R which is now m x n from overwritten A
378  mR.vanish();
379  for (row = CVM0; row < nK + CVM0; ++row)
380  for (col = row; col < nN + CVM0; ++col)
381  mR(row,col) = mA(row,col);
382 
383  // calculate size of workspace for finding Q that is m x m
384  lWork = -1;
385  DORGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
386  lWork = static_cast<tint>(dWork);
387  if (lWork > vWork.size()) vWork.resize(lWork);
388 
389  // find Q that is m x m
390  DORGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
391  _check_negative(CVM_WRONGMKLARG, nOutInfo);
392 
393  // is Q big enough to have all conents of mA ?
394  if (nN <= nM)
395  mQ.assign (CVM0, CVM0, mA);
396  else
397  for (row = CVM0; row < nM + CVM0; ++row)
398  for (col = CVM0; col < nM + CVM0; ++col)
399  mQ(row,col) = mA(row,col);
400 }
401 
402 
403 // RQ Case 1: "economy" mode, A is (m x n) and Q is (m x n) and R is (m x m)
404 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
405 template <>
406 CVM_API void
407 __rqe<basic_rmatrix<float>, basic_srmatrix<float> >
408  (const basic_rmatrix<float>& mArg,
411 {
412  const tint nM = mArg.msize();
413  const tint nN = mArg.nsize();
414 
415  // we will eventually overwrite mQ to be the output matrix
416  mQ = mArg;
417 
418  basic_rvector<float> vTau (nM);
419 
420  tint row, col;
421  tint lWork = -1;
422  tint nOutInfo = 0;
423  float dWork;
424 
425  // calculate size of workspace
426  SGERQF (&nM, &nN, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
427  lWork = static_cast<tint>(dWork);
428  basic_rvector<float> vWork(lWork);
429 
430  // now do most of hardwork, find R and TAU and Householderish bits of Q
431  SGERQF (&nM, &nN, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
432  _check_negative(CVM_WRONGMKLARG, nOutInfo);
433 
434  // get upper-triangular R which is now m x m from overwritten A
435  mR.vanish();
436  for (row = CVM0; row < nM + CVM0; ++row)
437  for (col = row; col < nM + CVM0; ++col)
438  mR(row,col) = mQ(row,nN - nM + col);
439 
440  // calculate size of workspace for finding Q that is m x n
441  lWork = -1;
442  SORGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
443  lWork = static_cast<tint>(dWork);
444  if (lWork > vWork.size()) vWork.resize(lWork);
445 
446  // find Q that is m x n
447  SORGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
448  _check_negative(CVM_WRONGMKLARG, nOutInfo);
449 }
450 
451 // RQ Case 1: "economy" mode, A is (m x n) and Q is (m x n) and R is (m x m)
452 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
453 template <>
454 CVM_API void
455 __rqe<basic_rmatrix<double>, basic_srmatrix<double> >
456  (const basic_rmatrix<double>& mArg,
459 {
460  const tint nM = mArg.msize();
461  const tint nN = mArg.nsize();
462 
463  // we will eventually overwrite mQ to be the output matrix
464  mQ = mArg;
465 
466  basic_rvector<double> vTau (nM);
467 
468  tint row, col;
469  tint lWork = -1;
470  tint nOutInfo = 0;
471  double dWork;
472 
473  // calculate size of workspace
474  DGERQF (&nM, &nN, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
475  lWork = static_cast<tint>(dWork);
476  basic_rvector<double> vWork(lWork);
477 
478  // now do most of hardwork, find R and TAU and Householderish bits of Q
479  DGERQF (&nM, &nN, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
480  _check_negative(CVM_WRONGMKLARG, nOutInfo);
481 
482  // get upper-triangular R which is now m x m from overwritten A
483  mR.vanish();
484  for (row = CVM0; row < nM + CVM0; ++row)
485  for (col = row; col < nM + CVM0; ++col)
486  mR(row,col) = mQ(row,nN - nM + col);
487 
488  // calculate size of workspace for finding Q that is m x n
489  lWork = -1;
490  DORGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
491  lWork = static_cast<tint>(dWork);
492  if (lWork > vWork.size()) vWork.resize(lWork);
493 
494  // find Q that is m x n
495  DORGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
496  _check_negative(CVM_WRONGMKLARG, nOutInfo);
497 }
498 
499 // RQ Case 2: full mode, A is (m x n) and Q is (n x n) and R is (m x n)
500 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
501 template <>
502 CVM_API void
503 __rqf<basic_rmatrix<float>, basic_srmatrix<float> >
504  (const basic_rmatrix<float>& mArg,
507 {
508  const tint nM = mArg.msize();
509  const tint nN = mArg.nsize();
510 
511  // unlike "economy" mode, we need a copy here since Q will be n x n and this may be bigger than original A
512  basic_rmatrix<float> mA (nN, nN);
513 
514  // copy over argument matrix
515  mA.assign (CVM0, CVM0, mArg);
516 
517  basic_rvector<float> vTau (nM);
518 
519  tint row, col;
520  tint lWork = -1;
521  tint nOutInfo = 0;
522  float dWork;
523 
524  // calculate size of workspace
525  SGERQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
526  lWork = static_cast<tint>(dWork);
527  basic_rvector<float> vWork(lWork);
528 
529  // now do most of hardwork, find R and TAU and Householderish bits of Q
530  SGERQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
531  _check_negative(CVM_WRONGMKLARG, nOutInfo);
532 
533  // get upper-trapezoidal (triangular) R which is now m x n from overwritten A
534  mR.vanish();
535  for (row = CVM0; row < nM + CVM0; ++row)
536  for (col = nN - nM + row; col < nN + CVM0; ++col)
537  mR(row,col) = mA(row,col);
538 
539  // calculate size of workspace for finding Q that is n x n
540  lWork = -1;
541  SORGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
542  lWork = static_cast<tint>(dWork);
543  if (lWork > vWork.size()) vWork.resize(lWork);
544 
545  // find Q that is n x n
546  SORGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
547  _check_negative(CVM_WRONGMKLARG, nOutInfo);
548 
549  for (row = CVM0; row < nM + CVM0; ++row)
550  for (col = CVM0; col < nN + CVM0; ++col)
551  mQ(nN - nM + row,col) = mA(row,col);
552 }
553 
554 // RQ Case 2: full mode, A is (m x n) and Q is (n x n) and R is (m x n)
555 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
556 template <>
557 CVM_API void
558 __rqf<basic_rmatrix<double>, basic_srmatrix<double> >
559  (const basic_rmatrix<double>& mArg,
562 {
563  const tint nM = mArg.msize();
564  const tint nN = mArg.nsize();
565 
566  // unlike "economy" mode, we need a copy here since Q will be n x n and this may be bigger than original A
567  basic_rmatrix<double> mA (nN, nN);
568 
569  // copy over argument matrix
570  mA.assign (CVM0, CVM0, mArg);
571 
572  basic_rvector<double> vTau (nM);
573 
574  tint row, col;
575  tint lWork = -1;
576  tint nOutInfo = 0;
577  double dWork;
578 
579  // calculate size of workspace
580  DGERQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
581  lWork = static_cast<tint>(dWork);
582  basic_rvector<double> vWork(lWork);
583 
584  // now do most of hardwork, find R and TAU and Householderish bits of Q
585  DGERQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
586  _check_negative(CVM_WRONGMKLARG, nOutInfo);
587 
588  // get upper-trapezoidal (triangular) R which is now m x n from overwritten A
589  mR.vanish();
590  for (row = CVM0; row < nM + CVM0; ++row)
591  for (col = nN - nM + row; col < nN + CVM0; ++col)
592  mR(row,col) = mA(row,col);
593 
594  // calculate size of workspace for finding Q that is n x n
595  lWork = -1;
596  DORGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
597  lWork = static_cast<tint>(dWork);
598  if (lWork > vWork.size()) vWork.resize(lWork);
599 
600  // find Q that is n x n
601  DORGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
602  _check_negative(CVM_WRONGMKLARG, nOutInfo);
603 
604  for (row = CVM0; row < nM + CVM0; ++row)
605  for (col = CVM0; col < nN + CVM0; ++col)
606  mQ(nN - nM + row,col) = mA(row,col);
607 }
608 
609 
610 // LQ Case 1: "economy" mode, A is (m x n) and L is (m x m) and Q is (m x n)
611 template <>
612 CVM_API void
613 __lqe<basic_rmatrix<float>, basic_srmatrix<float> >
614  (const basic_rmatrix<float>& mArg,
617 {
618  const tint nM = mArg.msize();
619  const tint nN = mArg.nsize();
620  const tint nK = _cvm_min<tint>(nM, nN);
621 
622  // we will eventually overwrite mQ to be the output matrix
623  mQ = mArg;
624  basic_rvector<float> vTau (nK);
625 
626  tint lWork = -1;
627  tint nOutInfo = 0;
628  float dWork;
629 
630  // calculate size of workspace
631  SGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
632  lWork = static_cast<tint>(dWork);
633  basic_rvector<float> vWork(lWork);
634 
635  // now do most of hardwork, find R and TAU and Householderish bits of Q
636  SGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
637  _check_negative(CVM_WRONGMKLARG, nOutInfo);
638 
639  // get lower-triangular L from overwritten A
640  mL.vanish();
641  for (tint col = CVM0; col < nK + CVM0; ++col)
642  for (tint row = col; row < nM + CVM0; ++row)
643  mL(row,col) = mQ(row,col);
644 
645  // calculate size of workspace for finding Q
646  lWork = -1;
647  SORGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
648  lWork = static_cast<tint>(dWork);
649  if (lWork > vWork.size()) vWork.resize(lWork);
650 
651  // find Q
652  SORGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
653  _check_negative(CVM_WRONGMKLARG, nOutInfo);
654 }
655 
656 template <>
657 CVM_API void
658 __lqe<basic_rmatrix<double>, basic_srmatrix<double> >
659  (const basic_rmatrix<double>& mArg,
662 {
663  const tint nM = mArg.msize();
664  const tint nN = mArg.nsize();
665  const tint nK = _cvm_min<tint>(nM, nN);
666 
667  // we will eventually overwrite mQ to be the output matrix
668  mQ = mArg;
669  basic_rvector<double> vTau (nK);
670 
671  tint lWork = -1;
672  tint nOutInfo = 0;
673  double dWork;
674 
675  // calculate size of workspace
676  DGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
677  lWork = static_cast<tint>(dWork);
678  basic_rvector<double> vWork(lWork);
679 
680  // now do most of hardwork, find R and TAU and Householderish bits of Q
681  DGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
682  _check_negative(CVM_WRONGMKLARG, nOutInfo);
683 
684  // get lower-triangular L from overwritten A
685  mL.vanish();
686  for (tint col = CVM0; col < nK + CVM0; ++col)
687  for (tint row = col; row < nM + CVM0; ++row)
688  mL(row,col) = mQ(row,col);
689 
690  // calculate size of workspace for finding Q
691  lWork = -1;
692  DORGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
693  lWork = static_cast<tint>(dWork);
694  if (lWork > vWork.size()) vWork.resize(lWork);
695 
696  // find Q
697  DORGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
698  _check_negative(CVM_WRONGMKLARG, nOutInfo);
699 }
700 
701 // LQ Case 2: full mode, A is (m x n) and L is (m x n) and Q is (n x n)
702 template <>
703 CVM_API void
704 __lqf<basic_rmatrix<float>, basic_srmatrix<float> >
705  (const basic_rmatrix<float>& mArg,
708 {
709  const tint nM = mArg.msize();
710  const tint nN = mArg.nsize();
711  const tint nK = _cvm_min<tint>(nM, nN);
712 
713  // unlike "economy" mode, we need a copy here since Q will be n x n and this may bigger than original A
714  basic_rmatrix<float> mA (nM <= nN ? nN : nM, nN);
715 
716  // copy over argument matrix
717  mA.assign (CVM0, CVM0, mArg);
718 
719  basic_rvector<float> vTau (nK);
720 
721  tint row, col;
722  tint lWork = -1;
723  tint nOutInfo = 0;
724  float dWork;
725 
726  // calculate size of workspace
727  SGELQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
728  lWork = static_cast<tint>(dWork);
729  basic_rvector<float> vWork(lWork);
730 
731  // now do most of hardwork, find R and TAU and Householderish bits of Q
732  SGELQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
733  _check_negative(CVM_WRONGMKLARG, nOutInfo);
734 
735  // get lower-triangular L from overwritten A
736  mL.vanish();
737  for (tint col = CVM0; col < nK + CVM0; ++col)
738  for (tint row = col; row < nM + CVM0; ++row)
739  mL(row,col) = mA(row,col);
740 
741  // calculate size of workspace for finding Q that is m x m
742  lWork = -1;
743  SORGLQ (&nN, &nN, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
744  lWork = static_cast<tint>(dWork);
745  if (lWork > vWork.size()) vWork.resize(lWork);
746 
747  // find Q that is n x n
748  SORGLQ (&nN, &nN, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
749  _check_negative(CVM_WRONGMKLARG, nOutInfo);
750 
751  // is Q big enough to have all conents of mA ?
752  if (nM <= nN)
753  mQ.assign (CVM0, CVM0, mA);
754  else
755  for (row = CVM0; row < nN + CVM0; ++row)
756  for (col = CVM0; col < nN + CVM0; ++col)
757  mQ(row,col) = mA(row,col);
758 }
759 
760 // LQ Case 2: full mode, A is (m x n) and L is (m x n) and Q is (n x n)
761 template <>
762 CVM_API void
763 __lqf<basic_rmatrix<double>, basic_srmatrix<double> >
764  (const basic_rmatrix<double>& mArg,
767 {
768  const tint nM = mArg.msize();
769  const tint nN = mArg.nsize();
770  const tint nK = _cvm_min<tint>(nM, nN);
771 
772  // unlike "economy" mode, we need a copy here since Q will be n x n and this may bigger than original A
773  basic_rmatrix<double> mA (nM <= nN ? nN : nM, nN);
774 
775  // copy over argument matrix
776  mA.assign (CVM0, CVM0, mArg);
777 
778  basic_rvector<double> vTau (nK);
779 
780  tint row, col;
781  tint lWork = -1;
782  tint nOutInfo = 0;
783  double dWork;
784 
785  // calculate size of workspace
786  DGELQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
787  lWork = static_cast<tint>(dWork);
788  basic_rvector<double> vWork(lWork);
789 
790  // now do most of hardwork, find R and TAU and Householderish bits of Q
791  DGELQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
792  _check_negative(CVM_WRONGMKLARG, nOutInfo);
793 
794  // get lower-triangular L from overwritten A
795  mL.vanish();
796  for (tint col = CVM0; col < nK + CVM0; ++col)
797  for (tint row = col; row < nM + CVM0; ++row)
798  mL(row,col) = mA(row,col);
799 
800  // calculate size of workspace for finding Q that is m x m
801  lWork = -1;
802  DORGLQ (&nN, &nN, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
803  lWork = static_cast<tint>(dWork);
804  if (lWork > vWork.size()) vWork.resize(lWork);
805 
806  // find Q that is n x n
807  DORGLQ (&nN, &nN, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
808  _check_negative(CVM_WRONGMKLARG, nOutInfo);
809 
810  // is Q big enough to have all conents of mA ?
811  if (nM <= nN)
812  mQ.assign (CVM0, CVM0, mA);
813  else
814  for (row = CVM0; row < nN + CVM0; ++row)
815  for (col = CVM0; col < nN + CVM0; ++col)
816  mQ(row,col) = mA(row,col);
817 }
818 
819 // QL Case 1: "economy" mode, A is (m x n) and Q is (m x n) and L is (n x n)
820 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M >= N
821 template <>
822 CVM_API void
823 __qle<basic_rmatrix<float>, basic_srmatrix<float> >
824  (const basic_rmatrix<float>& mArg,
827 {
828  const tint nM = mArg.msize();
829  const tint nN = mArg.nsize();
830  const tint nK = _cvm_min<tint>(nM, nN);
831 
832  // we will eventually overwrite mQ to be the output matrix
833  mQ = mArg;
834  basic_rvector<float> vTau (nK);
835 
836  tint lWork = -1;
837  tint nOutInfo = 0;
838  float dWork;
839 
840  // calculate size of workspace
841  SGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
842  lWork = static_cast<tint>(dWork);
843  basic_rvector<float> vWork(lWork);
844 
845  // now do most of hardwork, find R and TAU and Householderish bits of Q
846  SGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
847  _check_negative(CVM_WRONGMKLARG, nOutInfo);
848 
849  // get lower-triangular L from overwritten A
850  mL.vanish();
851  for (tint col = CVM0; col < nN + CVM0; ++col)
852  for (tint row = col; row < nK + CVM0; ++row)
853  mL(row,col) = mQ(nM - nN + row,col);
854 
855  // calculate size of workspace for finding Q
856  lWork = -1;
857  SORGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
858  lWork = static_cast<tint>(dWork);
859  if (lWork > vWork.size()) vWork.resize(lWork);
860 
861  // find Q
862  SORGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
863  _check_negative(CVM_WRONGMKLARG, nOutInfo);
864 }
865 
866 // QL Case 1: "economy" mode, A is (m x n) and Q is (m x n) and L is (n x n)
867 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M >= N
868 template <>
869 CVM_API void
870 __qle<basic_rmatrix<double>, basic_srmatrix<double> >
871  (const basic_rmatrix<double>& mArg,
874 {
875  const tint nM = mArg.msize();
876  const tint nN = mArg.nsize();
877  const tint nK = _cvm_min<tint>(nM, nN);
878 
879  // we will eventually overwrite mQ to be the output matrix
880  mQ = mArg;
881  basic_rvector<double> vTau (nK);
882 
883  tint lWork = -1;
884  tint nOutInfo = 0;
885  double dWork;
886 
887  // calculate size of workspace
888  DGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
889  lWork = static_cast<tint>(dWork);
890  basic_rvector<double> vWork(lWork);
891 
892  // now do most of hardwork, find R and TAU and Householderish bits of Q
893  DGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
894  _check_negative(CVM_WRONGMKLARG, nOutInfo);
895 
896  // get lower-triangular L from overwritten A
897  mL.vanish();
898  for (tint col = CVM0; col < nN + CVM0; ++col)
899  for (tint row = col; row < nK + CVM0; ++row)
900  mL(row,col) = mQ(nM - nN + row,col);
901 
902  // calculate size of workspace for finding Q
903  lWork = -1;
904  DORGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
905  lWork = static_cast<tint>(dWork);
906  if (lWork > vWork.size()) vWork.resize(lWork);
907 
908  // find Q
909  DORGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
910  _check_negative(CVM_WRONGMKLARG, nOutInfo);
911 }
912 
913 // QL Case 2: full mode, A is (m x n) and Q is (m x m) and L is (m x n)
914 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M >= N
915 template <>
916 CVM_API void
917 __qlf<basic_rmatrix<float>, basic_srmatrix<float> >
918  (const basic_rmatrix<float>& mArg,
921 {
922  const tint nM = mArg.msize();
923  const tint nN = mArg.nsize();
924 
925  // unlike "economy" mode, we need a copy here since Q will be m x m and this may be bigger than original A
926  basic_rmatrix<float> mA (nM, nM);
927 
928  // copy over argument matrix
929  mA.assign (CVM0, CVM0, mArg);
930 
931  basic_rvector<float> vTau (nN);
932 
933  tint row, col;
934  tint lWork = -1;
935  tint nOutInfo = 0;
936  float dWork;
937 
938  // calculate size of workspace
939  SGEQLF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
940  lWork = static_cast<tint>(dWork);
941  basic_rvector<float> vWork(lWork);
942 
943  // now do most of hardwork, find R and TAU and Householderish bits of Q
944  SGEQLF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
945  _check_negative(CVM_WRONGMKLARG, nOutInfo);
946 
947  // get lower triangular L which is now m x n from overwritten A
948  mL.vanish();
949  for (col = CVM0; col < nN + CVM0; ++col)
950  for (row = nM - nN + col; row < nM + CVM0; ++row)
951  mL(row,col) = mA(row,col);
952 
953  // calculate size of workspace for finding Q that is n x n
954  lWork = -1;
955  SORGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
956  lWork = static_cast<tint>(dWork);
957  if (lWork > vWork.size()) vWork.resize(lWork);
958 
959  // find Q that is n x n
960  SORGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
961  _check_negative(CVM_WRONGMKLARG, nOutInfo);
962 
963  for (row = CVM0; row < nM + CVM0; ++row)
964  for (col = CVM0; col < nN + CVM0; ++col)
965  mQ(row,nM - nN + col) = mA(row,col);
966 }
967 
968 // QL Case 2: full mode, A is (m x n) and Q is (m x m) and L is (m x n)
969 // Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M >= N
970 template <>
971 CVM_API void
972 __qlf<basic_rmatrix<double>, basic_srmatrix<double> >
973  (const basic_rmatrix<double>& mArg,
// NOTE(review): the output-parameter lines (doc lines 974-975) were lost in this
// extracted listing -- presumably basic_srmatrix<double>& mQ and
// basic_rmatrix<double>& mL, by analogy with the body's use of mQ/mL below and
// with the float specialization; confirm against the original source.
976 {
977  const tint nM = mArg.msize();   // m: rows of A
978  const tint nN = mArg.nsize();   // n: columns of A (header comment above assumes m >= n)
979 
980  // unlike "economy" mode, we need a copy here since Q will be m x m and this may be bigger than original A
981  basic_rmatrix<double> mA (nM, nM);
982 
983  // copy over argument matrix
984  mA.assign (CVM0, CVM0, mArg);
985 
986  basic_rvector<double> vTau (nN);   // scalar factors of the elementary reflectors (TAU)
987 
988  tint row, col;
989  tint lWork = -1;   // lwork == -1 requests a workspace-size query from LAPACK
990  tint nOutInfo = 0;
991  double dWork;
992 
993  // calculate size of workspace
994  DGEQLF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
995  lWork = static_cast<tint>(dWork);
996  basic_rvector<double> vWork(lWork);
997 
998  // now do most of hardwork, find L and TAU and Householderish bits of Q (QL yields L, not R)
999  DGEQLF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
1000  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1001 
1002  // get lower triangular L which is now m x n from overwritten A
1003  mL.vanish();
1004  for (col = CVM0; col < nN + CVM0; ++col)
1005  for (row = nM - nN + col; row < nM + CVM0; ++row)
1006  mL(row,col) = mA(row,col);
1007 
1008  // calculate size of workspace for generating the explicit columns of Q
1009  lWork = -1;
1010  DORGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
1011  lWork = static_cast<tint>(dWork);
1012  if (lWork > vWork.size()) vWork.resize(lWork);
1013 
1014  // generate the n columns of Q defined by the reflectors (DORGQL overwrites mA with an m x n block)
1015  DORGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
1016  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1017 
// copy the generated columns into the LAST n columns of the m x m mQ.
// NOTE(review): the first m-n columns of mQ are not written by this loop --
// presumably the caller prepares/zeroes them; confirm against the caller.
1018  for (row = CVM0; row < nM + CVM0; ++row)
1019  for (col = CVM0; col < nN + CVM0; ++col)
1020  mQ(row,nM - nN + col) = mA(row,col);
1021 }
1022 
1023 // LLS routines
// Linear least squares / minimum-norm solution via LAPACK SGELS (QR or LQ of full-rank A),
// float specialization. Solves A*X=B (or A^T*X=B when 'transpose' is set).
1024 template <>
1025 CVM_API void
1026 __gels<basic_rmatrix<float>, basic_rvector<float> >
1027  (bool transpose,
// NOTE(review): the mA parameter line (doc line 1028) was lost in this extracted
// listing -- presumably basic_rmatrix<float>& mA (SGELS overwrites A); confirm
// against the original source.
1029  const basic_rmatrix<float>& mB,
1030  basic_rmatrix<float>& mX, // output: will be resized anyway, so better pass it empty
1031  basic_rvector<float>& vErr) throw (cvmexception)
1032 {
1033  const tint nM = mA.msize();
1034  const tint nN = mA.nsize();
1035  const tint nL = _cvm_min<tint>(nM, nN);   // rows of the final solution
1036  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS plus residual space
1037  const tint nrhs = mB.nsize();             // number of right-hand sides
1038  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1039  tint nOutInfo = 0;
1040  float dWork;
1041  const char* trans = transpose ? Chars::pT() : Chars::pN();
1042 
1043  mX.resize(nK, nrhs); // first we might need extra space for residuals
1044  mX.assign(CVM0, CVM0, mB);
1045 
1046  // calculate size of workspace
1047  SGELS (trans,
1048 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1049  1,
1050 #endif
1051  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), &dWork, &lWork, &nOutInfo);
1052  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1053  lWork = static_cast<tint>(dWork);
1054  basic_rvector<float> vWork(lWork);
1055 
// solve; SGELS overwrites mA with its factorization and mX with the solution
1056  SGELS (trans,
1057 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1058  1,
1059 #endif
1060  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), vWork, &lWork, &nOutInfo);
1061  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1062 
1063  // collecting residuals if applicable (overdetermined case only)
// vErr[col] receives the SQUARED 2-norm of the residual for each right-hand side
// (no sqrt is taken); assumes vErr already has nrhs elements -- TODO confirm the caller sizes it
1064  vErr.set(0.F);
1065  if ((!transpose && nM > nN) || (transpose && nM < nN))
1066  {
1067  for (tint col = CVM0; col < nrhs + CVM0; col++)
1068  {
1069  for (tint row = nL + CVM0; row < nK + CVM0; row++)
1070  {
1071  vErr[col] += mX(row, col)*mX(row, col);
1072  }
1073  }
1074  mX.resize(nL, nrhs);   // drop the residual rows, leaving the nL x nrhs solution
1075  }
1076 }
1077 
// Linear least squares / minimum-norm solution via LAPACK DGELS (QR or LQ of full-rank A),
// double specialization. Solves A*X=B (or A^T*X=B when 'transpose' is set).
1078 template <>
1079 CVM_API void
1080 __gels<basic_rmatrix<double>, basic_rvector<double> >
1081  (bool transpose,
// NOTE(review): the mA parameter line (doc line 1082) was lost in this extracted
// listing -- presumably basic_rmatrix<double>& mA (DGELS overwrites A); confirm
// against the original source.
1083  const basic_rmatrix<double>& mB,
1084  basic_rmatrix<double>& mX, // output: will be resized anyway, so better pass it empty
1085  basic_rvector<double>& vErr) throw (cvmexception)
1086 {
1087  const tint nM = mA.msize();
1088  const tint nN = mA.nsize();
1089  const tint nL = _cvm_min<tint>(nM, nN);   // rows of the final solution
1090  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS plus residual space
1091  const tint nrhs = mB.nsize();             // number of right-hand sides
1092  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1093  tint nOutInfo = 0;
1094  double dWork;
1095  const char* trans = transpose ? Chars::pT() : Chars::pN();
1096 
1097  mX.resize(nK, nrhs); // first we might need extra space for residuals
1098  mX.assign(CVM0, CVM0, mB);
1099 
1100  // calculate size of workspace
1101  DGELS (trans,
1102 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1103  1,
1104 #endif
1105  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), &dWork, &lWork, &nOutInfo);
1106  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1107  lWork = static_cast<tint>(dWork);
1108  basic_rvector<double> vWork(lWork);
1109 
// solve; DGELS overwrites mA with its factorization and mX with the solution
1110  DGELS (trans,
1111 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1112  1,
1113 #endif
1114  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), vWork, &lWork, &nOutInfo);
1115  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1116 
1117  // collecting residuals if applicable (overdetermined case only)
// vErr[col] receives the SQUARED 2-norm of the residual for each right-hand side
// (no sqrt is taken); assumes vErr already has nrhs elements -- TODO confirm the caller sizes it
1118  vErr.set(0.);
1119  if ((!transpose && nM > nN) || (transpose && nM < nN))
1120  {
1121  for (tint col = CVM0; col < nrhs + CVM0; col++)
1122  {
1123  for (tint row = nL + CVM0; row < nK + CVM0; row++)
1124  {
1125  vErr[col] += mX(row, col)*mX(row, col);
1126  }
1127  }
1128  mX.resize(nL, nrhs);   // drop the residual rows, leaving the nL x nrhs solution
1129  }
1130 }
1131 
// Rank-deficient least squares via LAPACK SGELSY (complete orthogonal factorization
// with column pivoting), float specialization. Effective rank is returned in 'rank'.
1132 template <>
1133 CVM_API void
1134 __gelsy<float, basic_rmatrix<float> >
// NOTE(review): the mA parameter line (doc line 1135) was lost in this extracted
// listing -- presumably basic_rmatrix<float>& mA (SGELSY overwrites A); confirm
// against the original source.
1136  const basic_rmatrix<float>& mB,
1137  basic_rmatrix<float>& mX, // output: will be resized anyway, so better pass it empty
1138  float rcond,
1139  tint& rank) throw (cvmexception)
1140 {
1141  const tint nM = mA.msize();
1142  const tint nN = mA.nsize();
1143  const tint nrhs = mB.nsize();
1144  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1145  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1146  tint nOutInfo = 0;
1147  basic_array<tint,tint> jpvt(nN);          // column pivot indices used/returned by SGELSY
1148  float dWork;
1149 
1150  mX.resize(nK, nrhs); // we might need extra space here
1151  mX.assign(CVM0, CVM0, mB);
1152 
1153  // calculate size of workspace
1154  SGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1155  jpvt, &rcond, &rank, &dWork, &lWork, &nOutInfo);
1156  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1157  lWork = static_cast<tint>(dWork);
1158  basic_rvector<float> vWork(lWork);
1159 
// solve; rcond is the rank-determination threshold, effective rank comes back in 'rank'
1160  SGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1161  jpvt, &rcond, &rank, vWork, &lWork, &nOutInfo);
1162  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1163 
1164  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1165 }
1166 
// Rank-deficient least squares via LAPACK DGELSY (complete orthogonal factorization
// with column pivoting), double specialization. Effective rank is returned in 'rank'.
1167 template <>
1168 CVM_API void
1169 __gelsy<double, basic_rmatrix<double> >
// NOTE(review): the mA parameter line (doc line 1170) was lost in this extracted
// listing -- presumably basic_rmatrix<double>& mA (DGELSY overwrites A); confirm
// against the original source.
1171  const basic_rmatrix<double>& mB,
1172  basic_rmatrix<double>& mX, // output: will be resized anyway, so better pass it empty
1173  double rcond,
1174  tint& rank) throw (cvmexception)
1175 {
1176  const tint nM = mA.msize();
1177  const tint nN = mA.nsize();
1178  const tint nrhs = mB.nsize();
1179  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1180  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1181  tint nOutInfo = 0;
1182  basic_array<tint,tint> jpvt(nN);          // column pivot indices used/returned by DGELSY
1183  double dWork;
1184 
1185  mX.resize(nK, nrhs); // we might need extra space here
1186  mX.assign(CVM0, CVM0, mB);
1187 
1188  // calculate size of workspace
1189  DGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1190  jpvt, &rcond, &rank, &dWork, &lWork, &nOutInfo);
1191  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1192  lWork = static_cast<tint>(dWork);
1193  basic_rvector<double> vWork(lWork);
1194 
// solve; rcond is the rank-determination threshold, effective rank comes back in 'rank'
1195  DGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1196  jpvt, &rcond, &rank, vWork, &lWork, &nOutInfo);
1197  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1198 
1199  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1200 }
1201 
// Rank-deficient least squares via LAPACK SGELSS (SVD-based), float specialization.
// Singular values are returned in vSV and the effective rank in 'rank'.
1202 template <>
1203 CVM_API void
1204 __gelss<float, basic_rvector<float>, basic_rmatrix<float> >
// NOTE(review): the mA parameter line (doc line 1205) was lost in this extracted
// listing -- presumably basic_rmatrix<float>& mA (SGELSS overwrites A); confirm
// against the original source.
1206  const basic_rmatrix<float>& mB,
1207  basic_rmatrix<float>& mX, // output: will be resized anyway, so better pass it empty
1208  float rcond,
1209  basic_rvector<float>& vSV, // incr=1 required
1210  tint& rank) throw (cvmexception)
1211 {
1212  const tint nM = mA.msize();
1213  const tint nN = mA.nsize();
1214  const tint nrhs = mB.nsize();
1215  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1216  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1217  tint nOutInfo = 0;
1218  float dWork;
1219 
1220  mX.resize(nK, nrhs); // we might need extra space here
1221  mX.assign(CVM0, CVM0, mB);
1222 
1223  // calculate size of workspace
1224  SGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1225  vSV, &rcond, &rank, &dWork, &lWork, &nOutInfo);
1226  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1227  lWork = static_cast<tint>(dWork);
1228  basic_rvector<float> vWork(lWork);
1229 
// solve; singular values below rcond * max(sv) are treated as zero for the rank
1230  SGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1231  vSV, &rcond, &rank, vWork, &lWork, &nOutInfo);
1232  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1233 
1234  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1235 }
1236 
// Rank-deficient least squares via LAPACK DGELSS (SVD-based), double specialization.
// Singular values are returned in vSV and the effective rank in 'rank'.
1237 template <>
1238 CVM_API void
1239 __gelss<double, basic_rvector<double>, basic_rmatrix<double> >
// NOTE(review): the mA parameter line (doc line 1240) was lost in this extracted
// listing -- presumably basic_rmatrix<double>& mA (DGELSS overwrites A); confirm
// against the original source.
1241  const basic_rmatrix<double>& mB,
1242  basic_rmatrix<double>& mX, // output: will be resized anyway, so better pass it empty
1243  double rcond,
1244  basic_rvector<double>& vSV, // incr=1 required
1245  tint& rank) throw (cvmexception)
1246 {
1247  const tint nM = mA.msize();
1248  const tint nN = mA.nsize();
1249  const tint nrhs = mB.nsize();
1250  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1251  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1252  tint nOutInfo = 0;
1253  double dWork;
1254 
1255  mX.resize(nK, nrhs); // we might need extra space here
1256  mX.assign(CVM0, CVM0, mB);
1257 
1258  // calculate size of workspace
1259  DGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1260  vSV, &rcond, &rank, &dWork, &lWork, &nOutInfo);
1261  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1262  lWork = static_cast<tint>(dWork);
1263  basic_rvector<double> vWork(lWork);
1264 
// solve; singular values below rcond * max(sv) are treated as zero for the rank
1265  DGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1266  vSV, &rcond, &rank, vWork, &lWork, &nOutInfo);
1267  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1268 
1269  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1270 }
1271 
// Rank-deficient least squares via LAPACK SGELSD (SVD, divide and conquer),
// float specialization. Singular values go to vSV, effective rank to 'rank'.
1272 template <>
1273 CVM_API void
1274 __gelsd<float, basic_rvector<float>, basic_rmatrix<float> >
// NOTE(review): the mA parameter line (doc line 1275) was lost in this extracted
// listing -- presumably basic_rmatrix<float>& mA (SGELSD overwrites A); confirm
// against the original source.
1276  const basic_rmatrix<float>& mB,
1277  basic_rmatrix<float>& mX, // output: will be resized anyway, so better pass it empty
1278  float rcond,
1279  basic_rvector<float>& vSV, // incr=1 required
1280  tint& rank) throw (cvmexception)
1281 {
1282  const tint nM = mA.msize();
1283  const tint nN = mA.nsize();
1284  const tint nrhs = mB.nsize();
1285  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1286  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1287  tint nOutInfo = 0;
1288  float dWork;
1289  tint iWork = -1;                          // receives required integer workspace size from the query
1290 
1291  mX.resize(nK, nrhs); // we might need extra space here
1292  mX.assign(CVM0, CVM0, mB);
1293 
1294  // calculate size of workspace (real size into dWork, integer size into iWork)
1295  SGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1296  vSV, &rcond, &rank, &dWork, &lWork, &iWork, &nOutInfo);
1297  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1298  lWork = static_cast<tint>(dWork);
1299  basic_rvector<float> vWork(lWork);
1300  basic_array<tint,tint> viWork(iWork);
1301 
// solve; singular values below rcond * max(sv) are treated as zero for the rank
1302  SGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1303  vSV, &rcond, &rank, vWork, &lWork, viWork, &nOutInfo);
1304  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1305 
1306  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1307 }
1308 
// Rank-deficient least squares via LAPACK DGELSD (SVD, divide and conquer),
// double specialization. Singular values go to vSV, effective rank to 'rank'.
1309 template <>
1310 CVM_API void
1311 __gelsd<double, basic_rvector<double>, basic_rmatrix<double> >
// NOTE(review): the mA parameter line (doc line 1312) was lost in this extracted
// listing -- presumably basic_rmatrix<double>& mA (DGELSD overwrites A); confirm
// against the original source.
1313  const basic_rmatrix<double>& mB,
1314  basic_rmatrix<double>& mX, // output: will be resized anyway, so better pass it empty
1315  double rcond,
1316  basic_rvector<double>& vSV, // incr=1 required
1317  tint& rank) throw (cvmexception)
1318 {
1319  const tint nM = mA.msize();
1320  const tint nN = mA.nsize();
1321  const tint nrhs = mB.nsize();
1322  const tint nK = _cvm_max<tint>(nM, nN);   // rows needed to hold RHS during the solve
1323  tint lWork = -1;                          // lwork == -1 requests a workspace-size query
1324  tint nOutInfo = 0;
1325  double dWork;
1326  tint iWork = -1;                          // receives required integer workspace size from the query
1327 
1328  mX.resize(nK, nrhs); // we might need extra space here
1329  mX.assign(CVM0, CVM0, mB);
1330 
1331  // calculate size of workspace (real size into dWork, integer size into iWork)
1332  DGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1333  vSV, &rcond, &rank, &dWork, &lWork, &iWork, &nOutInfo);
1334  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1335  lWork = static_cast<tint>(dWork);
1336  basic_rvector<double> vWork(lWork);
1337  basic_array<tint,tint> viWork(iWork);
1338 
// solve; singular values below rcond * max(sv) are treated as zero for the rank
1339  DGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1340  vSV, &rcond, &rank, vWork, &lWork, viWork, &nOutInfo);
1341  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1342 
1343  mX.resize(nN, nrhs);   // truncate to the n x nrhs solution
1344 }
1345