CVM Class Library  8.1
This C++ class library encapsulates the concepts of vectors and of different kinds of matrices — including square, band, symmetric and hermitian ones — in Euclidean spaces of real and complex numbers.
 All Classes Files Functions Variables Typedefs Friends Macros Pages
cmatrix.cpp
Go to the documentation of this file.
1 // CVM Class Library
2 // http://cvmlib.com
3 //
4 // Copyright Sergei Nikolaev 1992-2014
5 // Distributed under the Boost Software License, Version 1.0.
6 // (See accompanying file LICENSE_1_0.txt or copy at
7 // http://www.boost.org/LICENSE_1_0.txt)
8 
9 #include "cvm.h"
10 #include "blas.h"
11 
13 
// BLAS level-3 wrapper (single-precision complex): forwards to CGEMM to compute
// mRes = dAlpha*op(ml)*op(mr) + dBeta*mRes, where op(X) is X^H when the matching
// bTransX flag is set (Chars::pC(), presumably "C" — confirm) and X otherwise.
// NOTE(review): the output-matrix parameter line (mRes) is missing from this
// extract (listing gap between the dAlpha and dBeta lines) — confirm against
// the original source.
template<>
CVM_API void
__gemm<std::complex<float>, basic_cmatrix<float, std::complex<float> > >
    (const basic_cmatrix<float, std::complex<float> >& ml, bool bTrans1,
    const basic_cmatrix<float, std::complex<float> >& mr, bool bTrans2,
    std::complex<float> dAlpha,
    std::complex<float> dBeta)
{
// Some Fortran calling conventions require a hidden string-length argument
// ("1") after every character parameter; the two call variants are otherwise identical.
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
    CGEMM (bTrans1 ? Chars::pC() : Chars::pN(), 1, bTrans2 ? Chars::pC() : Chars::pN(), 1,
#else
    CGEMM (bTrans1 ? Chars::pC() : Chars::pN(), bTrans2 ? Chars::pC() : Chars::pN(),
#endif
           bTrans1 ? ml._pn() : ml._pm(),   // M: rows of op(ml)
           bTrans2 ? mr._pm() : mr._pn(),   // N: columns of op(mr)
           bTrans1 ? ml._pm() : ml._pn(),   // K: columns of op(ml)
           &dAlpha,
           ml._pd(), ml._pldm(),            // data pointer, leading dimension
           mr._pd(), mr._pldm(),
           &dBeta,
           mRes, mRes._pld());
}
38 
// BLAS level-3 wrapper (double-precision complex): forwards to ZGEMM to compute
// mRes = dAlpha*op(ml)*op(mr) + dBeta*mRes, where op(X) is X^H when the matching
// bTransX flag is set (Chars::pC(), presumably "C" — confirm) and X otherwise.
// NOTE(review): the output-matrix parameter line (mRes) is missing from this
// extract (listing gap between the dAlpha and dBeta lines) — confirm against
// the original source.
template<>
CVM_API void
__gemm<std::complex<double>, basic_cmatrix<double, std::complex<double> > >
    (const basic_cmatrix<double, std::complex<double> >& ml, bool bTrans1,
    const basic_cmatrix<double, std::complex<double> >& mr, bool bTrans2,
    std::complex<double> dAlpha,
    std::complex<double> dBeta)
{
// Hidden Fortran string-length arguments ("1") follow each character parameter
// on platforms that require them; both variants are otherwise identical.
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
    ZGEMM (bTrans1 ? Chars::pC() : Chars::pN(), 1, bTrans2 ? Chars::pC() : Chars::pN(), 1,
#else
    ZGEMM (bTrans1 ? Chars::pC() : Chars::pN(), bTrans2 ? Chars::pC() : Chars::pN(),
#endif
           bTrans1 ? ml._pn() : ml._pm(),   // M: rows of op(ml)
           bTrans2 ? mr._pm() : mr._pn(),   // N: columns of op(mr)
           bTrans1 ? ml._pm() : ml._pn(),   // K: columns of op(ml)
           &dAlpha,
           ml._pd(), ml._pldm(),            // data pointer, leading dimension
           mr._pd(), mr._pldm(),
           &dBeta,
           mRes, mRes._pld());
}
62 
// Hermitian matrix-matrix multiply wrapper (single precision): forwards to BLAS
// CHEMM. bLeft selects the side: mRes = dAlpha*ml*mr + dBeta*mRes when true,
// mRes = dAlpha*mr*ml + dBeta*mRes otherwise; the upper triangle of the
// hermitian operand is referenced (Chars::pU()).
// NOTE(review): the specialization name line and the mr/mRes parameter lines
// are missing from this extract (listing gaps) — presumably this is the __hemm
// specialization; confirm against the original source.
template<>
CVM_API void
    (bool bLeft,
    const basic_schmatrix<float, std::complex<float> >& ml,
    std::complex<float> dAlpha,
    std::complex<float> dBeta)
{
    CHEMM (bLeft ? Chars::pL() : Chars::pR(),   // SIDE: 'L' or 'R'
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
           1,                                   // hidden Fortran string length
#endif
           Chars::pU(),                         // UPLO: upper triangle
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
           1,
#endif
           mRes._pm(), mRes._pn(),
           &dAlpha,
           ml._pd(), ml._pld(),
           mr._pd(), mr._pld(),
           &dBeta,
           mRes, mRes._pld());
}
88 
// Hermitian matrix-matrix multiply wrapper (double precision): forwards to BLAS
// ZHEMM. bLeft selects the side: mRes = dAlpha*ml*mr + dBeta*mRes when true,
// mRes = dAlpha*mr*ml + dBeta*mRes otherwise; the upper triangle of the
// hermitian operand is referenced (Chars::pU()).
// NOTE(review): the specialization name line and the mr/mRes parameter lines
// are missing from this extract (listing gaps) — presumably this is the __hemm
// specialization; confirm against the original source.
template<>
CVM_API void
    (bool bLeft,
    const basic_schmatrix<double, std::complex<double> >& ml,
    std::complex<double> dAlpha,
    std::complex<double> dBeta)
{
    ZHEMM (bLeft ? Chars::pL() : Chars::pR(),   // SIDE: 'L' or 'R'
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
           1,                                   // hidden Fortran string length
#endif
           Chars::pU(),                         // UPLO: upper triangle
#if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
           1,
#endif
           mRes._pm(), mRes._pn(),
           &dAlpha,
           ml._pd(), ml._pld(),
           mr._pd(), mr._pld(),
           &dBeta,
           mRes, mRes._pld());
}
114 
115 template <>
116 CVM_API void
117 __herk<float, std::complex<float>, basic_schmatrix<float, std::complex<float> > >
118  (bool bTransp,
119  float alpha, tint k,
120  const std::complex<float>* pA, tint ldA,
121  float beta, basic_schmatrix<float, std::complex<float> >& m)
122 {
123  CHERK (Chars::pU(),
124 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
125  1,
126 #endif
127  bTransp ? Chars::pC() : Chars::pN(),
128 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
129  1,
130 #endif
131  m._pm(), &k, &alpha, pA, &ldA, &beta, m, m._pld());
132 }
133 
134 template <>
135 CVM_API void
136 __herk<double, std::complex<double>, basic_schmatrix<double, std::complex<double> > >
137  (bool bTransp,
138  double alpha, tint k,
139  const std::complex<double>* pA, tint ldA,
140  double beta, basic_schmatrix<double, std::complex<double> >& m)
141 {
142  ZHERK (Chars::pU(),
143 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
144  1,
145 #endif
146  bTransp ? Chars::pC() : Chars::pN(),
147 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
148  1,
149 #endif
150  m._pm(), &k, &alpha, pA, &ldA, &beta, m, m._pld());
151 }
152 
153 template <>
154 CVM_API void
155 __her2k<float, std::complex<float>, basic_schmatrix<float, std::complex<float> > >
156  (bool bTransp,
157  std::complex<float> alpha, tint k,
158  const std::complex<float>* pA, tint ldA,
159  const std::complex<float>* pB, tint ldB,
160  float beta, basic_schmatrix<float, std::complex<float> >& m)
161 {
162  CHER2K (Chars::pU(),
163 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
164  1,
165 #endif
166  bTransp ? Chars::pC() : Chars::pN(),
167 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
168  1,
169 #endif
170  m._pm(), &k, &alpha, pA, &ldA, pB, &ldB, &beta, m, m._pld());
171 }
172 
173 template <>
174 CVM_API void
175 __her2k<double, std::complex<double>, basic_schmatrix<double, std::complex<double> > >
176  (bool bTransp,
177  std::complex<double> alpha, tint k,
178  const std::complex<double>* pA, tint ldA,
179  const std::complex<double>* pB, tint ldB,
180  double beta, basic_schmatrix<double, std::complex<double> >& m)
181 {
182  ZHER2K (Chars::pU(),
183 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
184  1,
185 #endif
186  bTransp ? Chars::pC() : Chars::pN(),
187 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
188  1,
189 #endif
190  m._pm(), &k, &alpha, pA, &ldA, pB, &ldB, &beta, m, m._pld());
191 }
192 
// QR Case 1: economy mode, A is (m x n) and Q is (m x n) and R is (n x n)
// Factors mArg = mQ * mR via LAPACK: CGEQRF produces R and the Householder
// reflectors in-place, then CUNGQR expands the reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mR), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qre<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    CGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R from overwritten A
    // (CVM0 is presumably the library's base index, so loops stay base-agnostic)
    mR.vanish();
    for (tint row = CVM0; row < nK + CVM0; ++row)
        for (tint col = row; col < nN + CVM0; ++col)
            mR(row,col) = mQ(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    CUNGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    CUNGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
238 
// QR Case 1: economy mode, A is (m x n) and Q is (m x n) and R is (n x n)
// Double-precision counterpart of the specialization above: ZGEQRF factors,
// ZUNGQR expands the Householder reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mR), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qre<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    ZGEQRF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R from overwritten A
    mR.vanish();
    for (tint row = CVM0; row < nK + CVM0; ++row)
        for (tint col = row; col < nN + CVM0; ++col)
            mR(row,col) = mQ(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    ZUNGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    ZUNGQR (&nM, &nK, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
284 
// QR Case 2: full mode, A is (m x n) and Q is (m x m) and R is (m x n)
// Factors into a working copy (mA) because the full Q may be larger than A:
// CGEQRF factors, CUNGQR forms the m x m Q, then Q and R are copied out.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mR), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qrf<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // unlike economy mode, we need a copy here since Q will be m x m and this may be bigger than original A
    basic_cmatrix<float, std::complex<float> > mA (nM, nN <= nM ? nM : nN);

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGEQRF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    CGEQRF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R which is now m x n from overwritten A
    mR.vanish();
    for (row = CVM0; row < nK + CVM0; ++row)
        for (col = row; col < nN + CVM0; ++col)
            mR(row,col) = mA(row,col);

    // calculate size of workspace for finding Q that is m x m
    lWork = -1;
    CUNGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q that is m x m
    CUNGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // is Q big enough to have all contents of mA ?
    if (nN <= nM)
        mQ.assign (CVM0, CVM0, mA);
    else
        for (row = CVM0; row < nM + CVM0; ++row)
            for (col = CVM0; col < nM + CVM0; ++col)
                mQ(row,col) = mA(row,col);
}
343 
// QR Case 2: full mode, A is (m x n) and Q is (m x m) and R is (m x n)
// Double-precision counterpart of the specialization above: ZGEQRF factors
// into a working copy, ZUNGQR forms the m x m Q, then Q and R are copied out.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mR), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qrf<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // unlike economy mode, we need a copy here since Q will be m x m and this may be bigger than original A
    basic_cmatrix<double, std::complex<double> > mA (nM, nN <= nM ? nM : nN);

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGEQRF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    ZGEQRF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R which is now m x n from overwritten A
    mR.vanish();
    for (row = CVM0; row < nK + CVM0; ++row)
        for (col = row; col < nN + CVM0; ++col)
            mR(row,col) = mA(row,col);

    // calculate size of workspace for finding Q that is m x m
    lWork = -1;
    ZUNGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q that is m x m
    ZUNGQR (&nM, &nM, &nK, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // is Q big enough to have all contents of mA ?
    if (nN <= nM)
        mQ.assign (CVM0, CVM0, mA);
    else
        for (row = CVM0; row < nM + CVM0; ++row)
            for (col = CVM0; col < nM + CVM0; ++col)
                mQ(row,col) = mA(row,col);
}
402 
// RQ Case 1: economy mode, A is (m x n) and Q is (m x n) and R is (m x m)
// Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
// CGERQF computes the RQ factorization in-place; CUNGRQ then forms the
// explicit m x n Q. R sits in the rightmost m columns of the overwritten A.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mR/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__rqe<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint row, col;
    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGERQF (&nM, &nN, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    CGERQF (&nM, &nN, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R which is now m x m from overwritten A
    // (shifted by nN - nM columns, i.e. taken from the right edge)
    mR.vanish();
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = row; col < nM + CVM0; ++col)
            mR(row,col) = mQ(row,nN - nM + col);

    // calculate size of workspace for finding Q that is m x n
    lWork = -1;
    CUNGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q that is m x n
    CUNGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
450 
// RQ Case 1: economy mode, A is (m x n) and Q is (m x n) and R is (m x m)
// Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
// Double-precision counterpart of the specialization above: ZGERQF factors,
// ZUNGRQ forms the explicit m x n Q; R comes from the rightmost m columns.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mR/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__rqe<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint row, col;
    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGERQF (&nM, &nN, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    ZGERQF (&nM, &nN, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-triangular R which is now m x m from overwritten A
    // (shifted by nN - nM columns, i.e. taken from the right edge)
    mR.vanish();
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = row; col < nM + CVM0; ++col)
            mR(row,col) = mQ(row,nN - nM + col);

    // calculate size of workspace for finding Q that is m x n
    lWork = -1;
    ZUNGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q that is m x n
    ZUNGRQ (&nM, &nN, &nM, mQ, mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
498 
// RQ Case 2: full mode, A is (m x n) and Q is (n x n) and R is (m x n)
// Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
// CGERQF factors a working copy; CUNGRQ generates the m x n orthonormal block,
// which is then placed into the bottom m rows of the n x n mQ.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mR/mQ), the declaration of the working copy mA, the vTau/vWork
// declarations, and the vWork allocation presumably were — confirm against
// the original source (mA is used below but its declaration is not visible).
template <>
CVM_API void
__rqf<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // unlike economy mode, we need a copy here since Q will be n x n and this may be bigger than original A

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGERQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    CGERQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-trapezoidal (triangular) R which is now m x n from overwritten A
    mR.vanish();
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = nN - nM + row; col < nN + CVM0; ++col)
            mR(row,col) = mA(row,col);

    // calculate size of workspace for forming the m x n orthonormal rows of Q
    lWork = -1;
    CUNGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // generate the m x n orthonormal rows of Q
    CUNGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // place them into the bottom m rows of the n x n mQ
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = CVM0; col < nN + CVM0; ++col)
            mQ(nN - nM + row,col) = mA(row,col);
}
553 
// RQ Case 2: full mode, A is (m x n) and Q is (n x n) and R is (m x n)
// Following this definition: http://www.netlib.org/scalapack/slug/node57.html we assume that M <= N
// Double-precision counterpart of the specialization above: ZGERQF factors a
// working copy; ZUNGRQ generates the m x n orthonormal block, which is then
// placed into the bottom m rows of the n x n mQ.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mR/mQ), the declaration of the working copy mA, the vTau/vWork
// declarations, and the vWork allocation presumably were — confirm against
// the original source (mA is used below but its declaration is not visible).
template <>
CVM_API void
__rqf<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // unlike economy mode, we need a copy here since Q will be n x n and this may be bigger than original A

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGERQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find R, TAU and the Householder form of Q
    ZGERQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get upper-trapezoidal (triangular) R which is now m x n from overwritten A
    mR.vanish();
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = nN - nM + row; col < nN + CVM0; ++col)
            mR(row,col) = mA(row,col);

    // calculate size of workspace for forming the m x n orthonormal rows of Q
    lWork = -1;
    ZUNGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // generate the m x n orthonormal rows of Q
    ZUNGRQ (&nM, &nN, &nM, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // place them into the bottom m rows of the n x n mQ
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = CVM0; col < nN + CVM0; ++col)
            mQ(nN - nM + row,col) = mA(row,col);
}
608 
// LQ Case 1: "economy" mode, A is (m x n) and L is (m x m) and Q is (m x n)
// Factors mArg = mL * mQ via LAPACK: CGELQF produces L and the Householder
// reflectors, then CUNGLQ expands the reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mL/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__lqe<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    CGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    mL.vanish();
    for (tint col = CVM0; col < nK + CVM0; ++col)
        for (tint row = col; row < nM + CVM0; ++row)
            mL(row,col) = mQ(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    CUNGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    CUNGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
654 
// LQ Case 1: "economy" mode, A is (m x n) and L is (m x m) and Q is (m x n)
// Double-precision counterpart of the specialization above: ZGELQF factors,
// ZUNGLQ expands the Householder reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mL/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__lqe<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    ZGELQF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    mL.vanish();
    for (tint col = CVM0; col < nK + CVM0; ++col)
        for (tint row = col; row < nM + CVM0; ++row)
            mL(row,col) = mQ(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    ZUNGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    ZUNGLQ (&nK, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
700 
// LQ Case 2: full mode, A is (m x n) and L is (m x n) and Q is (n x n)
// Factors into a working copy (mA) because the full Q may be larger than A:
// CGELQF factors, CUNGLQ forms the n x n Q, then L and Q are copied out.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mL/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__lqf<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // unlike "economy" mode, we need a copy here since Q will be n x n and this may be bigger than original A
    basic_cmatrix<float, std::complex<float> > mA (nM <= nN ? nN : nM, nN);

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGELQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    CGELQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    mL.vanish();
    for (col = CVM0; col < nK + CVM0; ++col)
        for (row = col; row < nM + CVM0; ++row)
            mL(row,col) = mA(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    CUNGLQ (&nN, &nN, &nK, mA._pd(), mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    CUNGLQ (&nN, &nN, &nK, mA._pd(), mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // is Q big enough to have all contents of mA ?
    if (nM <= nN)
        mQ.assign (CVM0, CVM0, mA);
    else
        for (row = CVM0; row < nN + CVM0; ++row)
            for (col = CVM0; col < nN + CVM0; ++col)
                mQ(row,col) = mA(row,col);
}
759 
// LQ Case 2: full mode, A is (m x n) and L is (m x n) and Q is (n x n)
// Double-precision counterpart of the specialization above: ZGELQF factors
// into a working copy, ZUNGLQ forms the n x n Q, then L and Q are copied out.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mL/mQ), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__lqf<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // unlike "economy" mode, we need a copy here since Q will be n x n and this may be bigger than original A
    basic_cmatrix<double, std::complex<double> > mA (nM <= nN ? nN : nM, nN);

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGELQF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    ZGELQF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    mL.vanish();
    for (col = CVM0; col < nK + CVM0; ++col)
        for (row = col; row < nM + CVM0; ++row)
            mL(row,col) = mA(row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    ZUNGLQ (&nN, &nN, &nK, mA._pd(), mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    ZUNGLQ (&nN, &nN, &nK, mA._pd(), mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // is Q big enough to have all contents of mA ?
    if (nM <= nN)
        mQ.assign (CVM0, CVM0, mA);
    else
        for (row = CVM0; row < nN + CVM0; ++row)
            for (col = CVM0; col < nN + CVM0; ++col)
                mQ(row,col) = mA(row,col);
}
818 
819 
// QL Case 1: "economy" mode, A is (m x n) and Q is (m x n) and L is (n x n)
// Factors mArg = mQ * mL via LAPACK: CGEQLF produces L and the Householder
// reflectors, then CUNGQL expands the reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mL), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qle<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    CGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    // (L occupies the bottom nN rows of the factored matrix, hence the nM - nN row shift)
    mL.vanish();
    for (tint col = CVM0; col < nN + CVM0; ++col)
        for (tint row = col; row < nK + CVM0; ++row)
            mL(row,col) = mQ(nM - nN + row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    CUNGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    CUNGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
865 
866 
// QL Case 1: "economy" mode, A is (m x n) and Q is (m x n) and L is (n x n)
// Double-precision counterpart of the specialization above: ZGEQLF factors,
// ZUNGQL expands the Householder reflectors into the explicit Q.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mL), the vTau/vWork declarations, and the vWork allocation
// presumably were — confirm against the original source.
template <>
CVM_API void
__qle<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();
    const tint nK = _cvm_min<tint>(nM, nN);

    // we will eventually overwrite mQ to be the output matrix
    mQ = mArg;

    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    ZGEQLF (&nM, &nN, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower-triangular L from overwritten A
    // (L occupies the bottom nN rows of the factored matrix, hence the nM - nN row shift)
    mL.vanish();
    for (tint col = CVM0; col < nN + CVM0; ++col)
        for (tint row = col; row < nK + CVM0; ++row)
            mL(row,col) = mQ(nM - nN + row,col);

    // calculate size of workspace for finding Q
    lWork = -1;
    ZUNGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // find Q
    ZUNGQL (&nM, &nN, &nK, mQ._pd(), mQ._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);
}
912 
913 
// QL Case 2: full mode, A is (m x n) and Q is (m x m) and L is (m x n)
// CGEQLF factors a working copy; CUNGQL generates the m x n orthonormal block,
// which is then placed into the rightmost n columns of the m x m mQ.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mL), the declaration of the working copy mA, the vTau/vWork
// declarations, and the vWork allocation presumably were — confirm against
// the original source (mA is used below but its declaration is not visible).
template <>
CVM_API void
__qlf<basic_cmatrix<float, std::complex<float> >, basic_scmatrix<float, std::complex<float> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // unlike "economy" mode, we need a copy here since Q will be m x m and this may be bigger than original A

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;            // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<float> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    CGEQLF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    CGEQLF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower triangular L which is now m x n from overwritten A
    mL.vanish();
    for (col = CVM0; col < nN + CVM0; ++col)
        for (row = nM - nN + col; row < nM + CVM0; ++row)
            mL(row,col) = mA(row,col);

    // calculate size of workspace for forming the m x n orthonormal columns of Q
    lWork = -1;
    CUNGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // generate the m x n orthonormal columns of Q
    CUNGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // place them into the rightmost n columns of the m x m mQ
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = CVM0; col < nN + CVM0; ++col)
            mQ(row,nM - nN + col) = mA(row,col);
}
967 
968 
// QL Case 2: full mode, A is (m x n) and Q is (m x m) and L is (m x n)
// Double-precision counterpart of the specialization above: ZGEQLF factors a
// working copy; ZUNGQL generates the m x n orthonormal block, which is then
// placed into the rightmost n columns of the m x m mQ.
// NOTE(review): this extract has listing gaps where the parameter list
// (mArg/mQ/mL), the declaration of the working copy mA, the vTau/vWork
// declarations, and the vWork allocation presumably were — confirm against
// the original source (mA is used below but its declaration is not visible).
template <>
CVM_API void
__qlf<basic_cmatrix<double, std::complex<double> >, basic_scmatrix<double, std::complex<double> > >
{
    const tint nM = mArg.msize();
    const tint nN = mArg.nsize();

    // unlike "economy" mode, we need a copy here since Q will be m x m and this may be bigger than original A

    // copy over argument matrix
    mA.assign (CVM0, CVM0, mArg);

    tint row, col;
    tint lWork = -1;             // -1 triggers the LAPACK workspace-size query
    tint nOutInfo = 0;
    std::complex<double> dWork;  // receives the optimal workspace size

    // calculate size of workspace
    ZGEQLF (&nM, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());

    // now do most of the hard work: find L, TAU and the Householder form of Q
    ZGEQLF (&nM, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // get lower triangular L which is now m x n from overwritten A
    mL.vanish();
    for (col = CVM0; col < nN + CVM0; ++col)
        for (row = nM - nN + col; row < nM + CVM0; ++row)
            mL(row,col) = mA(row,col);

    // calculate size of workspace for forming the m x n orthonormal columns of Q
    lWork = -1;
    ZUNGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, &dWork, &lWork, &nOutInfo);
    lWork = static_cast<tint>(dWork.real());
    if (lWork > vWork.size()) vWork.resize(lWork);

    // generate the m x n orthonormal columns of Q
    ZUNGQL (&nM, &nN, &nN, mA, mA._pld(), vTau, vWork, &lWork, &nOutInfo);
    _check_negative(CVM_WRONGMKLARG, nOutInfo);

    // place them into the rightmost n columns of the m x m mQ
    for (row = CVM0; row < nM + CVM0; ++row)
        for (col = CVM0; col < nN + CVM0; ++col)
            mQ(row,nM - nN + col) = mA(row,col);
}
1022 
1023 // LLS routines
1024 template <>
1025 CVM_API void
1026 __gels<basic_cmatrix<float, std::complex<float> >, basic_cvector<float, std::complex<float> > >
1027  (bool transpose,
1030  basic_cmatrix<float, std::complex<float> >& mX, // output: will be resized anyway, so better pass it empty
1032 {
1033  const tint nM = mA.msize();
1034  const tint nN = mA.nsize();
1035  const tint nL = _cvm_min<tint>(nM, nN);
1036  const tint nK = _cvm_max<tint>(nM, nN);
1037  const tint nrhs = mB.nsize();
1038  tint lWork = -1;
1039  tint nOutInfo = 0;
1040  std::complex<float> dWork;
1041  const char* trans = transpose ? Chars::pC() : Chars::pN();
1042 
1043  mX.resize(nK, nrhs); // first we might need extra space for residuals
1044  mX.assign(CVM0, CVM0, mB);
1045 
1046  // calculate size of workspace
1047  CGELS (trans,
1048 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1049  1,
1050 #endif
1051  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), &dWork, &lWork, &nOutInfo);
1052  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1053  lWork = static_cast<tint>(dWork.real());
1055 
1056  CGELS (trans,
1057 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1058  1,
1059 #endif
1060  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), vWork, &lWork, &nOutInfo);
1061  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1062 
1063  // collecting residuals if applicable
1064  vErr.set(0.);
1065  if ((!transpose && nM > nN) || (transpose && nM < nN))
1066  {
1067  for (tint col = CVM0; col < nrhs + CVM0; col++)
1068  {
1069  for (tint row = nL + CVM0; row < nK + CVM0; row++)
1070  {
1071  vErr[col] += mX(row, col)*mX(row, col);
1072  }
1073  }
1074  mX.resize(nL, nrhs);
1075  }
1076 }
1077 
1078 template <>
1079 CVM_API void
1080 __gels<basic_cmatrix<double, std::complex<double> >, basic_cvector<double, std::complex<double> > >
1081  (bool transpose,
1084  basic_cmatrix<double, std::complex<double> >& mX, // output: will be resized anyway, so better pass it empty
1086 {
1087  const tint nM = mA.msize();
1088  const tint nN = mA.nsize();
1089  const tint nL = _cvm_min<tint>(nM, nN);
1090  const tint nK = _cvm_max<tint>(nM, nN);
1091  const tint nrhs = mB.nsize();
1092  tint lWork = -1;
1093  tint nOutInfo = 0;
1094  std::complex<double> dWork;
1095  const char* trans = transpose ? Chars::pC() : Chars::pN();
1096 
1097  mX.resize(nK, nrhs); // first we might need extra space for residuals
1098  mX.assign(CVM0, CVM0, mB);
1099 
1100  // calculate size of workspace
1101  ZGELS (trans,
1102 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1103  1,
1104 #endif
1105  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), &dWork, &lWork, &nOutInfo);
1106  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1107  lWork = static_cast<tint>(dWork.real());
1109 
1110  ZGELS (trans,
1111 #if defined (CVM_PASS_STRING_LENGTH_TO_FTN_SUBROUTINES)
1112  1,
1113 #endif
1114  &nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(), vWork, &lWork, &nOutInfo);
1115  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1116 
1117  // collecting residuals if applicable
1118  vErr.set(0.);
1119  if ((!transpose && nM > nN) || (transpose && nM < nN))
1120  {
1121  for (tint col = CVM0; col < nrhs + CVM0; col++)
1122  {
1123  for (tint row = nL + CVM0; row < nK + CVM0; row++)
1124  {
1125  vErr[col] += mX(row, col)*mX(row, col);
1126  }
1127  }
1128  mX.resize(nL, nrhs);
1129  }
1130 }
1132 
1133 template <>
1134 CVM_API void
1135 __gelsy<float, basic_cmatrix<float, std::complex<float> > >
1138  basic_cmatrix<float, std::complex<float> >& mX, // output: will be resized anyway, so better pass it empty
1139  float rcond,
1140  tint& rank) throw (cvmexception)
1141 {
1142  const tint nM = mA.msize();
1143  const tint nN = mA.nsize();
1144  const tint nrhs = mB.nsize();
1145  const tint nK = _cvm_max<tint>(nM, nN);
1146  tint lWork = -1;
1147  tint nOutInfo = 0;
1148  iarray jpvt(nN);
1149  std::complex<float> dWork;
1150  basic_rvector<float> rWork(2*nN);
1151 
1152  mX.resize(nK, nrhs); // we might need extra space here
1153  mX.assign(CVM0, CVM0, mB);
1154 
1155  // calculate size of workspace
1156  CGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1157  jpvt, &rcond, &rank, &dWork, &lWork, rWork, &nOutInfo);
1158  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1159  lWork = static_cast<tint>(dWork.real());
1161 
1162  CGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1163  jpvt, &rcond, &rank, vWork, &lWork, rWork, &nOutInfo);
1164  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1165 
1166  mX.resize(nN, nrhs);
1167 }
1168 
1169 template <>
1170 CVM_API void
1171 __gelsy<double, basic_cmatrix<double, std::complex<double> > >
1174  basic_cmatrix<double, std::complex<double> >& mX, // output: will be resized anyway, so better pass it empty
1175  double rcond,
1176  tint& rank) throw (cvmexception)
1177 {
1178  const tint nM = mA.msize();
1179  const tint nN = mA.nsize();
1180  const tint nrhs = mB.nsize();
1181  const tint nK = _cvm_max<tint>(nM, nN);
1182  tint lWork = -1;
1183  tint nOutInfo = 0;
1184  iarray jpvt(nN);
1185  std::complex<double> dWork;
1186  basic_rvector<double> rWork(2*nN);
1187 
1188  mX.resize(nK, nrhs); // we might need extra space here
1189  mX.assign(CVM0, CVM0, mB);
1190 
1191  // calculate size of workspace
1192  ZGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1193  jpvt, &rcond, &rank, &dWork, &lWork, rWork, &nOutInfo);
1194  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1195  lWork = static_cast<tint>(dWork.real());
1197 
1198  ZGELSY (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1199  jpvt, &rcond, &rank, vWork, &lWork, rWork, &nOutInfo);
1200  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1201 
1202  mX.resize(nN, nrhs);
1203 }
1204 
1206 template <>
1207 CVM_API void
1208 __gelss<float, basic_rvector<float>, basic_cmatrix<float, std::complex<float> > >
1211  basic_cmatrix<float, std::complex<float> >& mX, // output: will be resized anyway, so better pass it empty
1212  float rcond,
1213  basic_rvector<float>& vSV, // incr=1 required
1214  tint& rank) throw (cvmexception)
1215 {
1216  const tint nM = mA.msize();
1217  const tint nN = mA.nsize();
1218  const tint nrhs = mB.nsize();
1219  const tint nK = _cvm_max<tint>(nM, nN);
1220  tint lWork = -1;
1221  tint nOutInfo = 0;
1222  std::complex<float> dWork;
1223  basic_rvector<float> rWork(5 * _cvm_min<tint>(nM, nN));
1224 
1225  mX.resize(nK, nrhs); // we might need extra space here
1226  mX.assign(CVM0, CVM0, mB);
1227 
1228  // calculate size of workspace
1229  CGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1230  vSV, &rcond, &rank, &dWork, &lWork, rWork, &nOutInfo);
1231  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1232  lWork = static_cast<tint>(dWork.real());
1234 
1235  CGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1236  vSV, &rcond, &rank, vWork, &lWork, rWork, &nOutInfo);
1237  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1238 
1239  mX.resize(nN, nrhs);
1240 }
1241 
1242 template <>
1243 CVM_API void
1244 __gelss<double, basic_rvector<double>, basic_cmatrix<double, std::complex<double> > >
1247  basic_cmatrix<double, std::complex<double> >& mX, // output: will be resized anyway, so better pass it empty
1248  double rcond,
1249  basic_rvector<double>& vSV, // incr=1 required
1250  tint& rank) throw (cvmexception)
1251 {
1252  const tint nM = mA.msize();
1253  const tint nN = mA.nsize();
1254  const tint nrhs = mB.nsize();
1255  const tint nK = _cvm_max<tint>(nM, nN);
1256  tint lWork = -1;
1257  tint nOutInfo = 0;
1258  std::complex<double> dWork;
1259  basic_rvector<double> rWork(5 * _cvm_min<tint>(nM, nN));
1260 
1261  mX.resize(nK, nrhs); // we might need extra space here
1262  mX.assign(CVM0, CVM0, mB);
1263 
1264  // calculate size of workspace
1265  ZGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1266  vSV, &rcond, &rank, &dWork, &lWork, rWork, &nOutInfo);
1267  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1268  lWork = static_cast<tint>(dWork.real());
1270 
1271  ZGELSS (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1272  vSV, &rcond, &rank, vWork, &lWork, rWork, &nOutInfo);
1273  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1274 
1275  mX.resize(nN, nrhs);
1276 }
1277 
1278 template <>
1279 CVM_API void
1280 __gelsd<float, basic_rvector<float>, basic_cmatrix<float, std::complex<float> > >
1283  basic_cmatrix<float, std::complex<float> >& mX, // output: will be resized anyway, so better pass it empty
1284  float rcond,
1285  basic_rvector<float>& vSV, // incr=1 required
1286  tint& rank) throw (cvmexception)
1287 {
1288  const tint nM = mA.msize();
1289  const tint nN = mA.nsize();
1290  const tint nrhs = mB.nsize();
1291  const tint nK = _cvm_max<tint>(nM, nN);
1292  tint lWork = -1;
1293  tint nOutInfo = 0;
1294  std::complex<float> dWork;
1295  float rWork;
1296  tint iWork = -1;
1297 
1298  mX.resize(nK, nrhs); // we might need extra space here
1299  mX.assign(CVM0, CVM0, mB);
1300 
1301  // calculate size of workspace
1302  CGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1303  vSV, &rcond, &rank, &dWork, &lWork, &rWork, &iWork, &nOutInfo);
1304  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1305  lWork = static_cast<tint>(dWork.real());
1307  basic_rvector<float> vrWork(static_cast<tint>(rWork));
1308  basic_array<tint,tint> viWork(iWork);
1309 
1310  CGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1311  vSV, &rcond, &rank, vWork, &lWork, vrWork, viWork, &nOutInfo);
1312  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1313 
1314  mX.resize(nN, nrhs);
1315 }
1316 
1317 template <>
1318 CVM_API void
1319 __gelsd<double, basic_rvector<double>, basic_cmatrix<double, std::complex<double> > >
1322  basic_cmatrix<double, std::complex<double> >& mX, // output: will be resized anyway, so better pass it empty
1323  double rcond,
1324  basic_rvector<double>& vSV, // incr=1 required
1325  tint& rank) throw (cvmexception)
1326 {
1327  const tint nM = mA.msize();
1328  const tint nN = mA.nsize();
1329  const tint nrhs = mB.nsize();
1330  const tint nK = _cvm_max<tint>(nM, nN);
1331  tint lWork = -1;
1332  tint nOutInfo = 0;
1333  std::complex<double> dWork;
1334  double rWork;
1335  tint iWork = -1;
1336 
1337  mX.resize(nK, nrhs); // we might need extra space here
1338  mX.assign(CVM0, CVM0, mB);
1339 
1340  // calculate size of workspace
1341  ZGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1342  vSV, &rcond, &rank, &dWork, &lWork, &rWork, &iWork, &nOutInfo);
1343  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1344  lWork = static_cast<tint>(dWork.real());
1346  basic_rvector<double> vrWork(static_cast<tint>(rWork));
1347  basic_array<tint,tint> viWork(iWork);
1348 
1349  ZGELSD (&nM, &nN, &nrhs, mA._pd(), mA._pld(), mX._pd(), mX._pld(),
1350  vSV, &rcond, &rank, vWork, &lWork, vrWork, viWork, &nOutInfo);
1351  _check_negative(CVM_WRONGMKLARG, nOutInfo);
1352 
1353  mX.resize(nN, nrhs);
1354 }
1356 
1358