You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

421 lines
23 KiB

  1. /* -*- c++ -*- (enables emacs c++ mode) */
  2. /*===========================================================================
  3. Copyright (C) 2003-2012 Yves Renard
  4. This file is a part of GETFEM++
  5. Getfem++ is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU Lesser General Public License as published
  7. by the Free Software Foundation; either version 3 of the License, or
  8. (at your option) any later version along with the GCC Runtime Library
  9. Exception either version 3.1 or (at your option) any later version.
  10. This program is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  13. License and GCC Runtime Library Exception for more details.
  14. You should have received a copy of the GNU Lesser General Public License
  15. along with this program; if not, write to the Free Software Foundation,
  16. Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  17. As a special exception, you may use this file as it is a part of a free
  18. software library without restriction. Specifically, if other files
  19. instantiate templates or use macros or inline functions from this file,
  20. or you compile this file and link it with other files to produce an
  21. executable, this file does not by itself cause the resulting executable
  22. to be covered by the GNU Lesser General Public License. This exception
  23. does not however invalidate any other reasons why the executable file
  24. might be covered by the GNU Lesser General Public License.
  25. ===========================================================================*/
  26. /**@file gmm_lapack_interface.h
  27. @author Yves Renard <Yves.Renard@insa-lyon.fr>
  28. @date October 7, 2003.
  29. @brief gmm interface for LAPACK
  30. */
  31. #if defined(GMM_USES_LAPACK)
  32. #ifndef GMM_LAPACK_INTERFACE_H
  33. #define GMM_LAPACK_INTERFACE_H
  34. #include "gmm_blas_interface.h"
  35. #include "gmm_dense_lu.h"
  36. #include "gmm_dense_qr.h"
  37. namespace gmm {
  38. /* ********************************************************************* */
  39. /* Operations interfaced for T = float, double, std::complex<float> */
  40. /* or std::complex<double> : */
  41. /* */
  42. /* lu_factor(dense_matrix<T>, std::vector<int>) */
  43. /* lu_solve(dense_matrix<T>, std::vector<T>, std::vector<T>) */
  44. /* lu_solve(dense_matrix<T>, std::vector<int>, std::vector<T>, */
  45. /* std::vector<T>) */
  46. /* lu_solve_transposed(dense_matrix<T>, std::vector<int>, std::vector<T>,*/
  47. /* std::vector<T>) */
  48. /* lu_inverse(dense_matrix<T>) */
  49. /* lu_inverse(dense_matrix<T>, std::vector<int>, dense_matrix<T>) */
  50. /* */
  51. /* qr_factor(dense_matrix<T>, dense_matrix<T>, dense_matrix<T>) */
  52. /* */
  53. /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>) */
  54. /* implicit_qr_algorithm(dense_matrix<T>, std::vector<T>, */
  55. /* dense_matrix<T>) */
  56. /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >) */
  57. /* implicit_qr_algorithm(dense_matrix<T>, std::vector<std::complex<T> >, */
  58. /* dense_matrix<T>) */
  59. /* */
  60. /* ********************************************************************* */
  61. /* ********************************************************************* */
  62. /* LAPACK functions used. */
  63. /* ********************************************************************* */
  64. extern "C" {
  65. void sgetrf_(...); void dgetrf_(...); void cgetrf_(...); void zgetrf_(...);
  66. void sgetrs_(...); void dgetrs_(...); void cgetrs_(...); void zgetrs_(...);
  67. void sgetri_(...); void dgetri_(...); void cgetri_(...); void zgetri_(...);
  68. void sgeqrf_(...); void dgeqrf_(...); void cgeqrf_(...); void zgeqrf_(...);
  69. void sorgqr_(...); void dorgqr_(...); void cungqr_(...); void zungqr_(...);
  70. void sormqr_(...); void dormqr_(...); void cunmqr_(...); void zunmqr_(...);
  71. void sgees_ (...); void dgees_ (...); void cgees_ (...); void zgees_ (...);
  72. void sgeev_ (...); void dgeev_ (...); void cgeev_ (...); void zgeev_ (...);
  73. void sgesvd_(...); void dgesvd_(...); void cgesvd_(...); void zgesvd_(...);
  74. }
  75. /* ********************************************************************* */
  76. /* LU decomposition. */
  77. /* ********************************************************************* */
  78. # define getrf_interface(lapack_name, base_type) inline \
  79. size_type lu_factor(dense_matrix<base_type > &A, std::vector<int> &ipvt){\
  80. GMMLAPACK_TRACE("getrf_interface"); \
  81. int m = int(mat_nrows(A)), n = int(mat_ncols(A)), lda(m), info(0); \
  82. if (m && n) lapack_name(&m, &n, &A(0,0), &lda, &ipvt[0], &info); \
  83. return size_type(info); \
  84. }
  85. getrf_interface(sgetrf_, BLAS_S)
  86. getrf_interface(dgetrf_, BLAS_D)
  87. getrf_interface(cgetrf_, BLAS_C)
  88. getrf_interface(zgetrf_, BLAS_Z)
  89. /* ********************************************************************* */
  90. /* LU solve. */
  91. /* ********************************************************************* */
  92. # define getrs_interface(f_name, trans1, lapack_name, base_type) inline \
  93. void f_name(const dense_matrix<base_type > &A, \
  94. const std::vector<int> &ipvt, std::vector<base_type > &x, \
  95. const std::vector<base_type > &b) { \
  96. GMMLAPACK_TRACE("getrs_interface"); \
  97. int n = int(mat_nrows(A)), info, nrhs(1); \
  98. gmm::copy(b, x); trans1; \
  99. if (n) \
  100. lapack_name(&t, &n, &nrhs, &(A(0,0)),&n,&ipvt[0], &x[0], &n, &info); \
  101. }
  102. # define getrs_trans_n const char t = 'N'
  103. # define getrs_trans_t const char t = 'T'
  104. getrs_interface(lu_solve, getrs_trans_n, sgetrs_, BLAS_S)
  105. getrs_interface(lu_solve, getrs_trans_n, dgetrs_, BLAS_D)
  106. getrs_interface(lu_solve, getrs_trans_n, cgetrs_, BLAS_C)
  107. getrs_interface(lu_solve, getrs_trans_n, zgetrs_, BLAS_Z)
  108. getrs_interface(lu_solve_transposed, getrs_trans_t, sgetrs_, BLAS_S)
  109. getrs_interface(lu_solve_transposed, getrs_trans_t, dgetrs_, BLAS_D)
  110. getrs_interface(lu_solve_transposed, getrs_trans_t, cgetrs_, BLAS_C)
  111. getrs_interface(lu_solve_transposed, getrs_trans_t, zgetrs_, BLAS_Z)
  112. /* ********************************************************************* */
  113. /* LU inverse. */
  114. /* ********************************************************************* */
  115. # define getri_interface(lapack_name, base_type) inline \
  116. void lu_inverse(const dense_matrix<base_type > &LU, \
  117. std::vector<int> &ipvt, const dense_matrix<base_type > &A_) { \
  118. GMMLAPACK_TRACE("getri_interface"); \
  119. dense_matrix<base_type >& \
  120. A = const_cast<dense_matrix<base_type > &>(A_); \
  121. int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \
  122. if (n) { \
  123. gmm::copy(LU, A); \
  124. lapack_name(&n, &A(0,0), &n, &ipvt[0], &work1, &lwork, &info); \
  125. lwork = int(gmm::real(work1)); \
  126. std::vector<base_type > work(lwork); \
  127. lapack_name(&n, &A(0,0), &n, &ipvt[0], &work[0], &lwork, &info); \
  128. } \
  129. }
  130. getri_interface(sgetri_, BLAS_S)
  131. getri_interface(dgetri_, BLAS_D)
  132. getri_interface(cgetri_, BLAS_C)
  133. getri_interface(zgetri_, BLAS_Z)
  134. /* ********************************************************************* */
  135. /* QR factorization. */
  136. /* ********************************************************************* */
  137. # define geqrf_interface(lapack_name1, base_type) inline \
  138. void qr_factor(dense_matrix<base_type > &A){ \
  139. GMMLAPACK_TRACE("geqrf_interface"); \
  140. int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1); \
  141. base_type work1; \
  142. if (m && n) { \
  143. std::vector<base_type > tau(n); \
  144. lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work1 , &lwork, &info); \
  145. lwork = int(gmm::real(work1)); \
  146. std::vector<base_type > work(lwork); \
  147. lapack_name1(&m, &n, &A(0,0), &m, &tau[0], &work[0], &lwork, &info); \
  148. GMM_ASSERT1(!info, "QR factorization failed"); \
  149. } \
  150. }
  151. geqrf_interface(sgeqrf_, BLAS_S)
  152. geqrf_interface(dgeqrf_, BLAS_D)
  153. // For complex values, housholder vectors are not the same as in
  154. // gmm::lu_factor. Impossible to interface for the moment.
  155. // geqrf_interface(cgeqrf_, BLAS_C)
  156. // geqrf_interface(zgeqrf_, BLAS_Z)
  157. # define geqrf_interface2(lapack_name1, lapack_name2, base_type) inline \
  158. void qr_factor(const dense_matrix<base_type > &A, \
  159. dense_matrix<base_type > &Q, dense_matrix<base_type > &R) { \
  160. GMMLAPACK_TRACE("geqrf_interface2"); \
  161. int m = int(mat_nrows(A)), n = int(mat_ncols(A)), info, lwork(-1); \
  162. base_type work1; \
  163. if (m && n) { \
  164. gmm::copy(A, Q); \
  165. std::vector<base_type > tau(n); \
  166. lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work1 , &lwork, &info); \
  167. lwork = int(gmm::real(work1)); \
  168. std::vector<base_type > work(lwork); \
  169. lapack_name1(&m, &n, &Q(0,0), &m, &tau[0], &work[0], &lwork, &info); \
  170. GMM_ASSERT1(!info, "QR factorization failed"); \
  171. base_type *p = &R(0,0), *q = &Q(0,0); \
  172. for (int j = 0; j < n; ++j, q += m-n) \
  173. for (int i = 0; i < n; ++i, ++p, ++q) \
  174. *p = (j < i) ? base_type(0) : *q; \
  175. lapack_name2(&m, &n, &n, &Q(0,0), &m,&tau[0],&work[0],&lwork,&info); \
  176. } \
  177. else gmm::clear(Q); \
  178. }
  179. geqrf_interface2(sgeqrf_, sorgqr_, BLAS_S)
  180. geqrf_interface2(dgeqrf_, dorgqr_, BLAS_D)
  181. geqrf_interface2(cgeqrf_, cungqr_, BLAS_C)
  182. geqrf_interface2(zgeqrf_, zungqr_, BLAS_Z)
  183. /* ********************************************************************* */
  184. /* QR algorithm for eigenvalues search. */
  185. /* ********************************************************************* */
  186. # define gees_interface(lapack_name, base_type) \
  187. template <typename VECT> inline void implicit_qr_algorithm( \
  188. const dense_matrix<base_type > &A, const VECT &eigval_, \
  189. dense_matrix<base_type > &Q, \
  190. double tol=gmm::default_tol(base_type()), bool compvect = true) { \
  191. GMMLAPACK_TRACE("gees_interface"); \
  192. typedef bool (*L_fp)(...); L_fp p = 0; \
  193. int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \
  194. if (!n) return; \
  195. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  196. char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \
  197. std::vector<double> rwork(n), eigv1(n), eigv2(n); \
  198. lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \
  199. &eigv2[0], &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
  200. lwork = int(gmm::real(work1)); \
  201. std::vector<base_type > work(lwork); \
  202. lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigv1[0], \
  203. &eigv2[0], &Q(0,0), &n, &work[0], &lwork, &rwork[0],&info);\
  204. GMM_ASSERT1(!info, "QR algorithm failed"); \
  205. extract_eig(H, const_cast<VECT &>(eigval_), tol); \
  206. }
  207. # define gees_interface2(lapack_name, base_type) \
  208. template <typename VECT> inline void implicit_qr_algorithm( \
  209. const dense_matrix<base_type > &A, const VECT &eigval_, \
  210. dense_matrix<base_type > &Q, \
  211. double tol=gmm::default_tol(base_type()), bool compvect = true) { \
  212. GMMLAPACK_TRACE("gees_interface2"); \
  213. typedef bool (*L_fp)(...); L_fp p = 0; \
  214. int n = int(mat_nrows(A)), info, lwork(-1), sdim; base_type work1; \
  215. if (!n) return; \
  216. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  217. char jobvs = (compvect ? 'V' : 'N'), sort = 'N'; \
  218. std::vector<double> rwork(n), eigvv(n*2); \
  219. lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \
  220. &Q(0,0), &n, &work1, &lwork, &rwork[0], &rwork[0], &info); \
  221. lwork = int(gmm::real(work1)); \
  222. std::vector<base_type > work(lwork); \
  223. lapack_name(&jobvs, &sort, p, &n, &H(0,0), &n, &sdim, &eigvv[0], \
  224. &Q(0,0), &n, &work[0], &lwork, &rwork[0], &rwork[0],&info);\
  225. GMM_ASSERT1(!info, "QR algorithm failed"); \
  226. extract_eig(H, const_cast<VECT &>(eigval_), tol); \
  227. }
  228. gees_interface(sgees_, BLAS_S)
  229. gees_interface(dgees_, BLAS_D)
  230. gees_interface2(cgees_, BLAS_C)
  231. gees_interface2(zgees_, BLAS_Z)
  232. # define geev_int_right(lapack_name, base_type) \
  233. template <typename VECT> inline void geev_interface_right( \
  234. const dense_matrix<base_type > &A, const VECT &eigval_, \
  235. dense_matrix<base_type > &Q) { \
  236. GMMLAPACK_TRACE("geev_interface"); \
  237. int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \
  238. if (!n) return; \
  239. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  240. char jobvl = 'N', jobvr = 'V'; \
  241. std::vector<base_type > eigvr(n), eigvi(n); \
  242. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
  243. &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \
  244. lwork = int(gmm::real(work1)); \
  245. std::vector<base_type > work(lwork); \
  246. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
  247. &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \
  248. GMM_ASSERT1(!info, "QR algorithm failed"); \
  249. gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_))); \
  250. gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_))); \
  251. }
  252. # define geev_int_rightc(lapack_name, base_type) \
  253. template <typename VECT> inline void geev_interface_right( \
  254. const dense_matrix<base_type > &A, const VECT &eigval_, \
  255. dense_matrix<base_type > &Q) { \
  256. GMMLAPACK_TRACE("geev_interface"); \
  257. int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \
  258. if (!n) return; \
  259. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  260. char jobvl = 'N', jobvr = 'V'; \
  261. std::vector<double> rwork(2*n); \
  262. std::vector<base_type> eigv(n); \
  263. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
  264. &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
  265. lwork = int(gmm::real(work1)); \
  266. std::vector<base_type > work(lwork); \
  267. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
  268. &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \
  269. GMM_ASSERT1(!info, "QR algorithm failed"); \
  270. gmm::copy(eigv, const_cast<VECT &>(eigval_)); \
  271. }
  272. geev_int_right(sgeev_, BLAS_S)
  273. geev_int_right(dgeev_, BLAS_D)
  274. geev_int_rightc(cgeev_, BLAS_C)
  275. geev_int_rightc(zgeev_, BLAS_Z)
  276. # define geev_int_left(lapack_name, base_type) \
  277. template <typename VECT> inline void geev_interface_left( \
  278. const dense_matrix<base_type > &A, const VECT &eigval_, \
  279. dense_matrix<base_type > &Q) { \
  280. GMMLAPACK_TRACE("geev_interface"); \
  281. int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \
  282. if (!n) return; \
  283. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  284. char jobvl = 'V', jobvr = 'N'; \
  285. std::vector<base_type > eigvr(n), eigvi(n); \
  286. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
  287. &Q(0,0), &n, &Q(0,0), &n, &work1, &lwork, &info); \
  288. lwork = int(gmm::real(work1)); \
  289. std::vector<base_type > work(lwork); \
  290. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigvr[0], &eigvi[0], \
  291. &Q(0,0), &n, &Q(0,0), &n, &work[0], &lwork, &info); \
  292. GMM_ASSERT1(!info, "QR algorithm failed"); \
  293. gmm::copy(eigvr, gmm::real_part(const_cast<VECT &>(eigval_))); \
  294. gmm::copy(eigvi, gmm::imag_part(const_cast<VECT &>(eigval_))); \
  295. }
  296. # define geev_int_leftc(lapack_name, base_type) \
  297. template <typename VECT> inline void geev_interface_left( \
  298. const dense_matrix<base_type > &A, const VECT &eigval_, \
  299. dense_matrix<base_type > &Q) { \
  300. GMMLAPACK_TRACE("geev_interface"); \
  301. int n = int(mat_nrows(A)), info, lwork(-1); base_type work1; \
  302. if (!n) return; \
  303. dense_matrix<base_type > H(n,n); gmm::copy(A, H); \
  304. char jobvl = 'V', jobvr = 'N'; \
  305. std::vector<double> rwork(2*n); \
  306. std::vector<base_type> eigv(n); \
  307. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
  308. &Q(0,0), &n, &work1, &lwork, &rwork[0], &info); \
  309. lwork = int(gmm::real(work1)); \
  310. std::vector<base_type > work(lwork); \
  311. lapack_name(&jobvl, &jobvr, &n, &H(0,0), &n, &eigv[0], &Q(0,0), &n, \
  312. &Q(0,0), &n, &work[0], &lwork, &rwork[0], &info); \
  313. GMM_ASSERT1(!info, "QR algorithm failed"); \
  314. gmm::copy(eigv, const_cast<VECT &>(eigval_)); \
  315. }
  316. geev_int_left(sgeev_, BLAS_S)
  317. geev_int_left(dgeev_, BLAS_D)
  318. geev_int_leftc(cgeev_, BLAS_C)
  319. geev_int_leftc(zgeev_, BLAS_Z)
  320. /* ********************************************************************* */
  321. /* Interface to SVD. Does not correspond to a Gmm++ functionnality. */
  322. /* Author : Sebastian Nowozin <sebastian.nowozin@tuebingen.mpg.de> */
  323. /* ********************************************************************* */
  324. # define gesvd_interface(lapack_name, base_type) inline \
  325. void svd(dense_matrix<base_type> &X, \
  326. dense_matrix<base_type> &U, \
  327. dense_matrix<base_type> &Vtransposed, \
  328. std::vector<base_type> &sigma) { \
  329. GMMLAPACK_TRACE("gesvd_interface"); \
  330. int m = int(mat_nrows(X)), n = int(mat_ncols(X)); \
  331. int mn_min = m < n ? m : n; \
  332. sigma.resize(mn_min); \
  333. std::vector<base_type> work(15 * mn_min); \
  334. int lwork = int(work.size()); \
  335. resize(U, m, n); \
  336. resize(Vtransposed, n, n); \
  337. char job = 'A'; \
  338. int info = -1; \
  339. lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \
  340. &m, &Vtransposed(0,0), &n, &work[0], &lwork, &info); \
  341. }
  342. # define cgesvd_interface(lapack_name, base_type, base_type2) inline \
  343. void svd(dense_matrix<base_type> &X, \
  344. dense_matrix<base_type> &U, \
  345. dense_matrix<base_type> &Vtransposed, \
  346. std::vector<base_type2> &sigma) { \
  347. GMMLAPACK_TRACE("gesvd_interface"); \
  348. int m = int(mat_nrows(X)), n = int(mat_ncols(X)); \
  349. int mn_min = m < n ? m : n; \
  350. sigma.resize(mn_min); \
  351. std::vector<base_type> work(15 * mn_min); \
  352. std::vector<base_type2> rwork(5 * mn_min); \
  353. int lwork = int(work.size()); \
  354. resize(U, m, n); \
  355. resize(Vtransposed, n, n); \
  356. char job = 'A'; \
  357. int info = -1; \
  358. lapack_name(&job, &job, &m, &n, &X(0,0), &m, &sigma[0], &U(0,0), \
  359. &m, &Vtransposed(0,0), &n, &work[0], &lwork, \
  360. &rwork[0], &info); \
  361. }
  362. gesvd_interface(sgesvd_, BLAS_S)
  363. gesvd_interface(dgesvd_, BLAS_D)
  364. cgesvd_interface(cgesvd_, BLAS_C, BLAS_S)
  365. cgesvd_interface(zgesvd_, BLAS_Z, BLAS_D)
  366. }
  367. #endif // GMM_LAPACK_INTERFACE_H
  368. #endif // GMM_USES_LAPACK