You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2490 lines
96 KiB

  1. /* -*- c++ -*- (enables emacs c++ mode) */
  2. /*===========================================================================
  3. Copyright (C) 2002-2017 Yves Renard
  4. This file is a part of GetFEM++
  5. GetFEM++ is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU Lesser General Public License as published
  7. by the Free Software Foundation; either version 3 of the License, or
  8. (at your option) any later version along with the GCC Runtime Library
  9. Exception either version 3.1 or (at your option) any later version.
  10. This program is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  13. License and GCC Runtime Library Exception for more details.
  14. You should have received a copy of the GNU Lesser General Public License
  15. along with this program; if not, write to the Free Software Foundation,
  16. Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  17. As a special exception, you may use this file as it is a part of a free
  18. software library without restriction. Specifically, if other files
  19. instantiate templates or use macros or inline functions from this file,
  20. or you compile this file and link it with other files to produce an
  21. executable, this file does not by itself cause the resulting executable
  22. to be covered by the GNU Lesser General Public License. This exception
  23. does not however invalidate any other reasons why the executable file
  24. might be covered by the GNU Lesser General Public License.
  25. ===========================================================================*/
  26. /**@file gmm_blas.h
  27. @author Yves Renard <Yves.Renard@insa-lyon.fr>
  28. @date October 13, 2002.
  29. @brief Basic linear algebra functions.
  30. */
  31. #ifndef GMM_BLAS_H__
  32. #define GMM_BLAS_H__
  33. // This Version of gmm++ was modified for Storm.
  34. // To detect whether the usage of TBB is possible, this include is neccessary.
  35. #include "storm-config.h"
  36. #ifdef STORM_HAVE_INTELTBB
  37. #include <new> // This fixes a potential dependency ordering problem between GMM and TBB
  38. #include "tbb/tbb.h"
  39. #include <iterator>
  40. #endif
  41. #include "gmm_scaled.h"
  42. #include "gmm_transposed.h"
  43. #include "gmm_conjugated.h"
  44. namespace gmm {
  45. /* ******************************************************************** */
  46. /* */
  47. /* Generic algorithms */
  48. /* */
  49. /* ******************************************************************** */
  50. /* ******************************************************************** */
  51. /* Miscellaneous */
  52. /* ******************************************************************** */
  53. /** clear (fill with zeros) a vector or matrix. */
  54. template <typename L> inline void clear(L &l)
  55. { linalg_traits<L>::do_clear(l); }
  56. /** @cond DOXY_SHOW_ALL_FUNCTIONS
  57. skip all these redundant definitions in doxygen documentation..
  58. */
  59. template <typename L> inline void clear(const L &l)
  60. { linalg_traits<L>::do_clear(linalg_const_cast(l)); }
  61. ///@endcond
  62. /** count the number of non-zero entries of a vector or matrix. */ template <typename L> inline size_type nnz(const L& l)
  63. { return nnz(l, typename linalg_traits<L>::linalg_type()); }
  64. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  65. template <typename L> inline size_type nnz(const L& l, abstract_vector) {
  66. auto it = vect_const_begin(l), ite = vect_const_end(l);
  67. size_type res(0);
  68. for (; it != ite; ++it) ++res;
  69. return res;
  70. }
  71. template <typename L> inline size_type nnz(const L& l, abstract_matrix) {
  72. return nnz(l, typename principal_orientation_type<typename
  73. linalg_traits<L>::sub_orientation>::potype());
  74. }
  75. template <typename L> inline size_type nnz(const L& l, row_major) {
  76. size_type res(0);
  77. for (size_type i = 0; i < mat_nrows(l); ++i)
  78. res += nnz(mat_const_row(l, i));
  79. return res;
  80. }
  81. template <typename L> inline size_type nnz(const L& l, col_major) {
  82. size_type res(0);
  83. for (size_type i = 0; i < mat_ncols(l); ++i)
  84. res += nnz(mat_const_col(l, i));
  85. return res;
  86. }
  87. ///@endcond
  88. /** fill a vector or matrix with x. */
  89. template <typename L> inline
  90. void fill(L& l, typename gmm::linalg_traits<L>::value_type x) {
  91. typedef typename gmm::linalg_traits<L>::value_type T;
  92. if (x == T(0)) gmm::clear(l);
  93. fill(l, x, typename linalg_traits<L>::linalg_type());
  94. }
  95. template <typename L> inline
  96. void fill(const L& l, typename gmm::linalg_traits<L>::value_type x) {
  97. fill(linalg_const_cast(l), x);
  98. }
  99. template <typename L> inline // to be optimized for dense vectors ...
  100. void fill(L& l, typename gmm::linalg_traits<L>::value_type x,
  101. abstract_vector) {
  102. for (size_type i = 0; i < vect_size(l); ++i) l[i] = x;
  103. }
  104. template <typename L> inline // to be optimized for dense matrices ...
  105. void fill(L& l, typename gmm::linalg_traits<L>::value_type x,
  106. abstract_matrix) {
  107. for (size_type i = 0; i < mat_nrows(l); ++i)
  108. for (size_type j = 0; j < mat_ncols(l); ++j)
  109. l(i,j) = x;
  110. }
  111. /** fill a vector or matrix with random value (uniform [-1,1]). */
  112. template <typename L> inline void fill_random(L& l)
  113. { fill_random(l, typename linalg_traits<L>::linalg_type()); }
  114. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  115. template <typename L> inline void fill_random(const L& l) {
  116. fill_random(linalg_const_cast(l),
  117. typename linalg_traits<L>::linalg_type());
  118. }
  119. template <typename L> inline void fill_random(L& l, abstract_vector) {
  120. for (size_type i = 0; i < vect_size(l); ++i)
  121. l[i] = gmm::random(typename linalg_traits<L>::value_type());
  122. }
  123. template <typename L> inline void fill_random(L& l, abstract_matrix) {
  124. for (size_type i = 0; i < mat_nrows(l); ++i)
  125. for (size_type j = 0; j < mat_ncols(l); ++j)
  126. l(i,j) = gmm::random(typename linalg_traits<L>::value_type());
  127. }
  128. ///@endcond
  129. /** fill a vector or matrix with random value.
  130. @param l a vector or matrix.
  131. @param cfill probability of a non-zero value.
  132. */
  133. template <typename L> inline void fill_random(L& l, double cfill)
  134. { fill_random(l, cfill, typename linalg_traits<L>::linalg_type()); }
  135. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  136. template <typename L> inline void fill_random(const L& l, double cfill) {
  137. fill_random(linalg_const_cast(l), cfill,
  138. typename linalg_traits<L>::linalg_type());
  139. }
  140. template <typename L> inline
  141. void fill_random(L& l, double cfill, abstract_vector) {
  142. typedef typename linalg_traits<L>::value_type T;
  143. size_type ntot = std::min(vect_size(l),
  144. size_type(double(vect_size(l))*cfill) + 1);
  145. for (size_type nb = 0; nb < ntot;) {
  146. size_type i = gmm::irandom(vect_size(l));
  147. if (l[i] == T(0)) {
  148. l[i] = gmm::random(typename linalg_traits<L>::value_type());
  149. ++nb;
  150. }
  151. }
  152. }
  153. template <typename L> inline
  154. void fill_random(L& l, double cfill, abstract_matrix) {
  155. fill_random(l, cfill, typename principal_orientation_type<typename
  156. linalg_traits<L>::sub_orientation>::potype());
  157. }
  158. template <typename L> inline
  159. void fill_random(L& l, double cfill, row_major) {
  160. for (size_type i=0; i < mat_nrows(l); ++i) fill_random(mat_row(l,i),cfill);
  161. }
  162. template <typename L> inline
  163. void fill_random(L& l, double cfill, col_major) {
  164. for (size_type j=0; j < mat_ncols(l); ++j) fill_random(mat_col(l,j),cfill);
  165. }
  166. /* resize a vector */
  167. template <typename V> inline
  168. void resize(V &v, size_type n, linalg_false)
  169. { linalg_traits<V>::resize(v, n); }
  170. template <typename V> inline
  171. void resize(V &, size_type , linalg_modifiable)
  172. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  173. template <typename V> inline
  174. void resize(V &, size_type , linalg_const)
  175. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  176. ///@endcond
  177. /** resize a vector. */
  178. template <typename V> inline
  179. void resize(V &v, size_type n) {
  180. resize(v, n, typename linalg_traits<V>::is_reference());
  181. }
  182. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  183. /** resize a matrix **/
  184. template <typename M> inline
  185. void resize(M &v, size_type m, size_type n, linalg_false) {
  186. linalg_traits<M>::resize(v, m, n);
  187. }
  188. template <typename M> inline
  189. void resize(M &, size_type, size_type, linalg_modifiable)
  190. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  191. template <typename M> inline
  192. void resize(M &, size_type, size_type, linalg_const)
  193. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  194. ///@endcond
  195. /** resize a matrix */
  196. template <typename M> inline
  197. void resize(M &v, size_type m, size_type n)
  198. { resize(v, m, n, typename linalg_traits<M>::is_reference()); }
  199. ///@cond
  200. template <typename M> inline
  201. void reshape(M &v, size_type m, size_type n, linalg_false)
  202. { linalg_traits<M>::reshape(v, m, n); }
  203. template <typename M> inline
  204. void reshape(M &, size_type, size_type, linalg_modifiable)
  205. { GMM_ASSERT1(false, "You cannot reshape a reference"); }
  206. template <typename M> inline
  207. void reshape(M &, size_type, size_type, linalg_const)
  208. { GMM_ASSERT1(false, "You cannot reshape a reference"); }
  209. ///@endcond
  210. /** reshape a matrix */
  211. template <typename M> inline
  212. void reshape(M &v, size_type m, size_type n)
  213. { reshape(v, m, n, typename linalg_traits<M>::is_reference()); }
  214. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  215. /* ******************************************************************** */
  216. /* Scalar product */
  217. /* ******************************************************************** */
  218. ///@endcond
  219. /** scalar product between two vectors */
  220. template <typename V1, typename V2> inline
  221. typename strongest_value_type<V1,V2>::value_type
  222. vect_sp(const V1 &v1, const V2 &v2) {
  223. GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch, "
  224. << vect_size(v1) << " !=" << vect_size(v2));
  225. return vect_sp(v1, v2,
  226. typename linalg_traits<V1>::storage_type(),
  227. typename linalg_traits<V2>::storage_type());
  228. }
  229. /** scalar product between two vectors, using a matrix.
  230. @param ps the matrix of the scalar product.
  231. @param v1 the first vector
  232. @param v2 the second vector
  233. */
  234. template <typename MATSP, typename V1, typename V2> inline
  235. typename strongest_value_type3<V1,V2,MATSP>::value_type
  236. vect_sp(const MATSP &ps, const V1 &v1, const V2 &v2) {
  237. return vect_sp_with_mat(ps, v1, v2,
  238. typename linalg_traits<MATSP>::sub_orientation());
  239. }
  240. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  241. template <typename MATSP, typename V1, typename V2> inline
  242. typename strongest_value_type3<V1,V2,MATSP>::value_type
  243. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2, row_major) {
  244. return vect_sp_with_matr(ps, v1, v2,
  245. typename linalg_traits<V2>::storage_type());
  246. }
  247. template <typename MATSP, typename V1, typename V2> inline
  248. typename strongest_value_type3<V1,V2,MATSP>::value_type
  249. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  250. abstract_sparse) {
  251. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  252. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  253. size_type nr = mat_nrows(ps);
  254. typename linalg_traits<V2>::const_iterator
  255. it = vect_const_begin(v2), ite = vect_const_end(v2);
  256. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  257. for (; it != ite; ++it)
  258. res += vect_sp(mat_const_row(ps, it.index()), v1)* (*it);
  259. return res;
  260. }
  261. template <typename MATSP, typename V1, typename V2> inline
  262. typename strongest_value_type3<V1,V2,MATSP>::value_type
  263. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  264. abstract_skyline)
  265. { return vect_sp_with_matr(ps, v1, v2, abstract_sparse()); }
  266. template <typename MATSP, typename V1, typename V2> inline
  267. typename strongest_value_type3<V1,V2,MATSP>::value_type
  268. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  269. abstract_dense) {
  270. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  271. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  272. typename linalg_traits<V2>::const_iterator
  273. it = vect_const_begin(v2), ite = vect_const_end(v2);
  274. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  275. for (size_type i = 0; it != ite; ++i, ++it)
  276. res += vect_sp(mat_const_row(ps, i), v1) * (*it);
  277. return res;
  278. }
  279. template <typename MATSP, typename V1, typename V2> inline
  280. typename strongest_value_type3<V1,V2,MATSP>::value_type
  281. vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,row_and_col)
  282. { return vect_sp_with_mat(ps, v1, v2, row_major()); }
  283. template <typename MATSP, typename V1, typename V2> inline
  284. typename strongest_value_type3<V1,V2,MATSP>::value_type
  285. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,col_major){
  286. return vect_sp_with_matc(ps, v1, v2,
  287. typename linalg_traits<V1>::storage_type());
  288. }
  289. template <typename MATSP, typename V1, typename V2> inline
  290. typename strongest_value_type3<V1,V2,MATSP>::value_type
  291. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  292. abstract_sparse) {
  293. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  294. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  295. typename linalg_traits<V1>::const_iterator
  296. it = vect_const_begin(v1), ite = vect_const_end(v1);
  297. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  298. for (; it != ite; ++it)
  299. res += vect_sp(mat_const_col(ps, it.index()), v2) * (*it);
  300. return res;
  301. }
  302. template <typename MATSP, typename V1, typename V2> inline
  303. typename strongest_value_type3<V1,V2,MATSP>::value_type
  304. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  305. abstract_skyline)
  306. { return vect_sp_with_matc(ps, v1, v2, abstract_sparse()); }
  307. template <typename MATSP, typename V1, typename V2> inline
  308. typename strongest_value_type3<V1,V2,MATSP>::value_type
  309. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  310. abstract_dense) {
  311. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  312. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  313. typename linalg_traits<V1>::const_iterator
  314. it = vect_const_begin(v1), ite = vect_const_end(v1);
  315. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  316. for (size_type i = 0; it != ite; ++i, ++it)
  317. res += vect_sp(mat_const_col(ps, i), v2) * (*it);
  318. return res;
  319. }
  320. template <typename MATSP, typename V1, typename V2> inline
  321. typename strongest_value_type3<V1,V2,MATSP>::value_type
  322. vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,col_and_row)
  323. { return vect_sp_with_mat(ps, v1, v2, col_major()); }
  324. template <typename MATSP, typename V1, typename V2> inline
  325. typename strongest_value_type3<V1,V2,MATSP>::value_type
  326. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,
  327. abstract_null_type) {
  328. typename temporary_vector<V1>::vector_type w(mat_nrows(ps));
  329. GMM_WARNING2("Warning, a temporary is used in scalar product\n");
  330. mult(ps, v1, w);
  331. return vect_sp(w, v2);
  332. }
  333. template <typename IT1, typename IT2> inline
  334. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  335. typename std::iterator_traits<IT2>::value_type>::T
  336. vect_sp_dense_(IT1 it, IT1 ite, IT2 it2) {
  337. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  338. typename std::iterator_traits<IT2>::value_type>::T res(0);
  339. for (; it != ite; ++it, ++it2) res += (*it) * (*it2);
  340. return res;
  341. }
  342. template <typename IT1, typename V> inline
  343. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  344. typename linalg_traits<V>::value_type>::T
  345. vect_sp_sparse_(IT1 it, IT1 ite, const V &v) {
  346. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  347. typename linalg_traits<V>::value_type>::T res(0);
  348. for (; it != ite; ++it) res += (*it) * v[it.index()];
  349. return res;
  350. }
  351. template <typename V1, typename V2> inline
  352. typename strongest_value_type<V1,V2>::value_type
  353. vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_dense) {
  354. return vect_sp_dense_(vect_const_begin(v1), vect_const_end(v1),
  355. vect_const_begin(v2));
  356. }
  357. template <typename V1, typename V2> inline
  358. typename strongest_value_type<V1,V2>::value_type
  359. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_dense) {
  360. typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
  361. ite = vect_const_end(v1);
  362. typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2);
  363. return vect_sp_dense_(it1, ite, it2 + it1.index());
  364. }
  365. template <typename V1, typename V2> inline
  366. typename strongest_value_type<V1,V2>::value_type
  367. vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_skyline) {
  368. typename linalg_traits<V2>::const_iterator it1 = vect_const_begin(v2),
  369. ite = vect_const_end(v2);
  370. typename linalg_traits<V1>::const_iterator it2 = vect_const_begin(v1);
  371. return vect_sp_dense_(it1, ite, it2 + it1.index());
  372. }
  373. template <typename V1, typename V2> inline
  374. typename strongest_value_type<V1,V2>::value_type
  375. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_skyline) {
  376. typedef typename strongest_value_type<V1,V2>::value_type T;
  377. auto it1 = vect_const_begin(v1), ite1 = vect_const_end(v1);
  378. auto it2 = vect_const_begin(v2), ite2 = vect_const_end(v2);
  379. size_type n = std::min(ite1.index(), ite2.index());
  380. size_type l = std::max(it1.index(), it2.index());
  381. if (l < n) {
  382. size_type m = l - it1.index(), p = l - it2.index(), q = m + n - l;
  383. return vect_sp_dense_(it1+m, it1+q, it2 + p);
  384. }
  385. return T(0);
  386. }
  387. template <typename V1, typename V2> inline
  388. typename strongest_value_type<V1,V2>::value_type
  389. vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_dense) {
  390. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  391. }
  392. template <typename V1, typename V2> inline
  393. typename strongest_value_type<V1,V2>::value_type
  394. vect_sp(const V1 &v1, const V2 &v2, abstract_sparse, abstract_skyline) {
  395. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  396. }
  397. template <typename V1, typename V2> inline
  398. typename strongest_value_type<V1,V2>::value_type
  399. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_sparse) {
  400. return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
  401. }
  402. template <typename V1, typename V2> inline
  403. typename strongest_value_type<V1,V2>::value_type
  404. vect_sp(const V1 &v1, const V2 &v2, abstract_dense,abstract_sparse) {
  405. return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
  406. }
  407. template <typename V1, typename V2> inline
  408. typename strongest_value_type<V1,V2>::value_type
  409. vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_true) {
  410. typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
  411. ite1 = vect_const_end(v1);
  412. typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2),
  413. ite2 = vect_const_end(v2);
  414. typename strongest_value_type<V1,V2>::value_type res(0);
  415. while (it1 != ite1 && it2 != ite2) {
  416. if (it1.index() == it2.index())
  417. { res += (*it1) * *it2; ++it1; ++it2; }
  418. else if (it1.index() < it2.index()) ++it1; else ++it2;
  419. }
  420. return res;
  421. }
  422. template <typename V1, typename V2> inline
  423. typename strongest_value_type<V1,V2>::value_type
  424. vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_false) {
  425. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  426. }
  427. template <typename V1, typename V2> inline
  428. typename strongest_value_type<V1,V2>::value_type
  429. vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_sparse) {
  430. return vect_sp_sparse_sparse(v1, v2,
  431. typename linalg_and<typename linalg_traits<V1>::index_sorted,
  432. typename linalg_traits<V2>::index_sorted>::bool_type());
  433. }
  434. /* ******************************************************************** */
  435. /* Hermitian product */
  436. /* ******************************************************************** */
  437. ///@endcond
  438. /** Hermitian product. */
  439. template <typename V1, typename V2>
  440. inline typename strongest_value_type<V1,V2>::value_type
  441. vect_hp(const V1 &v1, const V2 &v2)
  442. { return vect_sp(v1, conjugated(v2)); }
  443. /** Hermitian product with a matrix. */
  444. template <typename MATSP, typename V1, typename V2> inline
  445. typename strongest_value_type3<V1,V2,MATSP>::value_type
  446. vect_hp(const MATSP &ps, const V1 &v1, const V2 &v2) {
  447. return vect_sp(ps, v1, gmm::conjugated(v2));
  448. }
  449. /* ******************************************************************** */
  450. /* Trace of a matrix */
  451. /* ******************************************************************** */
  452. /** Trace of a matrix */
  453. template <typename M>
  454. typename linalg_traits<M>::value_type
  455. mat_trace(const M &m) {
  456. typedef typename linalg_traits<M>::value_type T;
  457. T res(0);
  458. for (size_type i = 0; i < std::min(mat_nrows(m), mat_ncols(m)); ++i)
  459. res += m(i,i);
  460. return res;
  461. }
  462. /* ******************************************************************** */
  463. /* Euclidean norm */
  464. /* ******************************************************************** */
  465. /** squared Euclidean norm of a vector. */
  466. template <typename V>
  467. typename number_traits<typename linalg_traits<V>::value_type>
  468. ::magnitude_type
  469. vect_norm2_sqr(const V &v) {
  470. typedef typename linalg_traits<V>::value_type T;
  471. typedef typename number_traits<T>::magnitude_type R;
  472. auto it = vect_const_begin(v), ite = vect_const_end(v);
  473. R res(0);
  474. for (; it != ite; ++it) res += gmm::abs_sqr(*it);
  475. return res;
  476. }
  477. /** Euclidean norm of a vector. */
  478. template <typename V> inline
  479. typename number_traits<typename linalg_traits<V>::value_type>
  480. ::magnitude_type
  481. vect_norm2(const V &v)
  482. { return sqrt(vect_norm2_sqr(v)); }
  483. /** squared Euclidean distance between two vectors */
  484. template <typename V1, typename V2> inline
  485. typename number_traits<typename linalg_traits<V1>::value_type>
  486. ::magnitude_type
  487. vect_dist2_sqr(const V1 &v1, const V2 &v2) { // not fully optimized
  488. typedef typename linalg_traits<V1>::value_type T;
  489. typedef typename number_traits<T>::magnitude_type R;
  490. auto it1 = vect_const_begin(v1), ite1 = vect_const_end(v1);
  491. auto it2 = vect_const_begin(v2), ite2 = vect_const_end(v2);
  492. size_type k1(0), k2(0);
  493. R res(0);
  494. while (it1 != ite1 && it2 != ite2) {
  495. size_type i1 = index_of_it(it1, k1,
  496. typename linalg_traits<V1>::storage_type());
  497. size_type i2 = index_of_it(it2, k2,
  498. typename linalg_traits<V2>::storage_type());
  499. if (i1 == i2) {
  500. res += gmm::abs_sqr(*it2 - *it1); ++it1; ++k1; ++it2; ++k2;
  501. }
  502. else if (i1 < i2) {
  503. res += gmm::abs_sqr(*it1); ++it1; ++k1;
  504. }
  505. else {
  506. res += gmm::abs_sqr(*it2); ++it2; ++k2;
  507. }
  508. }
  509. while (it1 != ite1) { res += gmm::abs_sqr(*it1); ++it1; }
  510. while (it2 != ite2) { res += gmm::abs_sqr(*it2); ++it2; }
  511. return res;
  512. }
  513. /** Euclidean distance between two vectors */
  514. template <typename V1, typename V2> inline
  515. typename number_traits<typename linalg_traits<V1>::value_type>
  516. ::magnitude_type
  517. vect_dist2(const V1 &v1, const V2 &v2)
  518. { return sqrt(vect_dist2_sqr(v1, v2)); }
  519. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  520. template <typename M>
  521. typename number_traits<typename linalg_traits<M>::value_type>
  522. ::magnitude_type
  523. mat_euclidean_norm_sqr(const M &m, row_major) {
  524. typename number_traits<typename linalg_traits<M>::value_type>
  525. ::magnitude_type res(0);
  526. for (size_type i = 0; i < mat_nrows(m); ++i)
  527. res += vect_norm2_sqr(mat_const_row(m, i));
  528. return res;
  529. }
  530. template <typename M>
  531. typename number_traits<typename linalg_traits<M>::value_type>
  532. ::magnitude_type
  533. mat_euclidean_norm_sqr(const M &m, col_major) {
  534. typename number_traits<typename linalg_traits<M>::value_type>
  535. ::magnitude_type res(0);
  536. for (size_type i = 0; i < mat_ncols(m); ++i)
  537. res += vect_norm2_sqr(mat_const_col(m, i));
  538. return res;
  539. }
  540. ///@endcond
  541. /** squared Euclidean norm of a matrix. */
  542. template <typename M> inline
  543. typename number_traits<typename linalg_traits<M>::value_type>
  544. ::magnitude_type
  545. mat_euclidean_norm_sqr(const M &m) {
  546. return mat_euclidean_norm_sqr(m,
  547. typename principal_orientation_type<typename
  548. linalg_traits<M>::sub_orientation>::potype());
  549. }
  550. /** Euclidean norm of a matrix. */
  551. template <typename M> inline
  552. typename number_traits<typename linalg_traits<M>::value_type>
  553. ::magnitude_type
  554. mat_euclidean_norm(const M &m)
  555. { return gmm::sqrt(mat_euclidean_norm_sqr(m)); }
  556. /* ******************************************************************** */
  557. /* vector norm1 */
  558. /* ******************************************************************** */
  559. /** 1-norm of a vector */
  560. template <typename V>
  561. typename number_traits<typename linalg_traits<V>::value_type>
  562. ::magnitude_type
  563. vect_norm1(const V &v) {
  564. auto it = vect_const_begin(v), ite = vect_const_end(v);
  565. typename number_traits<typename linalg_traits<V>::value_type>
  566. ::magnitude_type res(0);
  567. for (; it != ite; ++it) res += gmm::abs(*it);
  568. return res;
  569. }
  570. /* ******************************************************************** */
  571. /* vector Infinity norm */
  572. /* ******************************************************************** */
  573. /** Infinity norm of a vector. */
  574. template <typename V>
  575. typename number_traits<typename linalg_traits<V>::value_type>
  576. ::magnitude_type
  577. vect_norminf(const V &v) {
  578. auto it = vect_const_begin(v), ite = vect_const_end(v);
  579. typename number_traits<typename linalg_traits<V>::value_type>
  580. ::magnitude_type res(0);
  581. for (; it != ite; ++it) res = std::max(res, gmm::abs(*it));
  582. return res;
  583. }
  584. /* ******************************************************************** */
  585. /* matrix norm1 */
  586. /* ******************************************************************** */
  587. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  588. template <typename M>
  589. typename number_traits<typename linalg_traits<M>::value_type>
  590. ::magnitude_type
  591. mat_norm1(const M &m, col_major) {
  592. typename number_traits<typename linalg_traits<M>::value_type>
  593. ::magnitude_type res(0);
  594. for (size_type i = 0; i < mat_ncols(m); ++i)
  595. res = std::max(res, vect_norm1(mat_const_col(m,i)));
  596. return res;
  597. }
  598. template <typename M>
  599. typename number_traits<typename linalg_traits<M>::value_type>
  600. ::magnitude_type
  601. mat_norm1(const M &m, row_major) {
  602. typedef typename linalg_traits<M>::value_type T;
  603. typedef typename number_traits<T>::magnitude_type R;
  604. typedef typename linalg_traits<M>::storage_type store_type;
  605. std::vector<R> aux(mat_ncols(m));
  606. for (size_type i = 0; i < mat_nrows(m); ++i) {
  607. typename linalg_traits<M>::const_sub_row_type row = mat_const_row(m, i);
  608. auto it = vect_const_begin(row), ite = vect_const_end(row);
  609. for (size_type k = 0; it != ite; ++it, ++k)
  610. aux[index_of_it(it, k, store_type())] += gmm::abs(*it);
  611. }
  612. return vect_norminf(aux);
  613. }
  614. template <typename M>
  615. typename number_traits<typename linalg_traits<M>::value_type>
  616. ::magnitude_type
  617. mat_norm1(const M &m, col_and_row)
  618. { return mat_norm1(m, col_major()); }
  619. template <typename M>
  620. typename number_traits<typename linalg_traits<M>::value_type>
  621. ::magnitude_type
  622. mat_norm1(const M &m, row_and_col)
  623. { return mat_norm1(m, col_major()); }
  624. ///@endcond
  625. /** 1-norm of a matrix */
  626. template <typename M>
  627. typename number_traits<typename linalg_traits<M>::value_type>
  628. ::magnitude_type
  629. mat_norm1(const M &m) {
  630. return mat_norm1(m, typename linalg_traits<M>::sub_orientation());
  631. }
  632. /* ******************************************************************** */
  633. /* matrix Infinity norm */
  634. /* ******************************************************************** */
  635. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  636. template <typename M>
  637. typename number_traits<typename linalg_traits<M>::value_type>
  638. ::magnitude_type
  639. mat_norminf(const M &m, row_major) {
  640. typename number_traits<typename linalg_traits<M>::value_type>
  641. ::magnitude_type res(0);
  642. for (size_type i = 0; i < mat_nrows(m); ++i)
  643. res = std::max(res, vect_norm1(mat_const_row(m,i)));
  644. return res;
  645. }
  646. template <typename M>
  647. typename number_traits<typename linalg_traits<M>::value_type>
  648. ::magnitude_type
  649. mat_norminf(const M &m, col_major) {
  650. typedef typename linalg_traits<M>::value_type T;
  651. typedef typename number_traits<T>::magnitude_type R;
  652. typedef typename linalg_traits<M>::storage_type store_type;
  653. std::vector<R> aux(mat_nrows(m));
  654. for (size_type i = 0; i < mat_ncols(m); ++i) {
  655. typename linalg_traits<M>::const_sub_col_type col = mat_const_col(m, i);
  656. auto it = vect_const_begin(col), ite = vect_const_end(col);
  657. for (size_type k = 0; it != ite; ++it, ++k)
  658. aux[index_of_it(it, k, store_type())] += gmm::abs(*it);
  659. }
  660. return vect_norminf(aux);
  661. }
  662. template <typename M>
  663. typename number_traits<typename linalg_traits<M>::value_type>
  664. ::magnitude_type
  665. mat_norminf(const M &m, col_and_row)
  666. { return mat_norminf(m, row_major()); }
  667. template <typename M>
  668. typename number_traits<typename linalg_traits<M>::value_type>
  669. ::magnitude_type
  670. mat_norminf(const M &m, row_and_col)
  671. { return mat_norminf(m, row_major()); }
  672. ///@endcond
  673. /** infinity-norm of a matrix.*/
  674. template <typename M>
  675. typename number_traits<typename linalg_traits<M>::value_type>
  676. ::magnitude_type
  677. mat_norminf(const M &m) {
  678. return mat_norminf(m, typename linalg_traits<M>::sub_orientation());
  679. }
  680. /* ******************************************************************** */
  681. /* Max norm for matrices */
  682. /* ******************************************************************** */
  683. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  684. template <typename M>
  685. typename number_traits<typename linalg_traits<M>::value_type>
  686. ::magnitude_type
  687. mat_maxnorm(const M &m, row_major) {
  688. typename number_traits<typename linalg_traits<M>::value_type>
  689. ::magnitude_type res(0);
  690. for (size_type i = 0; i < mat_nrows(m); ++i)
  691. res = std::max(res, vect_norminf(mat_const_row(m,i)));
  692. return res;
  693. }
  694. template <typename M>
  695. typename number_traits<typename linalg_traits<M>::value_type>
  696. ::magnitude_type
  697. mat_maxnorm(const M &m, col_major) {
  698. typename number_traits<typename linalg_traits<M>::value_type>
  699. ::magnitude_type res(0);
  700. for (size_type i = 0; i < mat_ncols(m); ++i)
  701. res = std::max(res, vect_norminf(mat_const_col(m,i)));
  702. return res;
  703. }
  704. ///@endcond
  705. /** max-norm of a matrix. */
  706. template <typename M>
  707. typename number_traits<typename linalg_traits<M>::value_type>
  708. ::magnitude_type
  709. mat_maxnorm(const M &m) {
  710. return mat_maxnorm(m,
  711. typename principal_orientation_type<typename
  712. linalg_traits<M>::sub_orientation>::potype());
  713. }
  714. /* ******************************************************************** */
  715. /* Clean */
  716. /* ******************************************************************** */
  717. /** Clean a vector or matrix (replace near-zero entries with zeroes). */
  718. template <typename L> inline void clean(L &l, double threshold);
  719. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  720. template <typename L, typename T>
  721. void clean(L &l, double threshold, abstract_dense, T) {
  722. typedef typename number_traits<T>::magnitude_type R;
  723. auto it = vect_begin(l), ite = vect_end(l);
  724. for (; it != ite; ++it)
  725. if (gmm::abs(*it) < R(threshold)) *it = T(0);
  726. }
  727. template <typename L, typename T>
  728. void clean(L &l, double threshold, abstract_skyline, T)
  729. { gmm::clean(l, threshold, abstract_dense(), T()); }
  730. template <typename L, typename T>
  731. void clean(L &l, double threshold, abstract_sparse, T) {
  732. typedef typename number_traits<T>::magnitude_type R;
  733. auto it = vect_begin(l), ite = vect_end(l);
  734. std::vector<size_type> ind;
  735. for (; it != ite; ++it)
  736. if (gmm::abs(*it) < R(threshold)) ind.push_back(it.index());
  737. for (size_type i = 0; i < ind.size(); ++i) l[ind[i]] = T(0);
  738. }
  739. template <typename L, typename T>
  740. void clean(L &l, double threshold, abstract_dense, std::complex<T>) {
  741. auto it = vect_begin(l), ite = vect_end(l);
  742. for (; it != ite; ++it){
  743. if (gmm::abs((*it).real()) < T(threshold))
  744. *it = std::complex<T>(T(0), (*it).imag());
  745. if (gmm::abs((*it).imag()) < T(threshold))
  746. *it = std::complex<T>((*it).real(), T(0));
  747. }
  748. }
  749. template <typename L, typename T>
  750. void clean(L &l, double threshold, abstract_skyline, std::complex<T>)
  751. { gmm::clean(l, threshold, abstract_dense(), std::complex<T>()); }
  752. template <typename L, typename T>
  753. void clean(L &l, double threshold, abstract_sparse, std::complex<T>) {
  754. auto it = vect_begin(l), ite = vect_end(l);
  755. std::vector<size_type> ind;
  756. for (; it != ite; ++it) {
  757. bool r = (gmm::abs((*it).real()) < T(threshold));
  758. bool i = (gmm::abs((*it).imag()) < T(threshold));
  759. if (r && i) ind.push_back(it.index());
  760. else if (r) *it = std::complex<T>(T(0), (*it).imag());
  761. else if (i) *it = std::complex<T>((*it).real(), T(0));
  762. }
  763. for (size_type i = 0; i < ind.size(); ++i)
  764. l[ind[i]] = std::complex<T>(T(0),T(0));
  765. }
  766. template <typename L> inline void clean(L &l, double threshold,
  767. abstract_vector) {
  768. gmm::clean(l, threshold, typename linalg_traits<L>::storage_type(),
  769. typename linalg_traits<L>::value_type());
  770. }
  771. template <typename L> inline void clean(const L &l, double threshold);
  772. template <typename L> void clean(L &l, double threshold, row_major) {
  773. for (size_type i = 0; i < mat_nrows(l); ++i)
  774. gmm::clean(mat_row(l, i), threshold);
  775. }
  776. template <typename L> void clean(L &l, double threshold, col_major) {
  777. for (size_type i = 0; i < mat_ncols(l); ++i)
  778. gmm::clean(mat_col(l, i), threshold);
  779. }
  780. template <typename L> inline void clean(L &l, double threshold,
  781. abstract_matrix) {
  782. gmm::clean(l, threshold,
  783. typename principal_orientation_type<typename
  784. linalg_traits<L>::sub_orientation>::potype());
  785. }
  786. template <typename L> inline void clean(L &l, double threshold)
  787. { clean(l, threshold, typename linalg_traits<L>::linalg_type()); }
  788. template <typename L> inline void clean(const L &l, double threshold)
  789. { gmm::clean(linalg_const_cast(l), threshold); }
  790. /* ******************************************************************** */
  791. /* Copy */
  792. /* ******************************************************************** */
  793. ///@endcond
  794. /** Copy vectors or matrices.
  795. @param l1 source vector or matrix.
  796. @param l2 destination.
  797. */
  798. template <typename L1, typename L2> inline
  799. void copy(const L1& l1, L2& l2) {
  800. if ((const void *)(&l1) != (const void *)(&l2)) {
  801. if (same_origin(l1,l2))
  802. GMM_WARNING2("Warning : a conflict is possible in copy\n");
  803. copy(l1, l2, typename linalg_traits<L1>::linalg_type(),
  804. typename linalg_traits<L2>::linalg_type());
  805. }
  806. }
  807. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  808. template <typename L1, typename L2> inline
  809. void copy(const L1& l1, const L2& l2) { copy(l1, linalg_const_cast(l2)); }
  810. template <typename L1, typename L2> inline
  811. void copy(const L1& l1, L2& l2, abstract_vector, abstract_vector) {
  812. GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, "
  813. << vect_size(l1) << " !=" << vect_size(l2));
  814. copy_vect(l1, l2, typename linalg_traits<L1>::storage_type(),
  815. typename linalg_traits<L2>::storage_type());
  816. }
  817. template <typename L1, typename L2> inline
  818. void copy(const L1& l1, L2& l2, abstract_matrix, abstract_matrix) {
  819. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  820. if (!m || !n) return;
  821. GMM_ASSERT2(n==mat_ncols(l2) && m==mat_nrows(l2), "dimensions mismatch");
  822. copy_mat(l1, l2, typename linalg_traits<L1>::sub_orientation(),
  823. typename linalg_traits<L2>::sub_orientation());
  824. }
  825. template <typename V1, typename V2, typename C1, typename C2> inline
  826. void copy_vect(const V1 &v1, const V2 &v2, C1, C2)
  827. { copy_vect(v1, const_cast<V2 &>(v2), C1(), C2()); }
  828. template <typename L1, typename L2>
  829. void copy_mat_by_row(const L1& l1, L2& l2) {
  830. size_type nbr = mat_nrows(l1);
  831. for (size_type i = 0; i < nbr; ++i)
  832. copy(mat_const_row(l1, i), mat_row(l2, i));
  833. }
  834. template <typename L1, typename L2>
  835. void copy_mat_by_col(const L1 &l1, L2 &l2) {
  836. size_type nbc = mat_ncols(l1);
  837. for (size_type i = 0; i < nbc; ++i) {
  838. copy(mat_const_col(l1, i), mat_col(l2, i));
  839. }
  840. }
  841. template <typename L1, typename L2> inline
  842. void copy_mat(const L1& l1, L2& l2, row_major, row_major)
  843. { copy_mat_by_row(l1, l2); }
  844. template <typename L1, typename L2> inline
  845. void copy_mat(const L1& l1, L2& l2, row_major, row_and_col)
  846. { copy_mat_by_row(l1, l2); }
  847. template <typename L1, typename L2> inline
  848. void copy_mat(const L1& l1, L2& l2, row_and_col, row_and_col)
  849. { copy_mat_by_row(l1, l2); }
  850. template <typename L1, typename L2> inline
  851. void copy_mat(const L1& l1, L2& l2, row_and_col, row_major)
  852. { copy_mat_by_row(l1, l2); }
  853. template <typename L1, typename L2> inline
  854. void copy_mat(const L1& l1, L2& l2, col_and_row, row_major)
  855. { copy_mat_by_row(l1, l2); }
  856. template <typename L1, typename L2> inline
  857. void copy_mat(const L1& l1, L2& l2, row_major, col_and_row)
  858. { copy_mat_by_row(l1, l2); }
  859. template <typename L1, typename L2> inline
  860. void copy_mat(const L1& l1, L2& l2, col_and_row, row_and_col)
  861. { copy_mat_by_row(l1, l2); }
  862. template <typename L1, typename L2> inline
  863. void copy_mat(const L1& l1, L2& l2, row_and_col, col_and_row)
  864. { copy_mat_by_row(l1, l2); }
  865. template <typename L1, typename L2> inline
  866. void copy_mat(const L1& l1, L2& l2, col_major, col_major)
  867. { copy_mat_by_col(l1, l2); }
  868. template <typename L1, typename L2> inline
  869. void copy_mat(const L1& l1, L2& l2, col_major, col_and_row)
  870. { copy_mat_by_col(l1, l2); }
  871. template <typename L1, typename L2> inline
  872. void copy_mat(const L1& l1, L2& l2, col_major, row_and_col)
  873. { copy_mat_by_col(l1, l2); }
  874. template <typename L1, typename L2> inline
  875. void copy_mat(const L1& l1, L2& l2, row_and_col, col_major)
  876. { copy_mat_by_col(l1, l2); }
  877. template <typename L1, typename L2> inline
  878. void copy_mat(const L1& l1, L2& l2, col_and_row, col_major)
  879. { copy_mat_by_col(l1, l2); }
  880. template <typename L1, typename L2> inline
  881. void copy_mat(const L1& l1, L2& l2, col_and_row, col_and_row)
  882. { copy_mat_by_col(l1, l2); }
  883. template <typename L1, typename L2> inline
  884. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
  885. copy_mat_mixed_rc(l1, l2, i, typename linalg_traits<L1>::storage_type());
  886. }
  887. template <typename L1, typename L2>
  888. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  889. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  890. for (; it != ite; ++it)
  891. l2(i, it.index()) = *it;
  892. }
  893. template <typename L1, typename L2>
  894. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  895. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  896. for (; it != ite; ++it)
  897. l2(i, it.index()) = *it;
  898. }
  899. template <typename L1, typename L2>
  900. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
  901. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  902. for (size_type j = 0; it != ite; ++it, ++j) l2(i, j) = *it;
  903. }
  904. template <typename L1, typename L2> inline
  905. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
  906. copy_mat_mixed_cr(l1, l2, i, typename linalg_traits<L1>::storage_type());
  907. }
  908. template <typename L1, typename L2>
  909. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  910. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  911. for (; it != ite; ++it) l2(it.index(), i) = *it;
  912. }
  913. template <typename L1, typename L2>
  914. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  915. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  916. for (; it != ite; ++it) l2(it.index(), i) = *it;
  917. }
  918. template <typename L1, typename L2>
  919. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
  920. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  921. for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) = *it;
  922. }
  923. template <typename L1, typename L2>
  924. void copy_mat(const L1& l1, L2& l2, row_major, col_major) {
  925. clear(l2);
  926. size_type nbr = mat_nrows(l1);
  927. for (size_type i = 0; i < nbr; ++i)
  928. copy_mat_mixed_rc(mat_const_row(l1, i), l2, i);
  929. }
  930. template <typename L1, typename L2>
  931. void copy_mat(const L1& l1, L2& l2, col_major, row_major) {
  932. clear(l2);
  933. size_type nbc = mat_ncols(l1);
  934. for (size_type i = 0; i < nbc; ++i)
  935. copy_mat_mixed_cr(mat_const_col(l1, i), l2, i);
  936. }
  937. template <typename L1, typename L2> inline
  938. void copy_vect(const L1 &l1, L2 &l2, abstract_dense, abstract_dense) {
  939. std::copy(vect_const_begin(l1), vect_const_end(l1), vect_begin(l2));
  940. }
  941. template <typename L1, typename L2> inline // to be optimised ?
  942. void copy_vect(const L1 &l1, L2 &l2, abstract_skyline, abstract_skyline) {
  943. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  944. while (it1 != ite1 && *it1 == typename linalg_traits<L1>::value_type(0))
  945. ++it1;
  946. if (ite1 - it1 > 0) {
  947. clear(l2);
  948. auto it2 = vect_begin(l2), ite2 = vect_end(l2);
  949. while (*(ite1-1) == typename linalg_traits<L1>::value_type(0)) ite1--;
  950. if (it2 == ite2) {
  951. l2[it1.index()] = *it1; ++it1;
  952. l2[ite1.index()-1] = *(ite1-1); --ite1;
  953. if (it1 < ite1)
  954. { it2 = vect_begin(l2); ++it2; std::copy(it1, ite1, it2); }
  955. }
  956. else {
  957. ptrdiff_t m = it1.index() - it2.index();
  958. if (m >= 0 && ite1.index() <= ite2.index())
  959. std::copy(it1, ite1, it2 + m);
  960. else {
  961. if (m < 0) l2[it1.index()] = *it1;
  962. if (ite1.index() > ite2.index()) l2[ite1.index()-1] = *(ite1-1);
  963. it2 = vect_begin(l2); ite2 = vect_end(l2);
  964. m = it1.index() - it2.index();
  965. std::copy(it1, ite1, it2 + m);
  966. }
  967. }
  968. }
  969. }
  970. template <typename L1, typename L2>
  971. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
  972. clear(l2);
  973. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  974. for (; it != ite; ++it) { l2[it.index()] = *it; }
  975. }
  976. template <typename L1, typename L2>
  977. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
  978. clear(l2);
  979. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  980. for (; it != ite; ++it) l2[it.index()] = *it;
  981. }
  982. template <typename L1, typename L2>
  983. void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
  984. typedef typename linalg_traits<L1>::value_type T;
  985. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  986. if (it == ite)
  987. gmm::clear(l2);
  988. else {
  989. auto it2 = vect_begin(l2), ite2 = vect_end(l2);
  990. size_type i = it.index(), j;
  991. for (j = 0; j < i; ++j, ++it2) *it2 = T(0);
  992. for (; it != ite; ++it, ++it2) *it2 = *it;
  993. for (; it2 != ite2; ++it2) *it2 = T(0);
  994. }
  995. }
  996. template <typename L1, typename L2>
  997. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
  998. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  999. clear(l2);
  1000. // cout << "copy " << l1 << " of size " << vect_size(l1) << " nnz = " << nnz(l1) << endl;
  1001. for (; it != ite; ++it) {
  1002. // cout << "*it = " << *it << endl;
  1003. // cout << "it.index() = " << it.index() << endl;
  1004. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1005. l2[it.index()] = *it;
  1006. }
  1007. }
  1008. template <typename L1, typename L2>
  1009. void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
  1010. clear(l2);
  1011. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1012. for (size_type i = 0; it != ite; ++it, ++i)
  1013. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1014. l2[i] = *it;
  1015. }
  1016. template <typename L1, typename L2> // to be optimised ...
  1017. void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
  1018. clear(l2);
  1019. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1020. for (size_type i = 0; it != ite; ++it, ++i)
  1021. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1022. l2[i] = *it;
  1023. }
  1024. template <typename L1, typename L2>
  1025. void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
  1026. clear(l2);
  1027. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1028. for (; it != ite; ++it)
  1029. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1030. l2[it.index()] = *it;
  1031. }
  1032. /* ******************************************************************** */
  1033. /* Matrix and vector addition */
  1034. /* algorithms are built in order to avoid some conflicts with */
  1035. /* repeated arguments or with overlapping part of a same object. */
  1036. /* In the latter case, conflicts are still possible. */
  1037. /* ******************************************************************** */
  1038. ///@endcond
  1039. /** Add two vectors or matrices
  1040. @param l1
  1041. @param l2 contains on output, l2+l1.
  1042. */
  1043. template <typename L1, typename L2> inline
  1044. void add(const L1& l1, L2& l2) {
  1045. add_spec(l1, l2, typename linalg_traits<L2>::linalg_type());
  1046. }
  1047. ///@cond
  1048. template <typename L1, typename L2> inline
  1049. void add(const L1& l1, const L2& l2) { add(l1, linalg_const_cast(l2)); }
  1050. template <typename L1, typename L2> inline
  1051. void add_spec(const L1& l1, L2& l2, abstract_vector) {
  1052. GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch, "
  1053. << vect_size(l1) << " !=" << vect_size(l2));
  1054. add(l1, l2, typename linalg_traits<L1>::storage_type(),
  1055. typename linalg_traits<L2>::storage_type());
  1056. }
  1057. template <typename L1, typename L2> inline
  1058. void add_spec(const L1& l1, L2& l2, abstract_matrix) {
  1059. GMM_ASSERT2(mat_nrows(l1)==mat_nrows(l2) && mat_ncols(l1)==mat_ncols(l2),
  1060. "dimensions mismatch l1 is " << mat_nrows(l1) << "x"
  1061. << mat_ncols(l1) << " and l2 is " << mat_nrows(l2)
  1062. << "x" << mat_ncols(l2));
  1063. add(l1, l2, typename linalg_traits<L1>::sub_orientation(),
  1064. typename linalg_traits<L2>::sub_orientation());
  1065. }
  1066. template <typename L1, typename L2>
  1067. void add(const L1& l1, L2& l2, row_major, row_major) {
  1068. auto it1 = mat_row_begin(l1), ite = mat_row_end(l1);
  1069. auto it2 = mat_row_begin(l2);
  1070. for ( ; it1 != ite; ++it1, ++it2)
  1071. add(linalg_traits<L1>::row(it1), linalg_traits<L2>::row(it2));
  1072. }
  1073. template <typename L1, typename L2>
  1074. void add(const L1& l1, L2& l2, col_major, col_major) {
  1075. auto it1 = mat_col_const_begin(l1), ite = mat_col_const_end(l1);
  1076. typename linalg_traits<L2>::col_iterator it2 = mat_col_begin(l2);
  1077. for ( ; it1 != ite; ++it1, ++it2)
  1078. add(linalg_traits<L1>::col(it1), linalg_traits<L2>::col(it2));
  1079. }
  1080. template <typename L1, typename L2> inline
  1081. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
  1082. add_mat_mixed_rc(l1, l2, i, typename linalg_traits<L1>::storage_type());
  1083. }
  1084. template <typename L1, typename L2>
  1085. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  1086. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1087. for (; it != ite; ++it) l2(i, it.index()) += *it;
  1088. }
  1089. template <typename L1, typename L2>
  1090. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  1091. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1092. for (; it != ite; ++it) l2(i, it.index()) += *it;
  1093. }
  1094. template <typename L1, typename L2>
  1095. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
  1096. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1097. for (size_type j = 0; it != ite; ++it, ++j) l2(i, j) += *it;
  1098. }
  1099. template <typename L1, typename L2> inline
  1100. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
  1101. add_mat_mixed_cr(l1, l2, i, typename linalg_traits<L1>::storage_type());
  1102. }
  1103. template <typename L1, typename L2>
  1104. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  1105. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1106. for (; it != ite; ++it) l2(it.index(), i) += *it;
  1107. }
  1108. template <typename L1, typename L2>
  1109. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  1110. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1111. for (; it != ite; ++it) l2(it.index(), i) += *it;
  1112. }
  1113. template <typename L1, typename L2>
  1114. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
  1115. auto it = vect_const_begin(l1), ite = vect_const_end(l1);
  1116. for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) += *it;
  1117. }
  1118. template <typename L1, typename L2>
  1119. void add(const L1& l1, L2& l2, row_major, col_major) {
  1120. size_type nbr = mat_nrows(l1);
  1121. for (size_type i = 0; i < nbr; ++i)
  1122. add_mat_mixed_rc(mat_const_row(l1, i), l2, i);
  1123. }
  1124. template <typename L1, typename L2>
  1125. void add(const L1& l1, L2& l2, col_major, row_major) {
  1126. size_type nbc = mat_ncols(l1);
  1127. for (size_type i = 0; i < nbc; ++i)
  1128. add_mat_mixed_cr(mat_const_col(l1, i), l2, i);
  1129. }
  1130. template <typename L1, typename L2> inline
  1131. void add(const L1& l1, L2& l2, row_and_col, row_major)
  1132. { add(l1, l2, row_major(), row_major()); }
  1133. template <typename L1, typename L2> inline
  1134. void add(const L1& l1, L2& l2, row_and_col, row_and_col)
  1135. { add(l1, l2, row_major(), row_major()); }
  1136. template <typename L1, typename L2> inline
  1137. void add(const L1& l1, L2& l2, row_and_col, col_and_row)
  1138. { add(l1, l2, row_major(), row_major()); }
  1139. template <typename L1, typename L2> inline
  1140. void add(const L1& l1, L2& l2, col_and_row, row_and_col)
  1141. { add(l1, l2, row_major(), row_major()); }
  1142. template <typename L1, typename L2> inline
  1143. void add(const L1& l1, L2& l2, row_major, row_and_col)
  1144. { add(l1, l2, row_major(), row_major()); }
  1145. template <typename L1, typename L2> inline
  1146. void add(const L1& l1, L2& l2, col_and_row, row_major)
  1147. { add(l1, l2, row_major(), row_major()); }
  1148. template <typename L1, typename L2> inline
  1149. void add(const L1& l1, L2& l2, row_major, col_and_row)
  1150. { add(l1, l2, row_major(), row_major()); }
  1151. template <typename L1, typename L2> inline
  1152. void add(const L1& l1, L2& l2, row_and_col, col_major)
  1153. { add(l1, l2, col_major(), col_major()); }
  1154. template <typename L1, typename L2> inline
  1155. void add(const L1& l1, L2& l2, col_major, row_and_col)
  1156. { add(l1, l2, col_major(), col_major()); }
  1157. template <typename L1, typename L2> inline
  1158. void add(const L1& l1, L2& l2, col_and_row, col_major)
  1159. { add(l1, l2, col_major(), col_major()); }
  1160. template <typename L1, typename L2> inline
  1161. void add(const L1& l1, L2& l2, col_and_row, col_and_row)
  1162. { add(l1, l2, col_major(), col_major()); }
  1163. template <typename L1, typename L2> inline
  1164. void add(const L1& l1, L2& l2, col_major, col_and_row)
  1165. { add(l1, l2, col_major(), col_major()); }
  1166. ///@endcond
  1167. /** Addition of two vectors/matrices
  1168. @param l1
  1169. @param l2
  1170. @param l3 contains l1+l2 on output
  1171. */
  1172. template <typename L1, typename L2, typename L3> inline
  1173. void add(const L1& l1, const L2& l2, L3& l3) {
  1174. add_spec(l1, l2, l3, typename linalg_traits<L2>::linalg_type());
  1175. }
  1176. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1177. template <typename L1, typename L2, typename L3> inline
  1178. void add(const L1& l1, const L2& l2, const L3& l3)
  1179. { add(l1, l2, linalg_const_cast(l3)); }
  1180. template <typename L1, typename L2, typename L3> inline
  1181. void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_matrix)
  1182. { copy(l2, l3); add(l1, l3); }
  1183. template <typename L1, typename L2, typename L3> inline
  1184. void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
  1185. GMM_ASSERT2(vect_size(l1) == vect_size(l2) &&
  1186. vect_size(l1) == vect_size(l3), "dimensions mismatch");
  1187. if ((const void *)(&l1) == (const void *)(&l3))
  1188. add(l2, l3);
  1189. else if ((const void *)(&l2) == (const void *)(&l3))
  1190. add(l1, l3);
  1191. else
  1192. add(l1, l2, l3, typename linalg_traits<L1>::storage_type(),
  1193. typename linalg_traits<L2>::storage_type(),
  1194. typename linalg_traits<L3>::storage_type());
  1195. }
  1196. template <typename IT1, typename IT2, typename IT3>
  1197. void add_full_(IT1 it1, IT2 it2, IT3 it3, IT3 ite) {
  1198. for (; it3 != ite; ++it3, ++it2, ++it1) *it3 = *it1 + *it2;
  1199. }
  1200. template <typename IT1, typename IT2, typename IT3>
  1201. void add_almost_full_(IT1 it1, IT1 ite1, IT2 it2, IT3 it3, IT3 ite3) {
  1202. IT3 it = it3;
  1203. for (; it != ite3; ++it, ++it2) *it = *it2;
  1204. for (; it1 != ite1; ++it1)
  1205. *(it3 + it1.index()) += *it1;
  1206. }
  1207. template <typename IT1, typename IT2, typename IT3>
  1208. void add_to_full_(IT1 it1, IT1 ite1, IT2 it2, IT2 ite2,
  1209. IT3 it3, IT3 ite3) {
  1210. typedef typename std::iterator_traits<IT3>::value_type T;
  1211. IT3 it = it3;
  1212. for (; it != ite3; ++it) *it = T(0);
  1213. for (; it1 != ite1; ++it1) *(it3 + it1.index()) = *it1;
  1214. for (; it2 != ite2; ++it2) *(it3 + it2.index()) += *it2;
  1215. }
  1216. template <typename L1, typename L2, typename L3> inline
  1217. void add(const L1& l1, const L2& l2, L3& l3,
  1218. abstract_dense, abstract_dense, abstract_dense) {
  1219. add_full_(vect_const_begin(l1), vect_const_begin(l2),
  1220. vect_begin(l3), vect_end(l3));
  1221. }
  1222. // generic function for add(v1, v2, v3).
  1223. // Need to be specialized to optimize particular additions.
  1224. template <typename L1, typename L2, typename L3,
  1225. typename ST1, typename ST2, typename ST3>
  1226. inline void add(const L1& l1, const L2& l2, L3& l3, ST1, ST2, ST3)
  1227. { copy(l2, l3); add(l1, l3, ST1(), ST3()); }
  1228. template <typename L1, typename L2, typename L3> inline
  1229. void add(const L1& l1, const L2& l2, L3& l3,
  1230. abstract_sparse, abstract_dense, abstract_dense) {
  1231. add_almost_full_(vect_const_begin(l1), vect_const_end(l1),
  1232. vect_const_begin(l2), vect_begin(l3), vect_end(l3));
  1233. }
  1234. template <typename L1, typename L2, typename L3> inline
  1235. void add(const L1& l1, const L2& l2, L3& l3,
  1236. abstract_dense, abstract_sparse, abstract_dense)
  1237. { add(l2, l1, l3, abstract_sparse(), abstract_dense(), abstract_dense()); }
  1238. template <typename L1, typename L2, typename L3> inline
  1239. void add(const L1& l1, const L2& l2, L3& l3,
  1240. abstract_sparse, abstract_sparse, abstract_dense) {
  1241. add_to_full_(vect_const_begin(l1), vect_const_end(l1),
  1242. vect_const_begin(l2), vect_const_end(l2),
  1243. vect_begin(l3), vect_end(l3));
  1244. }
  1245. template <typename L1, typename L2, typename L3>
  1246. void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_true) {
  1247. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1248. auto it2 = vect_const_begin(l2), ite2 = vect_const_end(l2);
  1249. clear(l3);
  1250. while (it1 != ite1 && it2 != ite2) {
  1251. ptrdiff_t d = it1.index() - it2.index();
  1252. if (d < 0)
  1253. { l3[it1.index()] += *it1; ++it1; }
  1254. else if (d > 0)
  1255. { l3[it2.index()] += *it2; ++it2; }
  1256. else
  1257. { l3[it1.index()] = *it1 + *it2; ++it1; ++it2; }
  1258. }
  1259. for (; it1 != ite1; ++it1) l3[it1.index()] += *it1;
  1260. for (; it2 != ite2; ++it2) l3[it2.index()] += *it2;
  1261. }
  1262. template <typename L1, typename L2, typename L3>
  1263. void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_false)
  1264. { copy(l2, l3); add(l2, l3); }
  1265. template <typename L1, typename L2, typename L3>
  1266. void add(const L1& l1, const L2& l2, L3& l3,
  1267. abstract_sparse, abstract_sparse, abstract_sparse) {
  1268. add_spspsp(l1, l2, l3, typename linalg_and<typename
  1269. linalg_traits<L1>::index_sorted,
  1270. typename linalg_traits<L2>::index_sorted>::bool_type());
  1271. }
  1272. template <typename L1, typename L2>
  1273. void add(const L1& l1, L2& l2, abstract_dense, abstract_dense) {
  1274. auto it1 = vect_const_begin(l1);
  1275. auto it2 = vect_begin(l2), ite = vect_end(l2);
  1276. for (; it2 != ite; ++it2, ++it1) *it2 += *it1;
  1277. }
  1278. template <typename L1, typename L2>
  1279. void add(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
  1280. typedef typename linalg_traits<L1>::value_type T;
  1281. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1282. size_type i1 = 0, ie1 = vect_size(l1);
  1283. while (it1 != ite1 && *it1 == T(0)) { ++it1; ++i1; }
  1284. if (it1 != ite1) {
  1285. auto it2 = vect_begin(l2), ite2 = vect_end(l2);
  1286. while (ie1 && *(ite1-1) == T(0)) { ite1--; --ie1; }
  1287. if (it2 == ite2 || i1 < it2.index()) {
  1288. l2[i1] = *it1; ++i1; ++it1;
  1289. if (it1 == ite1) return;
  1290. it2 = vect_begin(l2); ite2 = vect_end(l2);
  1291. }
  1292. if (ie1 > ite2.index()) {
  1293. --ite1; l2[ie1 - 1] = *ite1;
  1294. it2 = vect_begin(l2);
  1295. }
  1296. it2 += i1 - it2.index();
  1297. for (; it1 != ite1; ++it1, ++it2) { *it2 += *it1; }
  1298. }
  1299. }
  1300. template <typename L1, typename L2>
  1301. void add(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
  1302. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1303. if (it1 != ite1) {
  1304. auto it2 = vect_begin(l2);
  1305. it2 += it1.index();
  1306. for (; it1 != ite1; ++it2, ++it1) *it2 += *it1;
  1307. }
  1308. }
  1309. template <typename L1, typename L2>
  1310. void add(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
  1311. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1312. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1313. }
  1314. template <typename L1, typename L2>
  1315. void add(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
  1316. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1317. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1318. }
  1319. template <typename L1, typename L2>
  1320. void add(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
  1321. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1322. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1323. }
  1324. template <typename L1, typename L2>
  1325. void add(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
  1326. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1327. for (; it1 != ite1; ++it1)
  1328. if (*it1 != typename linalg_traits<L1>::value_type(0))
  1329. l2[it1.index()] += *it1;
  1330. }
  1331. template <typename L1, typename L2>
  1332. void add(const L1& l1, L2& l2, abstract_skyline, abstract_skyline) {
  1333. typedef typename linalg_traits<L1>::value_type T1;
  1334. typedef typename linalg_traits<L2>::value_type T2;
  1335. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1336. while (it1 != ite1 && *it1 == T1(0)) ++it1;
  1337. if (ite1 != it1) {
  1338. auto it2 = vect_begin(l2), ite2 = vect_end(l2);
  1339. while (*(ite1-1) == T1(0)) ite1--;
  1340. if (it2 == ite2 || it1.index() < it2.index()) {
  1341. l2[it1.index()] = T2(0);
  1342. it2 = vect_begin(l2); ite2 = vect_end(l2);
  1343. }
  1344. if (ite1.index() > ite2.index()) {
  1345. l2[ite1.index() - 1] = T2(0);
  1346. it2 = vect_begin(l2);
  1347. }
  1348. it2 += it1.index() - it2.index();
  1349. for (; it1 != ite1; ++it1, ++it2) *it2 += *it1;
  1350. }
  1351. }
  1352. template <typename L1, typename L2>
  1353. void add(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
  1354. auto it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1355. for (size_type i = 0; it1 != ite1; ++it1, ++i)
  1356. if (*it1 != typename linalg_traits<L1>::value_type(0)) l2[i] += *it1;
  1357. }
  1358. /* ******************************************************************** */
  1359. /* Matrix-vector mult */
  1360. /* ******************************************************************** */
  1361. ///@endcond
  1362. /** matrix-vector or matrix-matrix product.
  1363. @param l1 a matrix.
  1364. @param l2 a vector or matrix.
  1365. @param l3 the product l1*l2.
  1366. */
  1367. template <typename L1, typename L2, typename L3> inline
  1368. void mult(const L1& l1, const L2& l2, L3& l3) {
  1369. mult_dispatch(l1, l2, l3, typename linalg_traits<L2>::linalg_type());
  1370. }
  1371. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1372. template <typename L1, typename L2, typename L3> inline
  1373. void mult(const L1& l1, const L2& l2, const L3& l3)
  1374. { mult(l1, l2, linalg_const_cast(l3)); }
  1375. template <typename L1, typename L2, typename L3> inline
  1376. void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
  1377. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1378. if (!m || !n) { gmm::clear(l3); return; }
  1379. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
  1380. if (!same_origin(l2, l3))
  1381. mult_spec(l1, l2, l3, typename principal_orientation_type<typename
  1382. linalg_traits<L1>::sub_orientation>::potype());
  1383. else {
  1384. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1385. typename temporary_vector<L3>::vector_type temp(vect_size(l3));
  1386. mult_spec(l1, l2, temp, typename principal_orientation_type<typename
  1387. linalg_traits<L1>::sub_orientation>::potype());
  1388. copy(temp, l3);
  1389. }
  1390. }
  1391. template <typename L1, typename L2, typename L3>
  1392. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1393. typedef typename linalg_traits<L3>::value_type T;
  1394. clear(l3);
  1395. size_type nr = mat_nrows(l1);
  1396. for (size_type i = 0; i < nr; ++i) {
  1397. T aux = vect_sp(mat_const_row(l1, i), l2);
  1398. if (aux != T(0)) l3[i] = aux;
  1399. }
  1400. }
  1401. template <typename L1, typename L2, typename L3>
  1402. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1403. typedef typename linalg_traits<L3>::value_type T;
  1404. clear(l3);
  1405. size_type nr = mat_nrows(l1);
  1406. for (size_type i = 0; i < nr; ++i) {
  1407. T aux = vect_sp(mat_const_row(l1, i), l2);
  1408. if (aux != T(0)) l3[i] = aux;
  1409. }
  1410. }
  1411. #ifdef STORM_HAVE_INTELTBB
  1412. // Official Intel Hint on blocked_range vs. linear iterators: http://software.intel.com/en-us/forums/topic/289505
  1413. template <typename IT1, typename IT2>
  1414. class forward_range_mult {
  1415. IT1 my_begin;
  1416. IT1 my_end;
  1417. IT2 my_begin_row;
  1418. size_t my_size;
  1419. public:
  1420. IT1 begin() const {return my_begin;}
  1421. IT2 begin_row() const {return my_begin_row;}
  1422. IT1 end() const {return my_end;}
  1423. bool empty() const {return my_begin==my_end;}
  1424. bool is_divisible() const {return my_size>1;}
  1425. forward_range_mult( IT1 first, IT1 last, IT2 row_first, size_t size ) : my_begin(first), my_end(last), my_begin_row(row_first), my_size(size) {
  1426. assert( size==size_t(std::distance( first,last )));
  1427. }
  1428. forward_range_mult( IT1 first, IT1 last, IT2 row_first) : my_begin(first), my_end(last), my_begin_row(row_first) {
  1429. my_size = std::distance( first,last );
  1430. }
  1431. forward_range_mult( forward_range_mult& r, tbb::split ) {
  1432. size_t h = r.my_size/2;
  1433. my_end = r.my_end;
  1434. my_begin = r.my_begin;
  1435. my_begin_row = r.my_begin_row;
  1436. std::advance( my_begin, h ); // Might be scaling issue
  1437. std::advance( my_begin_row, h );
  1438. my_size = r.my_size-h;
  1439. r.my_end = my_begin;
  1440. r.my_size = h;
  1441. }
  1442. };
  1443. template <typename L1, typename L2, typename L3>
  1444. class tbbHelper_mult_by_row {
  1445. L2 const* my_l2;
  1446. // Typedefs for Iterator Types
  1447. typedef typename linalg_traits<L3>::iterator frm_IT1;
  1448. typedef typename linalg_traits<L1>::const_row_iterator frm_IT2;
  1449. public:
  1450. void operator()( const forward_range_mult<frm_IT1, frm_IT2>& r ) const {
  1451. L2 const& l2 = *my_l2;
  1452. frm_IT1 it = r.begin();
  1453. frm_IT1 ite = r.end();
  1454. frm_IT2 itr = r.begin_row();
  1455. for (; it != ite; ++it, ++itr) {
  1456. *it = vect_sp(linalg_traits<L1>::row(itr), l2,
  1457. typename linalg_traits<L1>::storage_type(),
  1458. typename linalg_traits<L2>::storage_type());
  1459. }
  1460. }
  1461. tbbHelper_mult_by_row(L2 const* l2) :
  1462. my_l2(l2)
  1463. {}
  1464. };
  1465. template <typename L1, typename L2, typename L3, typename L4>
  1466. class tbbHelper_mult_add_by_row {
  1467. L2 const* my_l2;
  1468. L3 const* my_l3;
  1469. // Typedefs for Iterator Types
  1470. typedef typename linalg_traits<L3>::iterator frm_IT1;
  1471. typedef typename linalg_traits<L1>::const_row_iterator frm_IT2;
  1472. typedef typename linalg_traits<L4>::const_iterator frm_IT3;
  1473. public:
  1474. void operator()( const forward_range_mult<frm_IT1, frm_IT2>& r ) const {
  1475. L2 const& l2 = *my_l2;
  1476. frm_IT1 it = r.begin();
  1477. frm_IT1 ite = r.end();
  1478. frm_IT2 itr = r.begin_row();
  1479. frm_IT3 addIt = my_l3->begin();
  1480. for (; it != ite; ++it, ++itr, ++addIt) {
  1481. *it = vect_sp(linalg_traits<L1>::row(itr), l2,
  1482. typename linalg_traits<L1>::storage_type(),
  1483. typename linalg_traits<L2>::storage_type()) + *addIt;
  1484. }
  1485. }
  1486. tbbHelper_mult_add_by_row(L2 const* l2, L2 const* l3) : my_l2(l2), my_l3(l3) {
  1487. // Intentionally left empty.
  1488. }
  1489. };
  1490. template<typename L1, typename L2, typename L3>
  1491. class TbbMultFunctor {
  1492. public:
  1493. TbbMultFunctor(L1 const& l1, L2 const& l2, L3& l3) : l1(l1), l2(l2), l3(l3) {
  1494. // Intentionally left empty.
  1495. }
  1496. void operator()(tbb::blocked_range<unsigned long> const& range) const {
  1497. auto itr = mat_row_const_begin(l1) + range.begin();
  1498. auto l2it = l2.begin() + range.begin();
  1499. auto l3it = l3.begin() + range.begin();
  1500. auto l3ite = l3.begin() + range.end();
  1501. for (; l3it != l3ite; ++l3it, ++l2it, ++itr) {
  1502. *l3it = vect_sp(linalg_traits<L1>::row(itr), l2, typename linalg_traits<L1>::storage_type(), typename linalg_traits<L2>::storage_type());
  1503. }
  1504. }
  1505. private:
  1506. L1 const& l1;
  1507. L2 const& l2;
  1508. L3& l3;
  1509. };
  1510. template <typename L1, typename L2, typename L3>
  1511. void mult_by_row_parallel(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1512. tbb::parallel_for(tbb::blocked_range<unsigned long>(0, vect_size(l3), 10), TbbMultFunctor<L1, L2, L3>(l1, l2, l3));
  1513. }
  1514. template<typename L1, typename L2, typename L3, typename L4>
  1515. class TbbMultAddFunctor {
  1516. public:
  1517. TbbMultAddFunctor(L1 const& l1, L2 const& l2, L3 const& l3, L4& l4) : l1(l1), l2(l2), l3(l3), l4(l4) {
  1518. // Intentionally left empty.
  1519. }
  1520. void operator()(tbb::blocked_range<unsigned long> const& range) const {
  1521. auto itr = mat_row_const_begin(l1) + range.begin();
  1522. auto l2it = l2.begin() + range.begin();
  1523. auto l3it = l3.begin() + range.begin();
  1524. auto l4it = l4.begin() + range.begin();
  1525. auto l4ite = l4.begin() + range.end();
  1526. for (; l4it != l4ite; ++l4it, ++l3it, ++l2it, ++itr) {
  1527. *l4it = vect_sp(linalg_traits<L1>::row(itr), l2, typename linalg_traits<L1>::storage_type(), typename linalg_traits<L2>::storage_type()) + *l3it;
  1528. }
  1529. }
  1530. private:
  1531. L1 const& l1;
  1532. L2 const& l2;
  1533. L3 const& l3;
  1534. L4& l4;
  1535. };
  1536. template <typename L1, typename L2, typename L3, typename L4>
  1537. void mult_add_by_row_parallel(const L1& l1, const L2& l2, const L3& l3, L4& l4, abstract_dense) {
  1538. tbb::parallel_for(tbb::blocked_range<unsigned long>(0, vect_size(l4), 10), TbbMultAddFunctor<L1, L2, L3, L4>(l1, l2, l3, l4));
  1539. }
  1540. #endif
  1541. template <typename L1, typename L2, typename L3>
  1542. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1543. typename linalg_traits<L3>::iterator it=vect_begin(l3), ite=vect_end(l3);
  1544. auto itr = mat_row_const_begin(l1);
  1545. for (; it != ite; ++it, ++itr)
  1546. *it = vect_sp(linalg_traits<L1>::row(itr), l2,
  1547. typename linalg_traits<L1>::storage_type(),
  1548. typename linalg_traits<L2>::storage_type());
  1549. }
  1550. template <typename L1, typename L2, typename L3>
  1551. void mult_by_row_bwd(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1552. typename linalg_traits<L3>::iterator target_it=vect_end(l3) - 1, target_ite=vect_begin(l3) - 1;
  1553. typename linalg_traits<L1>::const_row_iterator
  1554. itr = mat_row_const_end(l1) - 1;
  1555. for (; target_it != target_ite; --target_it, --itr)
  1556. *target_it = vect_sp(linalg_traits<L1>::row(itr), l2);
  1557. }
  1558. template <typename L1, typename L2, typename L3>
  1559. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1560. clear(l3);
  1561. size_type nc = mat_ncols(l1);
  1562. for (size_type i = 0; i < nc; ++i)
  1563. add(scaled(mat_const_col(l1, i), l2[i]), l3);
  1564. }
  1565. template <typename L1, typename L2, typename L3>
  1566. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1567. typedef typename linalg_traits<L2>::value_type T;
  1568. clear(l3);
  1569. auto it = vect_const_begin(l2), ite = vect_const_end(l2);
  1570. for (; it != ite; ++it)
  1571. if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1572. }
  1573. template <typename L1, typename L2, typename L3>
  1574. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1575. typedef typename linalg_traits<L2>::value_type T;
  1576. clear(l3);
  1577. auto it = vect_const_begin(l2), ite = vect_const_end(l2);
  1578. for (; it != ite; ++it)
  1579. if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1580. }
  1581. template <typename L1, typename L2, typename L3> inline
  1582. void mult_spec(const L1& l1, const L2& l2, L3& l3, row_major)
  1583. { mult_by_row(l1, l2, l3, typename linalg_traits<L3>::storage_type()); }
  1584. #ifdef STORM_HAVE_INTELTBB
  1585. template <typename L1, typename L2, typename L3> inline
  1586. void mult_parallel_spec(const L1& l1, const L2& l2, L3& l3, row_major)
  1587. { mult_by_row_parallel(l1, l2, l3, typename linalg_traits<L3>::storage_type()); }
  1588. #endif
  1589. template <typename L1, typename L2, typename L3> inline
  1590. void mult_spec(const L1& l1, const L2& l2, L3& l3, col_major)
  1591. { mult_by_col(l1, l2, l3, typename linalg_traits<L2>::storage_type()); }
  1592. template <typename L1, typename L2, typename L3> inline
  1593. void mult_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
  1594. { mult_ind(l1, l2, l3, typename linalg_traits<L1>::storage_type()); }
  1595. template <typename L1, typename L2, typename L3>
  1596. void mult_ind(const L1& l1, const L2& l2, L3& l3, abstract_indirect) {
  1597. GMM_ASSERT1(false, "gmm::mult(m, ., .) undefined for this kind of matrix");
  1598. }
  1599. template <typename L1, typename L2, typename L3, typename L4> inline
  1600. void mult(const L1& l1, const L2& l2, const L3& l3, L4& l4) {
  1601. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1602. copy(l3, l4);
  1603. if (!m || !n) { gmm::copy(l3, l4); return; }
  1604. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l4), "dimensions mismatch");
  1605. if (!same_origin(l2, l4)) {
  1606. mult_add_spec(l1, l2, l4, typename principal_orientation_type<typename
  1607. linalg_traits<L1>::sub_orientation>::potype());
  1608. }
  1609. else {
  1610. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1611. typename temporary_vector<L2>::vector_type temp(vect_size(l2));
  1612. copy(l2, temp);
  1613. mult_add_spec(l1,temp, l4, typename principal_orientation_type<typename
  1614. linalg_traits<L1>::sub_orientation>::potype());
  1615. }
  1616. }
  1617. template <typename L1, typename L2, typename L3, typename L4> inline
  1618. void mult(const L1& l1, const L2& l2, const L3& l3, const L4& l4)
  1619. { mult(l1, l2, l3, linalg_const_cast(l4)); }
  1620. ///@endcond
  1621. /** Multiply-accumulate. l3 += l1*l2; */
  1622. template <typename L1, typename L2, typename L3> inline
  1623. void mult_add(const L1& l1, const L2& l2, L3& l3) {
  1624. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1625. if (!m || !n) return;
  1626. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
  1627. if (!same_origin(l2, l3)) {
  1628. mult_add_spec(l1, l2, l3, typename principal_orientation_type<typename
  1629. linalg_traits<L1>::sub_orientation>::potype());
  1630. }
  1631. else {
  1632. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1633. typename temporary_vector<L3>::vector_type temp(vect_size(l2));
  1634. copy(l2, temp);
  1635. mult_add_spec(l1,temp, l3, typename principal_orientation_type<typename
  1636. linalg_traits<L1>::sub_orientation>::potype());
  1637. }
  1638. }
  1639. /** Multiply-accumulate. l4 = l1*l2 + l3; */
  1640. template <typename L1, typename L2, typename L3, typename L4> inline
  1641. void mult_add(const L1& l1, const L2& l2, const L3& l3, L4& l4) {
  1642. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1643. if (!m || !n) return;
  1644. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3) && vect_size(l3) == vect_size(l4), "dimensions mismatch");
  1645. if (!same_origin(l2, l4) && !same_origin(l3, l4) && !same_origin(l2, l3)) {
  1646. mult_add_spec(l1, l2, l3, l4, typename principal_orientation_type<typename
  1647. linalg_traits<L1>::sub_orientation>::potype());
  1648. } else {
  1649. GMM_WARNING2("Warning, Multiple temporaries are used for mult\n");
  1650. typename temporary_vector<L2>::vector_type l2tmp(vect_size(l2));
  1651. copy(l2, l2tmp);
  1652. typename temporary_vector<L3>::vector_type l3tmp(vect_size(l3));
  1653. copy(l3, l3tmp);
  1654. mult_add_spec(l1, l2tmp, l3tmp, l4, typename principal_orientation_type<typename
  1655. linalg_traits<L1>::sub_orientation>::potype());
  1656. }
  1657. }
  1658. #ifdef STORM_HAVE_INTELTBB
  1659. /** Multiply. l3 = l1*l2; */
  1660. template <typename L1, typename L2, typename L3> inline
  1661. void mult_parallel(const L1& l1, const L2& l2, L3& l3) {
  1662. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1663. if (!m || !n) return;
  1664. GMM_ASSERT2(n==vect_size(l2), "dimensions mismatch");
  1665. if (!same_origin(l2, l3)) {
  1666. mult_parallel_spec(l1, l2, l3, typename principal_orientation_type<typename
  1667. linalg_traits<L1>::sub_orientation>::potype());
  1668. } else {
  1669. GMM_WARNING2("Warning, temporaries are used for mult\n");
  1670. typename temporary_vector<L2>::vector_type l2tmp(vect_size(l2));
  1671. copy(l2, l2tmp);
  1672. mult_parallel_spec(l1, l2tmp, l3, typename principal_orientation_type<typename
  1673. linalg_traits<L1>::sub_orientation>::potype());
  1674. }
  1675. }
  1676. /** Multiply-accumulate. l4 = l1*l2 + l3; */
  1677. template <typename L1, typename L2, typename L3, typename L4> inline
  1678. void mult_add_parallel(const L1& l1, const L2& l2, const L3& l3, L4& l4) {
  1679. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1680. if (!m || !n) return;
  1681. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3) && vect_size(l3) == vect_size(l4), "dimensions mismatch");
  1682. if (!same_origin(l2, l4) && !same_origin(l3, l4) && !same_origin(l2, l3)) {
  1683. mult_add_parallel_spec(l1, l2, l3, l4, typename principal_orientation_type<typename linalg_traits<L1>::sub_orientation>::potype());
  1684. } else {
  1685. GMM_WARNING2("Warning, Multiple temporaries are used for mult\n");
  1686. typename temporary_vector<L2>::vector_type l2tmp(vect_size(l2));
  1687. copy(l2, l2tmp);
  1688. typename temporary_vector<L3>::vector_type l3tmp(vect_size(l3));
  1689. copy(l3, l3tmp);
  1690. mult_add_parallel_spec(l1, l2tmp, l3tmp, l4, typename principal_orientation_type<typename linalg_traits<L1>::sub_orientation>::potype());
  1691. }
  1692. }
  1693. #endif
  1694. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1695. template <typename L1, typename L2, typename L3> inline
  1696. void mult_add(const L1& l1, const L2& l2, const L3& l3)
  1697. { mult_add(l1, l2, linalg_const_cast(l3)); }
  1698. template <typename L1, typename L2, typename L3>
  1699. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1700. typedef typename linalg_traits<L3>::value_type T;
  1701. size_type nr = mat_nrows(l1);
  1702. for (size_type i = 0; i < nr; ++i) {
  1703. T aux = vect_sp(mat_const_row(l1, i), l2);
  1704. if (aux != T(0)) l3[i] += aux;
  1705. }
  1706. }
  1707. template <typename L1, typename L2, typename L3>
  1708. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1709. typedef typename linalg_traits<L3>::value_type T;
  1710. size_type nr = mat_nrows(l1);
  1711. for (size_type i = 0; i < nr; ++i) {
  1712. T aux = vect_sp(mat_const_row(l1, i), l2);
  1713. if (aux != T(0)) l3[i] += aux;
  1714. }
  1715. }
  1716. template <typename L1, typename L2, typename L3>
  1717. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1718. auto it=vect_begin(l3), ite=vect_end(l3);
  1719. auto itr = mat_row_const_begin(l1);
  1720. for (; it != ite; ++it, ++itr)
  1721. *it += vect_sp(linalg_traits<L1>::row(itr), l2);
  1722. }
  1723. template <typename L1, typename L2, typename L3, typename L4>
  1724. void mult_add_by_row(const L1& l1, const L2& l2, const L3& l3, L4& l4, abstract_dense) {
  1725. typename linalg_traits<L3>::const_iterator add_it=vect_begin(l3), add_ite=vect_end(l3);
  1726. typename linalg_traits<L4>::iterator target_it=vect_begin(l4), target_ite=vect_end(l4);
  1727. typename linalg_traits<L1>::const_row_iterator
  1728. itr = mat_row_const_begin(l1);
  1729. for (; add_it != add_ite; ++add_it, ++target_it, ++itr)
  1730. *target_it = vect_sp(linalg_traits<L1>::row(itr), l2) + *add_it;
  1731. }
  1732. template <typename L1, typename L2, typename L3, typename L4>
  1733. void mult_add_by_row_bwd(const L1& l1, const L2& l2, const L3& l3, L4& l4, abstract_dense) {
  1734. typename linalg_traits<L3>::const_iterator add_it=vect_end(l3) - 1, add_ite=vect_begin(l3) - 1;
  1735. typename linalg_traits<L4>::iterator target_it=vect_end(l4) - 1, target_ite=vect_begin(l4) - 1;
  1736. typename linalg_traits<L1>::const_row_iterator
  1737. itr = mat_row_const_end(l1) - 1;
  1738. for (; add_it != add_ite; --add_it, --target_it, --itr)
  1739. *target_it = vect_sp(linalg_traits<L1>::row(itr), l2) + *add_it;
  1740. }
  1741. template <typename L1, typename L2, typename L3>
  1742. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1743. size_type nc = mat_ncols(l1);
  1744. for (size_type i = 0; i < nc; ++i)
  1745. add(scaled(mat_const_col(l1, i), l2[i]), l3);
  1746. }
  1747. template <typename L1, typename L2, typename L3>
  1748. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1749. auto it = vect_const_begin(l2), ite = vect_const_end(l2);
  1750. for (; it != ite; ++it)
  1751. if (*it != typename linalg_traits<L2>::value_type(0))
  1752. add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1753. }
  1754. template <typename L1, typename L2, typename L3>
  1755. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1756. auto it = vect_const_begin(l2), ite = vect_const_end(l2);
  1757. for (; it != ite; ++it)
  1758. if (*it != typename linalg_traits<L2>::value_type(0))
  1759. add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1760. }
  1761. template <typename L1, typename L2, typename L3> inline
  1762. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, row_major)
  1763. { mult_add_by_row(l1, l2, l3, typename linalg_traits<L3>::storage_type()); }
  1764. template <typename L1, typename L2, typename L3, typename L4> inline
  1765. void mult_add_spec(const L1& l1, const L2& l2, const L3& l3, L4& l4, row_major)
  1766. { mult_add_by_row(l1, l2, l3, l4, typename linalg_traits<L3>::storage_type()); }
  1767. #ifdef STORM_HAVE_INTELTBB
  1768. template <typename L1, typename L2, typename L3, typename L4> inline
  1769. void mult_add_parallel_spec(const L1& l1, const L2& l2, const L3& l3, L4& l4, row_major)
  1770. { mult_add_by_row_parallel(l1, l2, l3, l4, typename linalg_traits<L4>::storage_type()); }
  1771. #endif
  1772. template <typename L1, typename L2, typename L3> inline
  1773. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, col_major)
  1774. { mult_add_by_col(l1, l2, l3, typename linalg_traits<L2>::storage_type()); }
  1775. template <typename L1, typename L2, typename L3> inline
  1776. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
  1777. { mult_ind(l1, l2, l3, typename linalg_traits<L1>::storage_type()); }
  1778. template <typename L1, typename L2, typename L3>
  1779. void transposed_mult(const L1& l1, const L2& l2, const L3& l3)
  1780. { mult(gmm::transposed(l1), l2, l3); }
  1781. /* ******************************************************************** */
  1782. /* Matrix-matrix mult */
  1783. /* ******************************************************************** */
  1784. struct g_mult {}; // generic mult, less optimized
  1785. struct c_mult {}; // col x col -> col mult
  1786. struct r_mult {}; // row x row -> row mult
  1787. struct rcmult {}; // row x col -> col mult
  1788. struct crmult {}; // col x row -> row mult
  1789. template<typename SO1, typename SO2, typename SO3> struct mult_t;
  1790. #define DEFMU__ template<> struct mult_t
  1791. DEFMU__<row_major , row_major , row_major > { typedef r_mult t; };
  1792. DEFMU__<row_major , row_major , col_major > { typedef g_mult t; };
  1793. DEFMU__<row_major , row_major , col_and_row> { typedef r_mult t; };
  1794. DEFMU__<row_major , row_major , row_and_col> { typedef r_mult t; };
  1795. DEFMU__<row_major , col_major , row_major > { typedef rcmult t; };
  1796. DEFMU__<row_major , col_major , col_major > { typedef rcmult t; };
  1797. DEFMU__<row_major , col_major , col_and_row> { typedef rcmult t; };
  1798. DEFMU__<row_major , col_major , row_and_col> { typedef rcmult t; };
  1799. DEFMU__<row_major , col_and_row, row_major > { typedef r_mult t; };
  1800. DEFMU__<row_major , col_and_row, col_major > { typedef rcmult t; };
  1801. DEFMU__<row_major , col_and_row, col_and_row> { typedef rcmult t; };
  1802. DEFMU__<row_major , col_and_row, row_and_col> { typedef rcmult t; };
  1803. DEFMU__<row_major , row_and_col, row_major > { typedef r_mult t; };
  1804. DEFMU__<row_major , row_and_col, col_major > { typedef rcmult t; };
  1805. DEFMU__<row_major , row_and_col, col_and_row> { typedef r_mult t; };
  1806. DEFMU__<row_major , row_and_col, row_and_col> { typedef r_mult t; };
  1807. DEFMU__<col_major , row_major , row_major > { typedef crmult t; };
  1808. DEFMU__<col_major , row_major , col_major > { typedef g_mult t; };
  1809. DEFMU__<col_major , row_major , col_and_row> { typedef crmult t; };
  1810. DEFMU__<col_major , row_major , row_and_col> { typedef crmult t; };
  1811. DEFMU__<col_major , col_major , row_major > { typedef g_mult t; };
  1812. DEFMU__<col_major , col_major , col_major > { typedef c_mult t; };
  1813. DEFMU__<col_major , col_major , col_and_row> { typedef c_mult t; };
  1814. DEFMU__<col_major , col_major , row_and_col> { typedef c_mult t; };
  1815. DEFMU__<col_major , col_and_row, row_major > { typedef crmult t; };
  1816. DEFMU__<col_major , col_and_row, col_major > { typedef c_mult t; };
  1817. DEFMU__<col_major , col_and_row, col_and_row> { typedef c_mult t; };
  1818. DEFMU__<col_major , col_and_row, row_and_col> { typedef c_mult t; };
  1819. DEFMU__<col_major , row_and_col, row_major > { typedef crmult t; };
  1820. DEFMU__<col_major , row_and_col, col_major > { typedef c_mult t; };
  1821. DEFMU__<col_major , row_and_col, col_and_row> { typedef c_mult t; };
  1822. DEFMU__<col_major , row_and_col, row_and_col> { typedef c_mult t; };
  1823. DEFMU__<col_and_row, row_major , row_major > { typedef r_mult t; };
  1824. DEFMU__<col_and_row, row_major , col_major > { typedef c_mult t; };
  1825. DEFMU__<col_and_row, row_major , col_and_row> { typedef r_mult t; };
  1826. DEFMU__<col_and_row, row_major , row_and_col> { typedef r_mult t; };
  1827. DEFMU__<col_and_row, col_major , row_major > { typedef rcmult t; };
  1828. DEFMU__<col_and_row, col_major , col_major > { typedef c_mult t; };
  1829. DEFMU__<col_and_row, col_major , col_and_row> { typedef c_mult t; };
  1830. DEFMU__<col_and_row, col_major , row_and_col> { typedef c_mult t; };
  1831. DEFMU__<col_and_row, col_and_row, row_major > { typedef r_mult t; };
  1832. DEFMU__<col_and_row, col_and_row, col_major > { typedef c_mult t; };
  1833. DEFMU__<col_and_row, col_and_row, col_and_row> { typedef c_mult t; };
  1834. DEFMU__<col_and_row, col_and_row, row_and_col> { typedef c_mult t; };
  1835. DEFMU__<col_and_row, row_and_col, row_major > { typedef r_mult t; };
  1836. DEFMU__<col_and_row, row_and_col, col_major > { typedef c_mult t; };
  1837. DEFMU__<col_and_row, row_and_col, col_and_row> { typedef c_mult t; };
  1838. DEFMU__<col_and_row, row_and_col, row_and_col> { typedef r_mult t; };
  1839. DEFMU__<row_and_col, row_major , row_major > { typedef r_mult t; };
  1840. DEFMU__<row_and_col, row_major , col_major > { typedef c_mult t; };
  1841. DEFMU__<row_and_col, row_major , col_and_row> { typedef r_mult t; };
  1842. DEFMU__<row_and_col, row_major , row_and_col> { typedef r_mult t; };
  1843. DEFMU__<row_and_col, col_major , row_major > { typedef rcmult t; };
  1844. DEFMU__<row_and_col, col_major , col_major > { typedef c_mult t; };
  1845. DEFMU__<row_and_col, col_major , col_and_row> { typedef c_mult t; };
  1846. DEFMU__<row_and_col, col_major , row_and_col> { typedef c_mult t; };
  1847. DEFMU__<row_and_col, col_and_row, row_major > { typedef rcmult t; };
  1848. DEFMU__<row_and_col, col_and_row, col_major > { typedef rcmult t; };
  1849. DEFMU__<row_and_col, col_and_row, col_and_row> { typedef rcmult t; };
  1850. DEFMU__<row_and_col, col_and_row, row_and_col> { typedef rcmult t; };
  1851. DEFMU__<row_and_col, row_and_col, row_major > { typedef r_mult t; };
  1852. DEFMU__<row_and_col, row_and_col, col_major > { typedef c_mult t; };
  1853. DEFMU__<row_and_col, row_and_col, col_and_row> { typedef r_mult t; };
  1854. DEFMU__<row_and_col, row_and_col, row_and_col> { typedef r_mult t; };
  1855. template <typename L1, typename L2, typename L3>
  1856. void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_matrix) {
  1857. typedef typename temporary_matrix<L3>::matrix_type temp_mat_type;
  1858. size_type n = mat_ncols(l1);
  1859. if (n == 0) { gmm::clear(l3); return; }
  1860. GMM_ASSERT2(n == mat_nrows(l2) && mat_nrows(l1) == mat_nrows(l3) &&
  1861. mat_ncols(l2) == mat_ncols(l3), "dimensions mismatch");
  1862. if (same_origin(l2, l3) || same_origin(l1, l3)) {
  1863. GMM_WARNING2("A temporary is used for mult");
  1864. temp_mat_type temp(mat_nrows(l3), mat_ncols(l3));
  1865. mult_spec(l1, l2, temp, typename mult_t<
  1866. typename linalg_traits<L1>::sub_orientation,
  1867. typename linalg_traits<L2>::sub_orientation,
  1868. typename linalg_traits<temp_mat_type>::sub_orientation>::t());
  1869. copy(temp, l3);
  1870. }
  1871. else
  1872. mult_spec(l1, l2, l3, typename mult_t<
  1873. typename linalg_traits<L1>::sub_orientation,
  1874. typename linalg_traits<L2>::sub_orientation,
  1875. typename linalg_traits<L3>::sub_orientation>::t());
  1876. }
  1877. // Completely generic but inefficient
  1878. template <typename L1, typename L2, typename L3>
  1879. void mult_spec(const L1& l1, const L2& l2, L3& l3, g_mult) {
  1880. typedef typename linalg_traits<L3>::value_type T;
  1881. GMM_WARNING2("Inefficient generic matrix-matrix mult is used");
  1882. for (size_type i = 0; i < mat_nrows(l3) ; ++i)
  1883. for (size_type j = 0; j < mat_ncols(l3) ; ++j) {
  1884. T a(0);
  1885. for (size_type k = 0; k < mat_nrows(l2) ; ++k) a += l1(i, k)*l2(k, j);
  1886. l3(i, j) = a;
  1887. }
  1888. }
  1889. // row x col matrix-matrix mult
  1890. template <typename L1, typename L2, typename L3>
  1891. void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, col_major) {
  1892. typedef typename temporary_col_matrix<L1>::matrix_type temp_col_mat;
  1893. temp_col_mat temp(mat_nrows(l1), mat_ncols(l1));
  1894. copy(l1, temp);
  1895. mult(temp, l2, l3);
  1896. }
  1897. template <typename L1, typename L2, typename L3>
  1898. void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, row_major) {
  1899. typedef typename temporary_row_matrix<L2>::matrix_type temp_row_mat;
  1900. temp_row_mat temp(mat_nrows(l2), mat_ncols(l2));
  1901. copy(l2, temp);
  1902. mult(l1, temp, l3);
  1903. }
  1904. template <typename L1, typename L2, typename L3>
  1905. void mult_spec(const L1& l1, const L2& l2, L3& l3, rcmult) {
  1906. if (is_sparse(l1) && is_sparse(l2)) {
  1907. GMM_WARNING3("Inefficient row matrix - col matrix mult for "
  1908. "sparse matrices, using temporary");
  1909. mult_row_col_with_temp(l1, l2, l3,
  1910. typename principal_orientation_type<typename
  1911. linalg_traits<L3>::sub_orientation>::potype());
  1912. }
  1913. else {
  1914. auto it2b = linalg_traits<L2>::col_begin(l2), it2 = it2b,
  1915. ite = linalg_traits<L2>::col_end(l2);
  1916. size_type i,j, k = mat_nrows(l1);
  1917. for (i = 0; i < k; ++i) {
  1918. typename linalg_traits<L1>::const_sub_row_type r1=mat_const_row(l1, i);
  1919. for (it2 = it2b, j = 0; it2 != ite; ++it2, ++j)
  1920. l3(i,j) = vect_sp(r1, linalg_traits<L2>::col(it2));
  1921. }
  1922. }
  1923. }
  1924. // row - row matrix-matrix mult
  1925. template <typename L1, typename L2, typename L3> inline
  1926. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult) {
  1927. mult_spec(l1, l2, l3,r_mult(),typename linalg_traits<L1>::storage_type());
  1928. }
  1929. template <typename L1, typename L2, typename L3>
  1930. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_dense) {
  1931. // optimizable
  1932. clear(l3);
  1933. size_type nn = mat_nrows(l3), mm = mat_nrows(l2);
  1934. for (size_type i = 0; i < nn; ++i) {
  1935. for (size_type j = 0; j < mm; ++j) {
  1936. add(scaled(mat_const_row(l2, j), l1(i, j)), mat_row(l3, i));
  1937. }
  1938. }
  1939. }
  1940. template <typename L1, typename L2, typename L3>
  1941. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_sparse) {
  1942. // optimizable
  1943. clear(l3);
  1944. size_type nn = mat_nrows(l3);
  1945. for (size_type i = 0; i < nn; ++i) {
  1946. typename linalg_traits<L1>::const_sub_row_type rl1=mat_const_row(l1, i);
  1947. auto it = vect_const_begin(rl1), ite = vect_const_end(rl1);
  1948. for (; it != ite; ++it)
  1949. add(scaled(mat_const_row(l2, it.index()), *it), mat_row(l3, i));
  1950. }
  1951. }
  1952. template <typename L1, typename L2, typename L3>
  1953. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_skyline)
  1954. { mult_spec(l1, l2, l3, r_mult(), abstract_sparse()); }
  1955. // col - col matrix-matrix mult
  1956. template <typename L1, typename L2, typename L3> inline
  1957. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult) {
  1958. mult_spec(l1, l2,l3,c_mult(),typename linalg_traits<L2>::storage_type(),
  1959. typename linalg_traits<L2>::sub_orientation());
  1960. }
  1961. template <typename L1, typename L2, typename L3, typename ORIEN>
  1962. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1963. abstract_dense, ORIEN) {
  1964. typedef typename linalg_traits<L2>::value_type T;
  1965. size_type nn = mat_ncols(l3), mm = mat_ncols(l1);
  1966. for (size_type i = 0; i < nn; ++i) {
  1967. clear(mat_col(l3, i));
  1968. for (size_type j = 0; j < mm; ++j) {
  1969. T b = l2(j, i);
  1970. if (b != T(0)) add(scaled(mat_const_col(l1, j), b), mat_col(l3, i));
  1971. }
  1972. }
  1973. }
  1974. template <typename L1, typename L2, typename L3, typename ORIEN>
  1975. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1976. abstract_sparse, ORIEN) {
  1977. // optimizable
  1978. clear(l3);
  1979. size_type nn = mat_ncols(l3);
  1980. for (size_type i = 0; i < nn; ++i) {
  1981. typename linalg_traits<L2>::const_sub_col_type rc2 = mat_const_col(l2, i);
  1982. auto it = vect_const_begin(rc2), ite = vect_const_end(rc2);
  1983. for (; it != ite; ++it)
  1984. add(scaled(mat_const_col(l1, it.index()), *it), mat_col(l3, i));
  1985. }
  1986. }
  1987. template <typename L1, typename L2, typename L3>
  1988. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1989. abstract_sparse, row_major) {
  1990. typedef typename linalg_traits<L2>::value_type T;
  1991. GMM_WARNING3("Inefficient matrix-matrix mult for sparse matrices");
  1992. clear(l3);
  1993. size_type mm = mat_nrows(l2), nn = mat_ncols(l3);
  1994. for (size_type i = 0; i < nn; ++i)
  1995. for (size_type j = 0; j < mm; ++j) {
  1996. T a = l2(i,j);
  1997. if (a != T(0)) add(scaled(mat_const_col(l1, j), a), mat_col(l3, i));
  1998. }
  1999. }
  2000. template <typename L1, typename L2, typename L3, typename ORIEN> inline
  2001. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  2002. abstract_skyline, ORIEN)
  2003. { mult_spec(l1, l2, l3, c_mult(), abstract_sparse(), ORIEN()); }
  2004. // col - row matrix-matrix mult
  2005. template <typename L1, typename L2, typename L3> inline
  2006. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult)
  2007. { mult_spec(l1,l2,l3,crmult(), typename linalg_traits<L1>::storage_type()); }
  2008. template <typename L1, typename L2, typename L3>
  2009. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_dense) {
  2010. // optimizable
  2011. clear(l3);
  2012. size_type nn = mat_ncols(l1), mm = mat_nrows(l1);
  2013. for (size_type i = 0; i < nn; ++i) {
  2014. for (size_type j = 0; j < mm; ++j)
  2015. add(scaled(mat_const_row(l2, i), l1(j, i)), mat_row(l3, j));
  2016. }
  2017. }
  2018. template <typename L1, typename L2, typename L3>
  2019. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_sparse) {
  2020. // optimizable
  2021. clear(l3);
  2022. size_type nn = mat_ncols(l1);
  2023. for (size_type i = 0; i < nn; ++i) {
  2024. typename linalg_traits<L1>::const_sub_col_type rc1 = mat_const_col(l1, i);
  2025. auto it = vect_const_begin(rc1), ite = vect_const_end(rc1);
  2026. for (; it != ite; ++it)
  2027. add(scaled(mat_const_row(l2, i), *it), mat_row(l3, it.index()));
  2028. }
  2029. }
  2030. template <typename L1, typename L2, typename L3> inline
  2031. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_skyline)
  2032. { mult_spec(l1, l2, l3, crmult(), abstract_sparse()); }
  2033. /* ******************************************************************** */
  2034. /* Symmetry test. */
  2035. /* ******************************************************************** */
  2036. ///@endcond
  2037. /** test if A is symmetric.
  2038. @param A a matrix.
  2039. @param tol a threshold.
  2040. */
  2041. template <typename MAT> inline
  2042. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol
  2043. = magnitude_of_linalg(MAT)(-1)) {
  2044. typedef magnitude_of_linalg(MAT) R;
  2045. if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
  2046. if (mat_nrows(A) != mat_ncols(A)) return false;
  2047. return is_symmetric(A, tol, typename linalg_traits<MAT>::storage_type());
  2048. }
  2049. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  2050. template <typename MAT>
  2051. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2052. abstract_dense) {
  2053. size_type m = mat_nrows(A);
  2054. for (size_type i = 1; i < m; ++i)
  2055. for (size_type j = 0; j < i; ++j)
  2056. if (gmm::abs(A(i, j)-A(j, i)) > tol) return false;
  2057. return true;
  2058. }
  2059. template <typename MAT>
  2060. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2061. abstract_sparse) {
  2062. return is_symmetric(A, tol, typename principal_orientation_type<typename
  2063. linalg_traits<MAT>::sub_orientation>::potype());
  2064. }
  2065. template <typename MAT>
  2066. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2067. row_major) {
  2068. for (size_type i = 0; i < mat_nrows(A); ++i) {
  2069. typename linalg_traits<MAT>::const_sub_row_type row = mat_const_row(A, i);
  2070. auto it = vect_const_begin(row), ite = vect_const_end(row);
  2071. for (; it != ite; ++it)
  2072. if (gmm::abs(*it - A(it.index(), i)) > tol) return false;
  2073. }
  2074. return true;
  2075. }
  2076. template <typename MAT>
  2077. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2078. col_major) {
  2079. for (size_type i = 0; i < mat_ncols(A); ++i) {
  2080. typename linalg_traits<MAT>::const_sub_col_type col = mat_const_col(A, i);
  2081. auto it = vect_const_begin(col), ite = vect_const_end(col);
  2082. for (; it != ite; ++it)
  2083. if (gmm::abs(*it - A(i, it.index())) > tol) return false;
  2084. }
  2085. return true;
  2086. }
  2087. template <typename MAT>
  2088. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2089. abstract_skyline)
  2090. { return is_symmetric(A, tol, abstract_sparse()); }
  2091. ///@endcond
  2092. /** test if A is Hermitian.
  2093. @param A a matrix.
  2094. @param tol a threshold.
  2095. */
  2096. template <typename MAT> inline
  2097. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol
  2098. = magnitude_of_linalg(MAT)(-1)) {
  2099. typedef magnitude_of_linalg(MAT) R;
  2100. if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
  2101. if (mat_nrows(A) != mat_ncols(A)) return false;
  2102. return is_hermitian(A, tol, typename linalg_traits<MAT>::storage_type());
  2103. }
  2104. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  2105. template <typename MAT>
  2106. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2107. abstract_dense) {
  2108. size_type m = mat_nrows(A);
  2109. for (size_type i = 1; i < m; ++i)
  2110. for (size_type j = 0; j < i; ++j)
  2111. if (gmm::abs(A(i, j)-gmm::conj(A(j, i))) > tol) return false;
  2112. return true;
  2113. }
  2114. template <typename MAT>
  2115. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2116. abstract_sparse) {
  2117. return is_hermitian(A, tol, typename principal_orientation_type<typename
  2118. linalg_traits<MAT>::sub_orientation>::potype());
  2119. }
  2120. template <typename MAT>
  2121. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2122. row_major) {
  2123. for (size_type i = 0; i < mat_nrows(A); ++i) {
  2124. typename linalg_traits<MAT>::const_sub_row_type row = mat_const_row(A, i);
  2125. auto it = vect_const_begin(row), ite = vect_const_end(row);
  2126. for (; it != ite; ++it)
  2127. if (gmm::abs(gmm::conj(*it) - A(it.index(), i)) > tol) return false;
  2128. }
  2129. return true;
  2130. }
  2131. template <typename MAT>
  2132. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2133. col_major) {
  2134. for (size_type i = 0; i < mat_ncols(A); ++i) {
  2135. typename linalg_traits<MAT>::const_sub_col_type col = mat_const_col(A, i);
  2136. auto it = vect_const_begin(col), ite = vect_const_end(col);
  2137. for (; it != ite; ++it)
  2138. if (gmm::abs(gmm::conj(*it) - A(i, it.index())) > tol) return false;
  2139. }
  2140. return true;
  2141. }
  2142. template <typename MAT>
  2143. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2144. abstract_skyline)
  2145. { return is_hermitian(A, tol, abstract_sparse()); }
  2146. ///@endcond
  2147. }
  2148. #endif // GMM_BLAS_H__