You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

2425 lines
91 KiB

  1. /* -*- c++ -*- (enables emacs c++ mode) */
  2. /*===========================================================================
  3. Copyright (C) 2002-2012 Yves Renard
  4. This file is a part of GETFEM++
  5. Getfem++ is free software; you can redistribute it and/or modify it
  6. under the terms of the GNU Lesser General Public License as published
  7. by the Free Software Foundation; either version 3 of the License, or
  8. (at your option) any later version along with the GCC Runtime Library
  9. Exception either version 3.1 or (at your option) any later version.
  10. This program is distributed in the hope that it will be useful, but
  11. WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12. or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
  13. License and GCC Runtime Library Exception for more details.
  14. You should have received a copy of the GNU Lesser General Public License
  15. along with this program; if not, write to the Free Software Foundation,
  16. Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
  17. As a special exception, you may use this file as it is a part of a free
  18. software library without restriction. Specifically, if other files
  19. instantiate templates or use macros or inline functions from this file,
  20. or you compile this file and link it with other files to produce an
  21. executable, this file does not by itself cause the resulting executable
  22. to be covered by the GNU Lesser General Public License. This exception
  23. does not however invalidate any other reasons why the executable file
  24. might be covered by the GNU Lesser General Public License.
  25. ===========================================================================*/
  26. /**@file gmm_blas.h
  27. @author Yves Renard <Yves.Renard@insa-lyon.fr>
  28. @date October 13, 2002.
  29. @brief Basic linear algebra functions.
  30. */
  31. #ifndef GMM_BLAS_H__
  32. #define GMM_BLAS_H__
  33. #ifdef STORM_USE_TBB
  34. # include <new> // This fixes a potential dependency ordering problem between GMM and TBB
  35. # include "tbb/tbb.h"
  36. # include <iterator>
  37. #endif
  38. #include "gmm_scaled.h"
  39. #include "gmm_transposed.h"
  40. #include "gmm_conjugated.h"
  41. namespace gmm {
  42. /* ******************************************************************** */
  43. /* */
  44. /* Generic algorithms */
  45. /* */
  46. /* ******************************************************************** */
  47. /* ******************************************************************** */
  48. /* Miscellaneous */
  49. /* ******************************************************************** */
  50. /** clear (fill with zeros) a vector or matrix. */
  51. template <typename L> inline void clear(L &l)
  52. { linalg_traits<L>::do_clear(l); }
  53. /** @cond DOXY_SHOW_ALL_FUNCTIONS
  54. skip all these redundant definitions in doxygen documentation..
  55. */
  56. template <typename L> inline void clear(const L &l)
  57. { linalg_traits<L>::do_clear(linalg_const_cast(l)); }
  58. ///@endcond
  59. /** count the number of non-zero entries of a vector or matrix. */ template <typename L> inline size_type nnz(const L& l)
  60. { return nnz(l, typename linalg_traits<L>::linalg_type()); }
  61. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  62. template <typename L> inline size_type nnz(const L& l, abstract_vector) {
  63. typename linalg_traits<L>::const_iterator it = vect_const_begin(l),
  64. ite = vect_const_end(l);
  65. size_type res(0);
  66. for (; it != ite; ++it) ++res;
  67. return res;
  68. }
  69. template <typename L> inline size_type nnz(const L& l, abstract_matrix) {
  70. return nnz(l, typename principal_orientation_type<typename
  71. linalg_traits<L>::sub_orientation>::potype());
  72. }
  73. template <typename L> inline size_type nnz(const L& l, row_major) {
  74. size_type res(0);
  75. for (size_type i = 0; i < mat_nrows(l); ++i)
  76. res += nnz(mat_const_row(l, i));
  77. return res;
  78. }
  79. template <typename L> inline size_type nnz(const L& l, col_major) {
  80. size_type res(0);
  81. for (size_type i = 0; i < mat_ncols(l); ++i)
  82. res += nnz(mat_const_col(l, i));
  83. return res;
  84. }
  85. ///@endcond
  86. /** fill a vector or matrix with x. */
  87. template <typename L> inline
  88. void fill(L& l, typename gmm::linalg_traits<L>::value_type x) {
  89. typedef typename gmm::linalg_traits<L>::value_type T;
  90. if (x == T(0)) gmm::clear(l);
  91. fill(l, x, typename linalg_traits<L>::linalg_type());
  92. }
  93. template <typename L> inline
  94. void fill(const L& l, typename gmm::linalg_traits<L>::value_type x) {
  95. fill(linalg_const_cast(l), x);
  96. }
  97. template <typename L> inline // to be optimized for dense vectors ...
  98. void fill(L& l, typename gmm::linalg_traits<L>::value_type x,
  99. abstract_vector) {
  100. for (size_type i = 0; i < vect_size(l); ++i) l[i] = x;
  101. }
  102. template <typename L> inline // to be optimized for dense matrices ...
  103. void fill(L& l, typename gmm::linalg_traits<L>::value_type x,
  104. abstract_matrix) {
  105. for (size_type i = 0; i < mat_nrows(l); ++i)
  106. for (size_type j = 0; j < mat_ncols(l); ++j)
  107. l(i,j) = x;
  108. }
  109. /** fill a vector or matrix with random value (uniform [-1,1]). */
  110. template <typename L> inline void fill_random(L& l)
  111. { fill_random(l, typename linalg_traits<L>::linalg_type()); }
  112. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  113. template <typename L> inline void fill_random(const L& l) {
  114. fill_random(linalg_const_cast(l),
  115. typename linalg_traits<L>::linalg_type());
  116. }
  117. template <typename L> inline void fill_random(L& l, abstract_vector) {
  118. for (size_type i = 0; i < vect_size(l); ++i)
  119. l[i] = gmm::random(typename linalg_traits<L>::value_type());
  120. }
  121. template <typename L> inline void fill_random(L& l, abstract_matrix) {
  122. for (size_type i = 0; i < mat_nrows(l); ++i)
  123. for (size_type j = 0; j < mat_ncols(l); ++j)
  124. l(i,j) = gmm::random(typename linalg_traits<L>::value_type());
  125. }
  126. ///@endcond
  127. /** fill a vector or matrix with random value.
  128. @param l a vector or matrix.
  129. @param cfill probability of a non-zero value.
  130. */
  131. template <typename L> inline void fill_random(L& l, double cfill)
  132. { fill_random(l, cfill, typename linalg_traits<L>::linalg_type()); }
  133. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  134. template <typename L> inline void fill_random(const L& l, double cfill) {
  135. fill_random(linalg_const_cast(l), cfill,
  136. typename linalg_traits<L>::linalg_type());
  137. }
  138. template <typename L> inline
  139. void fill_random(L& l, double cfill, abstract_vector) {
  140. typedef typename linalg_traits<L>::value_type T;
  141. size_type ntot = std::min(vect_size(l),
  142. size_type(double(vect_size(l))*cfill) + 1);
  143. for (size_type nb = 0; nb < ntot;) {
  144. size_type i = gmm::irandom(vect_size(l));
  145. if (l[i] == T(0)) {
  146. l[i] = gmm::random(typename linalg_traits<L>::value_type());
  147. ++nb;
  148. }
  149. }
  150. }
  151. template <typename L> inline
  152. void fill_random(L& l, double cfill, abstract_matrix) {
  153. fill_random(l, cfill, typename principal_orientation_type<typename
  154. linalg_traits<L>::sub_orientation>::potype());
  155. }
  156. template <typename L> inline
  157. void fill_random(L& l, double cfill, row_major) {
  158. for (size_type i=0; i < mat_nrows(l); ++i) fill_random(mat_row(l,i),cfill);
  159. }
  160. template <typename L> inline
  161. void fill_random(L& l, double cfill, col_major) {
  162. for (size_type j=0; j < mat_ncols(l); ++j) fill_random(mat_col(l,j),cfill);
  163. }
  164. /* resize a vector */
  165. template <typename V> inline
  166. void resize(V &v, size_type n, linalg_false)
  167. { linalg_traits<V>::resize(v, n); }
  168. template <typename V> inline
  169. void resize(V &, size_type , linalg_modifiable)
  170. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  171. template <typename V> inline
  172. void resize(V &, size_type , linalg_const)
  173. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  174. ///@endcond
  175. /** resize a vector. */
  176. template <typename V> inline
  177. void resize(V &v, size_type n) {
  178. resize(v, n, typename linalg_traits<V>::is_reference());
  179. }
  180. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  181. /** resize a matrix **/
  182. template <typename M> inline
  183. void resize(M &v, size_type m, size_type n, linalg_false) {
  184. linalg_traits<M>::resize(v, m, n);
  185. }
  186. template <typename M> inline
  187. void resize(M &, size_type, size_type, linalg_modifiable)
  188. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  189. template <typename M> inline
  190. void resize(M &, size_type, size_type, linalg_const)
  191. { GMM_ASSERT1(false, "You cannot resize a reference"); }
  192. ///@endcond
  193. /** resize a matrix */
  194. template <typename M> inline
  195. void resize(M &v, size_type m, size_type n)
  196. { resize(v, m, n, typename linalg_traits<M>::is_reference()); }
  197. ///@cond
  198. template <typename M> inline
  199. void reshape(M &v, size_type m, size_type n, linalg_false)
  200. { linalg_traits<M>::reshape(v, m, n); }
  201. template <typename M> inline
  202. void reshape(M &, size_type, size_type, linalg_modifiable)
  203. { GMM_ASSERT1(false, "You cannot reshape a reference"); }
  204. template <typename M> inline
  205. void reshape(M &, size_type, size_type, linalg_const)
  206. { GMM_ASSERT1(false, "You cannot reshape a reference"); }
  207. ///@endcond
  208. /** reshape a matrix */
  209. template <typename M> inline
  210. void reshape(M &v, size_type m, size_type n)
  211. { reshape(v, m, n, typename linalg_traits<M>::is_reference()); }
  212. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  213. /* ******************************************************************** */
  214. /* Scalar product */
  215. /* ******************************************************************** */
  216. ///@endcond
  217. /** scalar product between two vectors */
  218. template <typename V1, typename V2> inline
  219. typename strongest_value_type<V1,V2>::value_type
  220. vect_sp(const V1 &v1, const V2 &v2) {
  221. GMM_ASSERT2(vect_size(v1) == vect_size(v2), "dimensions mismatch");
  222. return vect_sp(v1, v2,
  223. typename linalg_traits<V1>::storage_type(),
  224. typename linalg_traits<V2>::storage_type());
  225. }
  226. /** scalar product between two vectors, using a matrix.
  227. @param ps the matrix of the scalar product.
  228. @param v1 the first vector
  229. @param v2 the second vector
  230. */
  231. template <typename MATSP, typename V1, typename V2> inline
  232. typename strongest_value_type3<V1,V2,MATSP>::value_type
  233. vect_sp(const MATSP &ps, const V1 &v1, const V2 &v2) {
  234. return vect_sp_with_mat(ps, v1, v2,
  235. typename linalg_traits<MATSP>::sub_orientation());
  236. }
  237. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  238. template <typename MATSP, typename V1, typename V2> inline
  239. typename strongest_value_type3<V1,V2,MATSP>::value_type
  240. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2, row_major) {
  241. return vect_sp_with_matr(ps, v1, v2,
  242. typename linalg_traits<V2>::storage_type());
  243. }
  244. template <typename MATSP, typename V1, typename V2> inline
  245. typename strongest_value_type3<V1,V2,MATSP>::value_type
  246. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  247. abstract_sparse) {
  248. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  249. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  250. size_type nr = mat_nrows(ps);
  251. typename linalg_traits<V2>::const_iterator
  252. it = vect_const_begin(v2), ite = vect_const_end(v2);
  253. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  254. for (; it != ite; ++it)
  255. res += vect_sp(mat_const_row(ps, it.index()), v1)* (*it);
  256. return res;
  257. }
  258. template <typename MATSP, typename V1, typename V2> inline
  259. typename strongest_value_type3<V1,V2,MATSP>::value_type
  260. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  261. abstract_skyline)
  262. { return vect_sp_with_matr(ps, v1, v2, abstract_sparse()); }
  263. template <typename MATSP, typename V1, typename V2> inline
  264. typename strongest_value_type3<V1,V2,MATSP>::value_type
  265. vect_sp_with_matr(const MATSP &ps, const V1 &v1, const V2 &v2,
  266. abstract_dense) {
  267. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  268. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  269. typename linalg_traits<V2>::const_iterator
  270. it = vect_const_begin(v2), ite = vect_const_end(v2);
  271. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  272. for (size_type i = 0; it != ite; ++i, ++it)
  273. res += vect_sp(mat_const_row(ps, i), v1) * (*it);
  274. return res;
  275. }
  276. template <typename MATSP, typename V1, typename V2> inline
  277. typename strongest_value_type3<V1,V2,MATSP>::value_type
  278. vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,row_and_col)
  279. { return vect_sp_with_mat(ps, v1, v2, row_major()); }
  280. template <typename MATSP, typename V1, typename V2> inline
  281. typename strongest_value_type3<V1,V2,MATSP>::value_type
  282. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,col_major){
  283. return vect_sp_with_matc(ps, v1, v2,
  284. typename linalg_traits<V1>::storage_type());
  285. }
  286. template <typename MATSP, typename V1, typename V2> inline
  287. typename strongest_value_type3<V1,V2,MATSP>::value_type
  288. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  289. abstract_sparse) {
  290. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  291. vect_size(v2) == mat_nrows(ps),"dimensions mismatch");
  292. typename linalg_traits<V1>::const_iterator
  293. it = vect_const_begin(v1), ite = vect_const_end(v1);
  294. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  295. for (; it != ite; ++it)
  296. res += vect_sp(mat_const_col(ps, it.index()), v2) * (*it);
  297. return res;
  298. }
  299. template <typename MATSP, typename V1, typename V2> inline
  300. typename strongest_value_type3<V1,V2,MATSP>::value_type
  301. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  302. abstract_skyline)
  303. { return vect_sp_with_matc(ps, v1, v2, abstract_sparse()); }
  304. template <typename MATSP, typename V1, typename V2> inline
  305. typename strongest_value_type3<V1,V2,MATSP>::value_type
  306. vect_sp_with_matc(const MATSP &ps, const V1 &v1, const V2 &v2,
  307. abstract_dense) {
  308. GMM_ASSERT2(vect_size(v1) == mat_ncols(ps) &&
  309. vect_size(v2) == mat_nrows(ps), "dimensions mismatch");
  310. typename linalg_traits<V1>::const_iterator
  311. it = vect_const_begin(v1), ite = vect_const_end(v1);
  312. typename strongest_value_type3<V1,V2,MATSP>::value_type res(0);
  313. for (size_type i = 0; it != ite; ++i, ++it)
  314. res += vect_sp(mat_const_col(ps, i), v2) * (*it);
  315. return res;
  316. }
  317. template <typename MATSP, typename V1, typename V2> inline
  318. typename strongest_value_type3<V1,V2,MATSP>::value_type
  319. vect_sp_with_mat(const MATSP &ps, const V1 &v1,const V2 &v2,col_and_row)
  320. { return vect_sp_with_mat(ps, v1, v2, col_major()); }
  321. template <typename MATSP, typename V1, typename V2> inline
  322. typename strongest_value_type3<V1,V2,MATSP>::value_type
  323. vect_sp_with_mat(const MATSP &ps, const V1 &v1, const V2 &v2,
  324. abstract_null_type) {
  325. typename temporary_vector<V1>::vector_type w(mat_nrows(ps));
  326. GMM_WARNING2("Warning, a temporary is used in scalar product\n");
  327. mult(ps, v1, w);
  328. return vect_sp(w, v2);
  329. }
  330. template <typename IT1, typename IT2> inline
  331. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  332. typename std::iterator_traits<IT2>::value_type>::T
  333. vect_sp_dense_(IT1 it, IT1 ite, IT2 it2) {
  334. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  335. typename std::iterator_traits<IT2>::value_type>::T res(0);
  336. for (; it != ite; ++it, ++it2) res += (*it) * (*it2);
  337. return res;
  338. }
  339. #ifdef STORM_USE_TBB
  340. /* Official Intel Hint on blocked_range vs. linear iterators: http://software.intel.com/en-us/forums/topic/289505
  341. */
  342. template <typename IT1>
  343. class forward_range {
  344. IT1 my_begin;
  345. IT1 my_end;
  346. size_t my_size;
  347. public:
  348. IT1 begin() const {return my_begin;}
  349. IT1 end() const {return my_end;}
  350. bool empty() const {return my_begin==my_end;}
  351. bool is_divisible() const {return my_size>1;}
  352. forward_range( IT1 first, IT1 last, size_t size ) : my_begin(first), my_end(last), my_size(size) {
  353. assert( size==size_t(std::distance( first,last )));
  354. }
  355. forward_range( IT1 first, IT1 last) : my_begin(first), my_end(last) {
  356. my_size = std::distance( first,last );
  357. }
  358. forward_range( forward_range& r, tbb::split ) {
  359. size_t h = r.my_size/2;
  360. my_end = r.my_end;
  361. my_begin = r.my_begin;
  362. std::advance( my_begin, h ); // Might be scaling issue
  363. my_size = r.my_size-h;
  364. r.my_end = my_begin;
  365. r.my_size = h;
  366. }
  367. };
  368. template <typename IT1, typename V>
  369. class tbbHelper_vect_sp_sparse {
  370. V const* my_v;
  371. public:
  372. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  373. typename linalg_traits<V>::value_type>::T my_sum;
  374. void operator()( const forward_range<IT1>& r ) {
  375. V const* v = my_v;
  376. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  377. typename linalg_traits<V>::value_type>::T sum = my_sum;
  378. IT1 end = r.end();
  379. for( IT1 i=r.begin(); i!=end; ++i) {
  380. sum += (*i) * v->at(i.index());
  381. }
  382. my_sum = sum;
  383. }
  384. tbbHelper_vect_sp_sparse( tbbHelper_vect_sp_sparse& x, tbb::split ) : my_v(x.my_v), my_sum(0) {}
  385. void join( const tbbHelper_vect_sp_sparse& y ) {my_sum+=y.my_sum;}
  386. tbbHelper_vect_sp_sparse(V const* v) :
  387. my_v(v), my_sum(0)
  388. {}
  389. };
  390. #endif
  391. template <typename IT1, typename V> inline
  392. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  393. typename linalg_traits<V>::value_type>::T
  394. vect_sp_sparse_(IT1 it, IT1 ite, const V &v) {
  395. typename strongest_numeric_type<typename std::iterator_traits<IT1>::value_type,
  396. typename linalg_traits<V>::value_type>::T res(0);
  397. #if defined(STORM_USE_TBB) && defined(STORM_USE_TBB_FOR_INNER)
  398. tbbHelper_vect_sp_sparse<IT1, V> tbbHelper(&v);
  399. tbb::parallel_reduce(forward_range<IT1>(it, ite), tbbHelper);
  400. res = tbbHelper.my_sum;
  401. #else
  402. for (; it != ite; ++it) res += (*it) * v[it.index()];
  403. #endif
  404. return res;
  405. }
  406. template <typename V1, typename V2> inline
  407. typename strongest_value_type<V1,V2>::value_type
  408. vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_dense) {
  409. return vect_sp_dense_(vect_const_begin(v1), vect_const_end(v1),
  410. vect_const_begin(v2));
  411. }
  412. template <typename V1, typename V2> inline
  413. typename strongest_value_type<V1,V2>::value_type
  414. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_dense) {
  415. typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
  416. ite = vect_const_end(v1);
  417. typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2);
  418. return vect_sp_dense_(it1, ite, it2 + it1.index());
  419. }
  420. template <typename V1, typename V2> inline
  421. typename strongest_value_type<V1,V2>::value_type
  422. vect_sp(const V1 &v1, const V2 &v2, abstract_dense, abstract_skyline) {
  423. typename linalg_traits<V2>::const_iterator it1 = vect_const_begin(v2),
  424. ite = vect_const_end(v2);
  425. typename linalg_traits<V1>::const_iterator it2 = vect_const_begin(v1);
  426. return vect_sp_dense_(it1, ite, it2 + it1.index());
  427. }
  428. template <typename V1, typename V2> inline
  429. typename strongest_value_type<V1,V2>::value_type
  430. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_skyline) {
  431. typedef typename strongest_value_type<V1,V2>::value_type T;
  432. typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
  433. ite1 = vect_const_end(v1);
  434. typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2),
  435. ite2 = vect_const_end(v2);
  436. size_type n = std::min(ite1.index(), ite2.index());
  437. size_type l = std::max(it1.index(), it2.index());
  438. if (l < n) {
  439. size_type m = l - it1.index(), p = l - it2.index(), q = m + n - l;
  440. return vect_sp_dense_(it1+m, it1+q, it2 + p);
  441. }
  442. return T(0);
  443. }
  444. template <typename V1, typename V2> inline
  445. typename strongest_value_type<V1,V2>::value_type
  446. vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_dense) {
  447. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  448. }
  449. template <typename V1, typename V2> inline
  450. typename strongest_value_type<V1,V2>::value_type
  451. vect_sp(const V1 &v1, const V2 &v2, abstract_sparse, abstract_skyline) {
  452. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  453. }
  454. template <typename V1, typename V2> inline
  455. typename strongest_value_type<V1,V2>::value_type
  456. vect_sp(const V1 &v1, const V2 &v2, abstract_skyline, abstract_sparse) {
  457. return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
  458. }
  459. template <typename V1, typename V2> inline
  460. typename strongest_value_type<V1,V2>::value_type
  461. vect_sp(const V1 &v1, const V2 &v2, abstract_dense,abstract_sparse) {
  462. return vect_sp_sparse_(vect_const_begin(v2), vect_const_end(v2), v1);
  463. }
  464. template <typename V1, typename V2> inline
  465. typename strongest_value_type<V1,V2>::value_type
  466. vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_true) {
  467. typename linalg_traits<V1>::const_iterator it1 = vect_const_begin(v1),
  468. ite1 = vect_const_end(v1);
  469. typename linalg_traits<V2>::const_iterator it2 = vect_const_begin(v2),
  470. ite2 = vect_const_end(v2);
  471. typename strongest_value_type<V1,V2>::value_type res(0);
  472. while (it1 != ite1 && it2 != ite2) {
  473. if (it1.index() == it2.index())
  474. { res += (*it1) * *it2; ++it1; ++it2; }
  475. else if (it1.index() < it2.index()) ++it1; else ++it2;
  476. }
  477. return res;
  478. }
  479. template <typename V1, typename V2> inline
  480. typename strongest_value_type<V1,V2>::value_type
  481. vect_sp_sparse_sparse(const V1 &v1, const V2 &v2, linalg_false) {
  482. return vect_sp_sparse_(vect_const_begin(v1), vect_const_end(v1), v2);
  483. }
  484. template <typename V1, typename V2> inline
  485. typename strongest_value_type<V1,V2>::value_type
  486. vect_sp(const V1 &v1, const V2 &v2,abstract_sparse,abstract_sparse) {
  487. return vect_sp_sparse_sparse(v1, v2,
  488. typename linalg_and<typename linalg_traits<V1>::index_sorted,
  489. typename linalg_traits<V2>::index_sorted>::bool_type());
  490. }
  491. /* ******************************************************************** */
  492. /* Hermitian product */
  493. /* ******************************************************************** */
  494. ///@endcond
  495. /** Hermitian product. */
  496. template <typename V1, typename V2>
  497. inline typename strongest_value_type<V1,V2>::value_type
  498. vect_hp(const V1 &v1, const V2 &v2)
  499. { return vect_sp(v1, conjugated(v2)); }
  500. /** Hermitian product with a matrix. */
  501. template <typename MATSP, typename V1, typename V2> inline
  502. typename strongest_value_type3<V1,V2,MATSP>::value_type
  503. vect_hp(const MATSP &ps, const V1 &v1, const V2 &v2) {
  504. return vect_sp(ps, v1, gmm::conjugated(v2));
  505. }
  506. /* ******************************************************************** */
  507. /* Trace of a matrix */
  508. /* ******************************************************************** */
  509. /** Trace of a matrix */
  510. template <typename M>
  511. typename linalg_traits<M>::value_type
  512. mat_trace(const M &m) {
  513. typedef typename linalg_traits<M>::value_type T;
  514. T res(0);
  515. for (size_type i = 0; i < std::min(mat_nrows(m), mat_ncols(m)); ++i)
  516. res += m(i,i);
  517. return res;
  518. }
  519. /* ******************************************************************** */
  520. /* Euclidean norm */
  521. /* ******************************************************************** */
  522. /** Euclidean norm of a vector. */
  523. template <typename V>
  524. typename number_traits<typename linalg_traits<V>::value_type>
  525. ::magnitude_type
  526. vect_norm2_sqr(const V &v) {
  527. typedef typename linalg_traits<V>::value_type T;
  528. typedef typename number_traits<T>::magnitude_type R;
  529. typename linalg_traits<V>::const_iterator
  530. it = vect_const_begin(v), ite = vect_const_end(v);
  531. R res(0);
  532. for (; it != ite; ++it) res += gmm::abs_sqr(*it);
  533. return res;
  534. }
  535. /** squared Euclidean norm of a vector. */
  536. template <typename V> inline
  537. typename number_traits<typename linalg_traits<V>::value_type>
  538. ::magnitude_type
  539. vect_norm2(const V &v)
  540. { return sqrt(vect_norm2_sqr(v)); }
  541. /** squared Euclidean distance between two vectors */
  542. template <typename V1, typename V2> inline
  543. typename number_traits<typename linalg_traits<V1>::value_type>
  544. ::magnitude_type
  545. vect_dist2_sqr(const V1 &v1, const V2 &v2) { // not fully optimized
  546. typedef typename linalg_traits<V1>::value_type T;
  547. typedef typename number_traits<T>::magnitude_type R;
  548. typename linalg_traits<V1>::const_iterator
  549. it1 = vect_const_begin(v1), ite1 = vect_const_end(v1);
  550. typename linalg_traits<V2>::const_iterator
  551. it2 = vect_const_begin(v2), ite2 = vect_const_end(v2);
  552. size_type k1(0), k2(0);
  553. R res(0);
  554. while (it1 != ite1 && it2 != ite2) {
  555. size_type i1 = index_of_it(it1, k1,
  556. typename linalg_traits<V1>::storage_type());
  557. size_type i2 = index_of_it(it2, k2,
  558. typename linalg_traits<V2>::storage_type());
  559. if (i1 == i2) {
  560. res += gmm::abs_sqr(*it2 - *it1); ++it1; ++k1; ++it2; ++k2;
  561. }
  562. else if (i1 < i2) {
  563. res += gmm::abs_sqr(*it1); ++it1; ++k1;
  564. }
  565. else {
  566. res += gmm::abs_sqr(*it2); ++it2; ++k2;
  567. }
  568. }
  569. while (it1 != ite1) { res += gmm::abs_sqr(*it1); ++it1; }
  570. while (it2 != ite2) { res += gmm::abs_sqr(*it2); ++it2; }
  571. return res;
  572. }
  573. /** Euclidean distance between two vectors */
  574. template <typename V1, typename V2> inline
  575. typename number_traits<typename linalg_traits<V1>::value_type>
  576. ::magnitude_type
  577. vect_dist2(const V1 &v1, const V2 &v2)
  578. { return sqrt(vect_dist2_sqr(v1, v2)); }
  579. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  580. template <typename M>
  581. typename number_traits<typename linalg_traits<M>::value_type>
  582. ::magnitude_type
  583. mat_euclidean_norm_sqr(const M &m, row_major) {
  584. typename number_traits<typename linalg_traits<M>::value_type>
  585. ::magnitude_type res(0);
  586. for (size_type i = 0; i < mat_nrows(m); ++i)
  587. res += vect_norm2_sqr(mat_const_row(m, i));
  588. return res;
  589. }
  590. template <typename M>
  591. typename number_traits<typename linalg_traits<M>::value_type>
  592. ::magnitude_type
  593. mat_euclidean_norm_sqr(const M &m, col_major) {
  594. typename number_traits<typename linalg_traits<M>::value_type>
  595. ::magnitude_type res(0);
  596. for (size_type i = 0; i < mat_ncols(m); ++i)
  597. res += vect_norm2_sqr(mat_const_col(m, i));
  598. return res;
  599. }
  600. ///@endcond
  601. /** squared Euclidean norm of a matrix. */
  602. template <typename M> inline
  603. typename number_traits<typename linalg_traits<M>::value_type>
  604. ::magnitude_type
  605. mat_euclidean_norm_sqr(const M &m) {
  606. return mat_euclidean_norm_sqr(m,
  607. typename principal_orientation_type<typename
  608. linalg_traits<M>::sub_orientation>::potype());
  609. }
  610. /** Euclidean norm of a matrix. */
  611. template <typename M> inline
  612. typename number_traits<typename linalg_traits<M>::value_type>
  613. ::magnitude_type
  614. mat_euclidean_norm(const M &m)
  615. { return gmm::sqrt(mat_euclidean_norm_sqr(m)); }
  616. /* ******************************************************************** */
  617. /* vector norm1 */
  618. /* ******************************************************************** */
  619. /** 1-norm of a vector */
  620. template <typename V>
  621. typename number_traits<typename linalg_traits<V>::value_type>
  622. ::magnitude_type
  623. vect_norm1(const V &v) {
  624. typename linalg_traits<V>::const_iterator
  625. it = vect_const_begin(v), ite = vect_const_end(v);
  626. typename number_traits<typename linalg_traits<V>::value_type>
  627. ::magnitude_type res(0);
  628. for (; it != ite; ++it) res += gmm::abs(*it);
  629. return res;
  630. }
  631. /* ******************************************************************** */
  632. /* vector Infinity norm */
  633. /* ******************************************************************** */
  634. /** Infinity norm of a vector. */
  635. template <typename V>
  636. typename number_traits<typename linalg_traits<V>::value_type>
  637. ::magnitude_type
  638. vect_norminf(const V &v) {
  639. typename linalg_traits<V>::const_iterator
  640. it = vect_const_begin(v), ite = vect_const_end(v);
  641. typename number_traits<typename linalg_traits<V>::value_type>
  642. ::magnitude_type res(0);
  643. for (; it != ite; ++it) res = std::max(res, gmm::abs(*it));
  644. return res;
  645. }
  646. /* ******************************************************************** */
  647. /* matrix norm1 */
  648. /* ******************************************************************** */
  649. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  650. template <typename M>
  651. typename number_traits<typename linalg_traits<M>::value_type>
  652. ::magnitude_type
  653. mat_norm1(const M &m, col_major) {
  654. typename number_traits<typename linalg_traits<M>::value_type>
  655. ::magnitude_type res(0);
  656. for (size_type i = 0; i < mat_ncols(m); ++i)
  657. res = std::max(res, vect_norm1(mat_const_col(m,i)));
  658. return res;
  659. }
  660. template <typename M>
  661. typename number_traits<typename linalg_traits<M>::value_type>
  662. ::magnitude_type
  663. mat_norm1(const M &m, row_major) {
  664. typedef typename linalg_traits<M>::value_type T;
  665. typedef typename number_traits<T>::magnitude_type R;
  666. typedef typename linalg_traits<M>::storage_type store_type;
  667. std::vector<R> aux(mat_ncols(m));
  668. for (size_type i = 0; i < mat_nrows(m); ++i) {
  669. typedef typename linalg_traits<M>::const_sub_row_type row_type;
  670. row_type row = mat_const_row(m, i);
  671. typename linalg_traits<row_type>::const_iterator
  672. it = vect_const_begin(row), ite = vect_const_end(row);
  673. for (size_type k = 0; it != ite; ++it, ++k)
  674. aux[index_of_it(it, k, store_type())] += gmm::abs(*it);
  675. }
  676. return vect_norminf(aux);
  677. }
  678. template <typename M>
  679. typename number_traits<typename linalg_traits<M>::value_type>
  680. ::magnitude_type
  681. mat_norm1(const M &m, col_and_row)
  682. { return mat_norm1(m, col_major()); }
  683. template <typename M>
  684. typename number_traits<typename linalg_traits<M>::value_type>
  685. ::magnitude_type
  686. mat_norm1(const M &m, row_and_col)
  687. { return mat_norm1(m, col_major()); }
  688. ///@endcond
  689. /** 1-norm of a matrix */
  690. template <typename M>
  691. typename number_traits<typename linalg_traits<M>::value_type>
  692. ::magnitude_type
  693. mat_norm1(const M &m) {
  694. return mat_norm1(m, typename linalg_traits<M>::sub_orientation());
  695. }
  696. /* ******************************************************************** */
  697. /* matrix Infinity norm */
  698. /* ******************************************************************** */
  699. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  700. template <typename M>
  701. typename number_traits<typename linalg_traits<M>::value_type>
  702. ::magnitude_type
  703. mat_norminf(const M &m, row_major) {
  704. typename number_traits<typename linalg_traits<M>::value_type>
  705. ::magnitude_type res(0);
  706. for (size_type i = 0; i < mat_nrows(m); ++i)
  707. res = std::max(res, vect_norm1(mat_const_row(m,i)));
  708. return res;
  709. }
  710. template <typename M>
  711. typename number_traits<typename linalg_traits<M>::value_type>
  712. ::magnitude_type
  713. mat_norminf(const M &m, col_major) {
  714. typedef typename linalg_traits<M>::value_type T;
  715. typedef typename number_traits<T>::magnitude_type R;
  716. typedef typename linalg_traits<M>::storage_type store_type;
  717. std::vector<R> aux(mat_nrows(m));
  718. for (size_type i = 0; i < mat_ncols(m); ++i) {
  719. typedef typename linalg_traits<M>::const_sub_col_type col_type;
  720. col_type col = mat_const_col(m, i);
  721. typename linalg_traits<col_type>::const_iterator
  722. it = vect_const_begin(col), ite = vect_const_end(col);
  723. for (size_type k = 0; it != ite; ++it, ++k)
  724. aux[index_of_it(it, k, store_type())] += gmm::abs(*it);
  725. }
  726. return vect_norminf(aux);
  727. }
  728. template <typename M>
  729. typename number_traits<typename linalg_traits<M>::value_type>
  730. ::magnitude_type
  731. mat_norminf(const M &m, col_and_row)
  732. { return mat_norminf(m, row_major()); }
  733. template <typename M>
  734. typename number_traits<typename linalg_traits<M>::value_type>
  735. ::magnitude_type
  736. mat_norminf(const M &m, row_and_col)
  737. { return mat_norminf(m, row_major()); }
  738. ///@endcond
  739. /** infinity-norm of a matrix.*/
  740. template <typename M>
  741. typename number_traits<typename linalg_traits<M>::value_type>
  742. ::magnitude_type
  743. mat_norminf(const M &m) {
  744. return mat_norminf(m, typename linalg_traits<M>::sub_orientation());
  745. }
  746. /* ******************************************************************** */
  747. /* Max norm for matrices */
  748. /* ******************************************************************** */
  749. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  750. template <typename M>
  751. typename number_traits<typename linalg_traits<M>::value_type>
  752. ::magnitude_type
  753. mat_maxnorm(const M &m, row_major) {
  754. typename number_traits<typename linalg_traits<M>::value_type>
  755. ::magnitude_type res(0);
  756. for (size_type i = 0; i < mat_nrows(m); ++i)
  757. res = std::max(res, vect_norminf(mat_const_row(m,i)));
  758. return res;
  759. }
  760. template <typename M>
  761. typename number_traits<typename linalg_traits<M>::value_type>
  762. ::magnitude_type
  763. mat_maxnorm(const M &m, col_major) {
  764. typename number_traits<typename linalg_traits<M>::value_type>
  765. ::magnitude_type res(0);
  766. for (size_type i = 0; i < mat_ncols(m); ++i)
  767. res = std::max(res, vect_norminf(mat_const_col(m,i)));
  768. return res;
  769. }
  770. ///@endcond
  771. /** max-norm of a matrix. */
  772. template <typename M>
  773. typename number_traits<typename linalg_traits<M>::value_type>
  774. ::magnitude_type
  775. mat_maxnorm(const M &m) {
  776. return mat_maxnorm(m,
  777. typename principal_orientation_type<typename
  778. linalg_traits<M>::sub_orientation>::potype());
  779. }
  780. /* ******************************************************************** */
  781. /* Clean */
  782. /* ******************************************************************** */
  783. /** Clean a vector or matrix (replace near-zero entries with zeroes). */
  784. template <typename L> inline void clean(L &l, double threshold);
  785. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  786. template <typename L, typename T>
  787. void clean(L &l, double threshold, abstract_dense, T) {
  788. typedef typename number_traits<T>::magnitude_type R;
  789. typename linalg_traits<L>::iterator it = vect_begin(l), ite = vect_end(l);
  790. for (; it != ite; ++it)
  791. if (gmm::abs(*it) < R(threshold)) *it = T(0);
  792. }
  793. template <typename L, typename T>
  794. void clean(L &l, double threshold, abstract_skyline, T)
  795. { gmm::clean(l, threshold, abstract_dense(), T()); }
  796. template <typename L, typename T>
  797. void clean(L &l, double threshold, abstract_sparse, T) {
  798. typedef typename number_traits<T>::magnitude_type R;
  799. typename linalg_traits<L>::iterator it = vect_begin(l), ite = vect_end(l);
  800. std::vector<size_type> ind;
  801. for (; it != ite; ++it)
  802. if (gmm::abs(*it) < R(threshold)) ind.push_back(it.index());
  803. for (size_type i = 0; i < ind.size(); ++i) l[ind[i]] = T(0);
  804. }
  805. template <typename L, typename T>
  806. void clean(L &l, double threshold, abstract_dense, std::complex<T>) {
  807. typename linalg_traits<L>::iterator it = vect_begin(l), ite = vect_end(l);
  808. for (; it != ite; ++it){
  809. if (gmm::abs((*it).real()) < T(threshold))
  810. *it = std::complex<T>(T(0), (*it).imag());
  811. if (gmm::abs((*it).imag()) < T(threshold))
  812. *it = std::complex<T>((*it).real(), T(0));
  813. }
  814. }
  815. template <typename L, typename T>
  816. void clean(L &l, double threshold, abstract_skyline, std::complex<T>)
  817. { gmm::clean(l, threshold, abstract_dense(), std::complex<T>()); }
  818. template <typename L, typename T>
  819. void clean(L &l, double threshold, abstract_sparse, std::complex<T>) {
  820. typename linalg_traits<L>::iterator it = vect_begin(l), ite = vect_end(l);
  821. std::vector<size_type> ind;
  822. for (; it != ite; ++it) {
  823. bool r = (gmm::abs((*it).real()) < T(threshold));
  824. bool i = (gmm::abs((*it).imag()) < T(threshold));
  825. if (r && i) ind.push_back(it.index());
  826. else if (r) (*it).real() = T(0);
  827. else if (i) (*it).imag() = T(0);
  828. }
  829. for (size_type i = 0; i < ind.size(); ++i)
  830. l[ind[i]] = std::complex<T>(T(0),T(0));
  831. }
  832. template <typename L> inline void clean(L &l, double threshold,
  833. abstract_vector) {
  834. gmm::clean(l, threshold, typename linalg_traits<L>::storage_type(),
  835. typename linalg_traits<L>::value_type());
  836. }
  837. template <typename L> inline void clean(const L &l, double threshold);
  838. template <typename L> void clean(L &l, double threshold, row_major) {
  839. for (size_type i = 0; i < mat_nrows(l); ++i)
  840. gmm::clean(mat_row(l, i), threshold);
  841. }
  842. template <typename L> void clean(L &l, double threshold, col_major) {
  843. for (size_type i = 0; i < mat_ncols(l); ++i)
  844. gmm::clean(mat_col(l, i), threshold);
  845. }
  846. template <typename L> inline void clean(L &l, double threshold,
  847. abstract_matrix) {
  848. gmm::clean(l, threshold,
  849. typename principal_orientation_type<typename
  850. linalg_traits<L>::sub_orientation>::potype());
  851. }
  852. template <typename L> inline void clean(L &l, double threshold)
  853. { clean(l, threshold, typename linalg_traits<L>::linalg_type()); }
  854. template <typename L> inline void clean(const L &l, double threshold)
  855. { gmm::clean(linalg_const_cast(l), threshold); }
  856. /* ******************************************************************** */
  857. /* Copy */
  858. /* ******************************************************************** */
  859. ///@endcond
  860. /** Copy vectors or matrices.
  861. @param l1 source vector or matrix.
  862. @param l2 destination.
  863. */
  864. template <typename L1, typename L2> inline
  865. void copy(const L1& l1, L2& l2) {
  866. if ((const void *)(&l1) != (const void *)(&l2)) {
  867. if (same_origin(l1,l2))
  868. GMM_WARNING2("Warning : a conflict is possible in copy\n");
  869. copy(l1, l2, typename linalg_traits<L1>::linalg_type(),
  870. typename linalg_traits<L2>::linalg_type());
  871. }
  872. }
  873. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  874. template <typename L1, typename L2> inline
  875. void copy(const L1& l1, const L2& l2) { copy(l1, linalg_const_cast(l2)); }
  876. template <typename L1, typename L2> inline
  877. void copy(const L1& l1, L2& l2, abstract_vector, abstract_vector) {
  878. GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch");
  879. copy_vect(l1, l2, typename linalg_traits<L1>::storage_type(),
  880. typename linalg_traits<L2>::storage_type());
  881. }
  882. template <typename L1, typename L2> inline
  883. void copy(const L1& l1, L2& l2, abstract_matrix, abstract_matrix) {
  884. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  885. if (!m || !n) return;
  886. GMM_ASSERT2(n==mat_ncols(l2) && m==mat_nrows(l2), "dimensions mismatch");
  887. copy_mat(l1, l2, typename linalg_traits<L1>::sub_orientation(),
  888. typename linalg_traits<L2>::sub_orientation());
  889. }
  890. template <typename V1, typename V2, typename C1, typename C2> inline
  891. void copy_vect(const V1 &v1, const V2 &v2, C1, C2)
  892. { copy_vect(v1, const_cast<V2 &>(v2), C1(), C2()); }
  893. template <typename L1, typename L2>
  894. void copy_mat_by_row(const L1& l1, L2& l2) {
  895. size_type nbr = mat_nrows(l1);
  896. for (size_type i = 0; i < nbr; ++i)
  897. copy_vect(mat_const_row(l1, i), mat_row(l2, i),
  898. typename linalg_traits<L1>::storage_type(),
  899. typename linalg_traits<L2>::storage_type());
  900. }
  901. template <typename L1, typename L2>
  902. void copy_mat_by_col(const L1 &l1, L2 &l2) {
  903. size_type nbc = mat_ncols(l1);
  904. for (size_type i = 0; i < nbc; ++i) {
  905. copy_vect(mat_const_col(l1, i), mat_col(l2, i),
  906. typename linalg_traits<L1>::storage_type(),
  907. typename linalg_traits<L2>::storage_type());
  908. }
  909. }
  910. template <typename L1, typename L2> inline
  911. void copy_mat(const L1& l1, L2& l2, row_major, row_major)
  912. { copy_mat_by_row(l1, l2); }
  913. template <typename L1, typename L2> inline
  914. void copy_mat(const L1& l1, L2& l2, row_major, row_and_col)
  915. { copy_mat_by_row(l1, l2); }
  916. template <typename L1, typename L2> inline
  917. void copy_mat(const L1& l1, L2& l2, row_and_col, row_and_col)
  918. { copy_mat_by_row(l1, l2); }
  919. template <typename L1, typename L2> inline
  920. void copy_mat(const L1& l1, L2& l2, row_and_col, row_major)
  921. { copy_mat_by_row(l1, l2); }
  922. template <typename L1, typename L2> inline
  923. void copy_mat(const L1& l1, L2& l2, col_and_row, row_major)
  924. { copy_mat_by_row(l1, l2); }
  925. template <typename L1, typename L2> inline
  926. void copy_mat(const L1& l1, L2& l2, row_major, col_and_row)
  927. { copy_mat_by_row(l1, l2); }
  928. template <typename L1, typename L2> inline
  929. void copy_mat(const L1& l1, L2& l2, col_and_row, row_and_col)
  930. { copy_mat_by_row(l1, l2); }
  931. template <typename L1, typename L2> inline
  932. void copy_mat(const L1& l1, L2& l2, row_and_col, col_and_row)
  933. { copy_mat_by_row(l1, l2); }
  934. template <typename L1, typename L2> inline
  935. void copy_mat(const L1& l1, L2& l2, col_major, col_major)
  936. { copy_mat_by_col(l1, l2); }
  937. template <typename L1, typename L2> inline
  938. void copy_mat(const L1& l1, L2& l2, col_major, col_and_row)
  939. { copy_mat_by_col(l1, l2); }
  940. template <typename L1, typename L2> inline
  941. void copy_mat(const L1& l1, L2& l2, col_major, row_and_col)
  942. { copy_mat_by_col(l1, l2); }
  943. template <typename L1, typename L2> inline
  944. void copy_mat(const L1& l1, L2& l2, row_and_col, col_major)
  945. { copy_mat_by_col(l1, l2); }
  946. template <typename L1, typename L2> inline
  947. void copy_mat(const L1& l1, L2& l2, col_and_row, col_major)
  948. { copy_mat_by_col(l1, l2); }
  949. template <typename L1, typename L2> inline
  950. void copy_mat(const L1& l1, L2& l2, col_and_row, col_and_row)
  951. { copy_mat_by_col(l1, l2); }
  952. template <typename L1, typename L2> inline
  953. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
  954. copy_mat_mixed_rc(l1, l2, i, typename linalg_traits<L1>::storage_type());
  955. }
  956. template <typename L1, typename L2>
  957. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  958. typename linalg_traits<L1>::const_iterator
  959. it = vect_const_begin(l1), ite = vect_const_end(l1);
  960. for (; it != ite; ++it)
  961. l2(i, it.index()) = *it;
  962. }
  963. template <typename L1, typename L2>
  964. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  965. typename linalg_traits<L1>::const_iterator
  966. it = vect_const_begin(l1), ite = vect_const_end(l1);
  967. for (; it != ite; ++it)
  968. l2(i, it.index()) = *it;
  969. }
  970. template <typename L1, typename L2>
  971. void copy_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
  972. typename linalg_traits<L1>::const_iterator
  973. it = vect_const_begin(l1), ite = vect_const_end(l1);
  974. for (size_type j = 0; it != ite; ++it, ++j) l2(i, j) = *it;
  975. }
  976. template <typename L1, typename L2> inline
  977. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
  978. copy_mat_mixed_cr(l1, l2, i, typename linalg_traits<L1>::storage_type());
  979. }
  980. template <typename L1, typename L2>
  981. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  982. typename linalg_traits<L1>::const_iterator
  983. it = vect_const_begin(l1), ite = vect_const_end(l1);
  984. for (; it != ite; ++it) l2(it.index(), i) = *it;
  985. }
  986. template <typename L1, typename L2>
  987. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  988. typename linalg_traits<L1>::const_iterator
  989. it = vect_const_begin(l1), ite = vect_const_end(l1);
  990. for (; it != ite; ++it) l2(it.index(), i) = *it;
  991. }
  992. template <typename L1, typename L2>
  993. void copy_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
  994. typename linalg_traits<L1>::const_iterator
  995. it = vect_const_begin(l1), ite = vect_const_end(l1);
  996. for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) = *it;
  997. }
  998. template <typename L1, typename L2>
  999. void copy_mat(const L1& l1, L2& l2, row_major, col_major) {
  1000. clear(l2);
  1001. size_type nbr = mat_nrows(l1);
  1002. for (size_type i = 0; i < nbr; ++i)
  1003. copy_mat_mixed_rc(mat_const_row(l1, i), l2, i);
  1004. }
  1005. template <typename L1, typename L2>
  1006. void copy_mat(const L1& l1, L2& l2, col_major, row_major) {
  1007. clear(l2);
  1008. size_type nbc = mat_ncols(l1);
  1009. for (size_type i = 0; i < nbc; ++i)
  1010. copy_mat_mixed_cr(mat_const_col(l1, i), l2, i);
  1011. }
  1012. template <typename L1, typename L2> inline
  1013. void copy_vect(const L1 &l1, L2 &l2, abstract_dense, abstract_dense) {
  1014. std::copy(vect_const_begin(l1), vect_const_end(l1), vect_begin(l2));
  1015. }
  1016. template <typename L1, typename L2> inline // to be optimised ?
  1017. void copy_vect(const L1 &l1, L2 &l2, abstract_skyline, abstract_skyline) {
  1018. typename linalg_traits<L1>::const_iterator it1 = vect_const_begin(l1),
  1019. ite1 = vect_const_end(l1);
  1020. while (it1 != ite1 && *it1 == typename linalg_traits<L1>::value_type(0))
  1021. ++it1;
  1022. if (ite1 - it1 > 0) {
  1023. clear(l2);
  1024. typename linalg_traits<L2>::iterator it2 = vect_begin(l2),
  1025. ite2 = vect_end(l2);
  1026. while (*(ite1-1) == typename linalg_traits<L1>::value_type(0)) ite1--;
  1027. if (it2 == ite2) {
  1028. l2[it1.index()] = *it1; ++it1;
  1029. l2[ite1.index()-1] = *(ite1-1); --ite1;
  1030. if (it1 < ite1)
  1031. { it2 = vect_begin(l2); ++it2; std::copy(it1, ite1, it2); }
  1032. }
  1033. else {
  1034. ptrdiff_t m = it1.index() - it2.index();
  1035. if (m >= 0 && ite1.index() <= ite2.index())
  1036. std::copy(it1, ite1, it2 + m);
  1037. else {
  1038. if (m < 0) l2[it1.index()] = *it1;
  1039. if (ite1.index() > ite2.index()) l2[ite1.index()-1] = *(ite1-1);
  1040. it2 = vect_begin(l2); ite2 = vect_end(l2);
  1041. m = it1.index() - it2.index();
  1042. std::copy(it1, ite1, it2 + m);
  1043. }
  1044. }
  1045. }
  1046. }
  1047. template <typename L1, typename L2>
  1048. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
  1049. clear(l2);
  1050. typename linalg_traits<L1>::const_iterator
  1051. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1052. for (; it != ite; ++it) { l2[it.index()] = *it; }
  1053. }
  1054. template <typename L1, typename L2>
  1055. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
  1056. clear(l2);
  1057. typename linalg_traits<L1>::const_iterator
  1058. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1059. for (; it != ite; ++it) l2[it.index()] = *it;
  1060. }
  1061. template <typename L1, typename L2>
  1062. void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
  1063. typedef typename linalg_traits<L1>::value_type T;
  1064. typedef typename linalg_traits<L1>::const_iterator l1_const_iterator;
  1065. typedef typename linalg_traits<L2>::iterator l2_iterator;
  1066. l1_const_iterator it = vect_const_begin(l1), ite = vect_const_end(l1);
  1067. if (it == ite)
  1068. gmm::clear(l2);
  1069. else {
  1070. l2_iterator it2 = vect_begin(l2), ite2 = vect_end(l2);
  1071. size_type i = it.index(), j;
  1072. for (j = 0; j < i; ++j, ++it2) *it2 = T(0);
  1073. for (; it != ite; ++it, ++it2) *it2 = *it;
  1074. for (; it2 != ite2; ++it2) *it2 = T(0);
  1075. }
  1076. }
  1077. template <typename L1, typename L2>
  1078. void copy_vect(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
  1079. typename linalg_traits<L1>::const_iterator
  1080. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1081. clear(l2);
  1082. for (; it != ite; ++it)
  1083. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1084. l2[it.index()] = *it;
  1085. }
  1086. template <typename L1, typename L2>
  1087. void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
  1088. clear(l2);
  1089. typename linalg_traits<L1>::const_iterator
  1090. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1091. for (size_type i = 0; it != ite; ++it, ++i)
  1092. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1093. l2[i] = *it;
  1094. }
  1095. template <typename L1, typename L2> // to be optimised ...
  1096. void copy_vect(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
  1097. clear(l2);
  1098. typename linalg_traits<L1>::const_iterator
  1099. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1100. for (size_type i = 0; it != ite; ++it, ++i)
  1101. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1102. l2[i] = *it;
  1103. }
  1104. template <typename L1, typename L2>
  1105. void copy_vect(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
  1106. clear(l2);
  1107. typename linalg_traits<L1>::const_iterator
  1108. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1109. for (; it != ite; ++it)
  1110. if (*it != (typename linalg_traits<L1>::value_type)(0))
  1111. l2[it.index()] = *it;
  1112. }
  1113. /* ******************************************************************** */
  1114. /* Matrix and vector addition */
  1115. /* algorithms are built in order to avoid some conflicts whith */
  1116. /* repeated arguments or with overlapping part of a same object. */
  1117. /* In the latter case, conflicts are still possible. */
  1118. /* ******************************************************************** */
  1119. ///@endcond
  1120. /** Add two vectors or matrices
  1121. @param l1
  1122. @param l2 contains on output, l2+l1.
  1123. */
  1124. template <typename L1, typename L2> inline
  1125. void add(const L1& l1, L2& l2) {
  1126. add_spec(l1, l2, typename linalg_traits<L2>::linalg_type());
  1127. }
  1128. ///@cond
  1129. template <typename L1, typename L2> inline
  1130. void add(const L1& l1, const L2& l2) { add(l1, linalg_const_cast(l2)); }
  1131. template <typename L1, typename L2> inline
  1132. void add_spec(const L1& l1, L2& l2, abstract_vector) {
  1133. GMM_ASSERT2(vect_size(l1) == vect_size(l2), "dimensions mismatch");
  1134. add(l1, l2, typename linalg_traits<L1>::storage_type(),
  1135. typename linalg_traits<L2>::storage_type());
  1136. }
  1137. template <typename L1, typename L2> inline
  1138. void add_spec(const L1& l1, L2& l2, abstract_matrix) {
  1139. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1140. GMM_ASSERT2(m==mat_nrows(l2) && n==mat_ncols(l2), "dimensions mismatch");
  1141. add(l1, l2, typename linalg_traits<L1>::sub_orientation(),
  1142. typename linalg_traits<L2>::sub_orientation());
  1143. }
  1144. template <typename L1, typename L2>
  1145. void add(const L1& l1, L2& l2, row_major, row_major) {
  1146. typename linalg_traits<L1>::const_row_iterator it1 = mat_row_begin(l1),
  1147. ite = mat_row_end(l1);
  1148. typename linalg_traits<L2>::row_iterator it2 = mat_row_begin(l2);
  1149. for ( ; it1 != ite; ++it1, ++it2)
  1150. add(linalg_traits<L1>::row(it1), linalg_traits<L2>::row(it2));
  1151. }
  1152. template <typename L1, typename L2>
  1153. void add(const L1& l1, L2& l2, col_major, col_major) {
  1154. typename linalg_traits<L1>::const_col_iterator
  1155. it1 = mat_col_const_begin(l1),
  1156. ite = mat_col_const_end(l1);
  1157. typename linalg_traits<L2>::col_iterator it2 = mat_col_begin(l2);
  1158. for ( ; it1 != ite; ++it1, ++it2)
  1159. add(linalg_traits<L1>::col(it1), linalg_traits<L2>::col(it2));
  1160. }
  1161. template <typename L1, typename L2> inline
  1162. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i) {
  1163. add_mat_mixed_rc(l1, l2, i, typename linalg_traits<L1>::storage_type());
  1164. }
  1165. template <typename L1, typename L2>
  1166. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  1167. typename linalg_traits<L1>::const_iterator
  1168. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1169. for (; it != ite; ++it) l2(i, it.index()) += *it;
  1170. }
  1171. template <typename L1, typename L2>
  1172. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  1173. typename linalg_traits<L1>::const_iterator
  1174. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1175. for (; it != ite; ++it) l2(i, it.index()) += *it;
  1176. }
  1177. template <typename L1, typename L2>
  1178. void add_mat_mixed_rc(const L1& l1, L2& l2, size_type i, abstract_dense) {
  1179. typename linalg_traits<L1>::const_iterator
  1180. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1181. for (size_type j = 0; it != ite; ++it, ++j) l2(i, j) += *it;
  1182. }
  1183. template <typename L1, typename L2> inline
  1184. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i) {
  1185. add_mat_mixed_cr(l1, l2, i, typename linalg_traits<L1>::storage_type());
  1186. }
  1187. template <typename L1, typename L2>
  1188. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_sparse) {
  1189. typename linalg_traits<L1>::const_iterator
  1190. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1191. for (; it != ite; ++it) l2(it.index(), i) += *it;
  1192. }
  1193. template <typename L1, typename L2>
  1194. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_skyline) {
  1195. typename linalg_traits<L1>::const_iterator
  1196. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1197. for (; it != ite; ++it) l2(it.index(), i) += *it;
  1198. }
  1199. template <typename L1, typename L2>
  1200. void add_mat_mixed_cr(const L1& l1, L2& l2, size_type i, abstract_dense) {
  1201. typename linalg_traits<L1>::const_iterator
  1202. it = vect_const_begin(l1), ite = vect_const_end(l1);
  1203. for (size_type j = 0; it != ite; ++it, ++j) l2(j, i) += *it;
  1204. }
  1205. template <typename L1, typename L2>
  1206. void add(const L1& l1, L2& l2, row_major, col_major) {
  1207. size_type nbr = mat_nrows(l1);
  1208. for (size_type i = 0; i < nbr; ++i)
  1209. add_mat_mixed_rc(mat_const_row(l1, i), l2, i);
  1210. }
  1211. template <typename L1, typename L2>
  1212. void add(const L1& l1, L2& l2, col_major, row_major) {
  1213. size_type nbc = mat_ncols(l1);
  1214. for (size_type i = 0; i < nbc; ++i)
  1215. add_mat_mixed_cr(mat_const_col(l1, i), l2, i);
  1216. }
  1217. template <typename L1, typename L2> inline
  1218. void add(const L1& l1, L2& l2, row_and_col, row_major)
  1219. { add(l1, l2, row_major(), row_major()); }
  1220. template <typename L1, typename L2> inline
  1221. void add(const L1& l1, L2& l2, row_and_col, row_and_col)
  1222. { add(l1, l2, row_major(), row_major()); }
  1223. template <typename L1, typename L2> inline
  1224. void add(const L1& l1, L2& l2, row_and_col, col_and_row)
  1225. { add(l1, l2, row_major(), row_major()); }
  1226. template <typename L1, typename L2> inline
  1227. void add(const L1& l1, L2& l2, col_and_row, row_and_col)
  1228. { add(l1, l2, row_major(), row_major()); }
  1229. template <typename L1, typename L2> inline
  1230. void add(const L1& l1, L2& l2, row_major, row_and_col)
  1231. { add(l1, l2, row_major(), row_major()); }
  1232. template <typename L1, typename L2> inline
  1233. void add(const L1& l1, L2& l2, col_and_row, row_major)
  1234. { add(l1, l2, row_major(), row_major()); }
  1235. template <typename L1, typename L2> inline
  1236. void add(const L1& l1, L2& l2, row_major, col_and_row)
  1237. { add(l1, l2, row_major(), row_major()); }
  1238. template <typename L1, typename L2> inline
  1239. void add(const L1& l1, L2& l2, row_and_col, col_major)
  1240. { add(l1, l2, col_major(), col_major()); }
  1241. template <typename L1, typename L2> inline
  1242. void add(const L1& l1, L2& l2, col_major, row_and_col)
  1243. { add(l1, l2, col_major(), col_major()); }
  1244. template <typename L1, typename L2> inline
  1245. void add(const L1& l1, L2& l2, col_and_row, col_major)
  1246. { add(l1, l2, col_major(), col_major()); }
  1247. template <typename L1, typename L2> inline
  1248. void add(const L1& l1, L2& l2, col_and_row, col_and_row)
  1249. { add(l1, l2, col_major(), col_major()); }
  1250. template <typename L1, typename L2> inline
  1251. void add(const L1& l1, L2& l2, col_major, col_and_row)
  1252. { add(l1, l2, col_major(), col_major()); }
  1253. ///@endcond
  1254. /** Addition of two vectors/matrices
  1255. @param l1
  1256. @param l2
  1257. @param l3 contains l1+l2 on output
  1258. */
  1259. template <typename L1, typename L2, typename L3> inline
  1260. void add(const L1& l1, const L2& l2, L3& l3) {
  1261. add_spec(l1, l2, l3, typename linalg_traits<L2>::linalg_type());
  1262. }
  1263. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1264. template <typename L1, typename L2, typename L3> inline
  1265. void add(const L1& l1, const L2& l2, const L3& l3)
  1266. { add(l1, l2, linalg_const_cast(l3)); }
  1267. template <typename L1, typename L2, typename L3> inline
  1268. void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_matrix)
  1269. { copy(l2, l3); add(l1, l3); }
  1270. template <typename L1, typename L2, typename L3> inline
  1271. void add_spec(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
  1272. GMM_ASSERT2(vect_size(l1) == vect_size(l2) &&
  1273. vect_size(l1) == vect_size(l3), "dimensions mismatch");
  1274. if ((const void *)(&l1) == (const void *)(&l3))
  1275. add(l2, l3);
  1276. else if ((const void *)(&l2) == (const void *)(&l3))
  1277. add(l1, l3);
  1278. else
  1279. add(l1, l2, l3, typename linalg_traits<L1>::storage_type(),
  1280. typename linalg_traits<L2>::storage_type(),
  1281. typename linalg_traits<L3>::storage_type());
  1282. }
  1283. template <typename IT1, typename IT2, typename IT3>
  1284. void add_full_(IT1 it1, IT2 it2, IT3 it3, IT3 ite) {
  1285. for (; it3 != ite; ++it3, ++it2, ++it1) *it3 = *it1 + *it2;
  1286. }
  1287. template <typename IT1, typename IT2, typename IT3>
  1288. void add_almost_full_(IT1 it1, IT1 ite1, IT2 it2, IT3 it3, IT3 ite3) {
  1289. IT3 it = it3;
  1290. for (; it != ite3; ++it, ++it2) *it = *it2;
  1291. for (; it1 != ite1; ++it1)
  1292. *(it3 + it1.index()) += *it1;
  1293. }
  1294. template <typename IT1, typename IT2, typename IT3>
  1295. void add_to_full_(IT1 it1, IT1 ite1, IT2 it2, IT2 ite2,
  1296. IT3 it3, IT3 ite3) {
  1297. typedef typename std::iterator_traits<IT3>::value_type T;
  1298. IT3 it = it3;
  1299. for (; it != ite3; ++it) *it = T(0);
  1300. for (; it1 != ite1; ++it1) *(it3 + it1.index()) = *it1;
  1301. for (; it2 != ite2; ++it2) *(it3 + it2.index()) += *it2;
  1302. }
  1303. template <typename L1, typename L2, typename L3> inline
  1304. void add(const L1& l1, const L2& l2, L3& l3,
  1305. abstract_dense, abstract_dense, abstract_dense) {
  1306. add_full_(vect_const_begin(l1), vect_const_begin(l2),
  1307. vect_begin(l3), vect_end(l3));
  1308. }
  1309. // generic function for add(v1, v2, v3).
  1310. // Need to be specialized to optimize particular additions.
  1311. template <typename L1, typename L2, typename L3,
  1312. typename ST1, typename ST2, typename ST3>
  1313. inline void add(const L1& l1, const L2& l2, L3& l3, ST1, ST2, ST3)
  1314. { copy(l2, l3); add(l1, l3, ST1(), ST3()); }
  1315. template <typename L1, typename L2, typename L3> inline
  1316. void add(const L1& l1, const L2& l2, L3& l3,
  1317. abstract_sparse, abstract_dense, abstract_dense) {
  1318. add_almost_full_(vect_const_begin(l1), vect_const_end(l1),
  1319. vect_const_begin(l2), vect_begin(l3), vect_end(l3));
  1320. }
  1321. template <typename L1, typename L2, typename L3> inline
  1322. void add(const L1& l1, const L2& l2, L3& l3,
  1323. abstract_dense, abstract_sparse, abstract_dense)
  1324. { add(l2, l1, l3, abstract_sparse(), abstract_dense(), abstract_dense()); }
  1325. template <typename L1, typename L2, typename L3> inline
  1326. void add(const L1& l1, const L2& l2, L3& l3,
  1327. abstract_sparse, abstract_sparse, abstract_dense) {
  1328. add_to_full_(vect_const_begin(l1), vect_const_end(l1),
  1329. vect_const_begin(l2), vect_const_end(l2),
  1330. vect_begin(l3), vect_end(l3));
  1331. }
  1332. template <typename L1, typename L2, typename L3>
  1333. void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_true) {
  1334. typename linalg_traits<L1>::const_iterator
  1335. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1336. typename linalg_traits<L2>::const_iterator
  1337. it2 = vect_const_begin(l2), ite2 = vect_const_end(l2);
  1338. clear(l3);
  1339. while (it1 != ite1 && it2 != ite2) {
  1340. ptrdiff_t d = it1.index() - it2.index();
  1341. if (d < 0)
  1342. { l3[it1.index()] += *it1; ++it1; }
  1343. else if (d > 0)
  1344. { l3[it2.index()] += *it2; ++it2; }
  1345. else
  1346. { l3[it1.index()] = *it1 + *it2; ++it1; ++it2; }
  1347. }
  1348. for (; it1 != ite1; ++it1) l3[it1.index()] += *it1;
  1349. for (; it2 != ite2; ++it2) l3[it2.index()] += *it2;
  1350. }
  1351. template <typename L1, typename L2, typename L3>
  1352. void add_spspsp(const L1& l1, const L2& l2, L3& l3, linalg_false)
  1353. { copy(l2, l3); add(l2, l3); }
  1354. template <typename L1, typename L2, typename L3>
  1355. void add(const L1& l1, const L2& l2, L3& l3,
  1356. abstract_sparse, abstract_sparse, abstract_sparse) {
  1357. add_spspsp(l1, l2, l3, typename linalg_and<typename
  1358. linalg_traits<L1>::index_sorted,
  1359. typename linalg_traits<L2>::index_sorted>::bool_type());
  1360. }
  1361. template <typename L1, typename L2>
  1362. void add(const L1& l1, L2& l2, abstract_dense, abstract_dense) {
  1363. typename linalg_traits<L1>::const_iterator it1 = vect_const_begin(l1);
  1364. typename linalg_traits<L2>::iterator
  1365. it2 = vect_begin(l2), ite = vect_end(l2);
  1366. for (; it2 != ite; ++it2, ++it1) *it2 += *it1;
  1367. }
  1368. template <typename L1, typename L2>
  1369. void add(const L1& l1, L2& l2, abstract_dense, abstract_skyline) {
  1370. typedef typename linalg_traits<L1>::const_iterator const_l1_iterator;
  1371. typedef typename linalg_traits<L2>::iterator l2_iterator;
  1372. typedef typename linalg_traits<L1>::value_type T;
  1373. const_l1_iterator it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1374. size_type i1 = 0, ie1 = vect_size(l1);
  1375. while (it1 != ite1 && *it1 == T(0)) { ++it1; ++i1; }
  1376. if (it1 != ite1) {
  1377. l2_iterator it2 = vect_begin(l2), ite2 = vect_end(l2);
  1378. while (ie1 && *(ite1-1) == T(0)) { ite1--; --ie1; }
  1379. if (it2 == ite2 || i1 < it2.index()) {
  1380. l2[i1] = *it1; ++i1; ++it1;
  1381. if (it1 == ite1) return;
  1382. it2 = vect_begin(l2); ite2 = vect_end(l2);
  1383. }
  1384. if (ie1 > ite2.index()) {
  1385. --ite1; l2[ie1 - 1] = *ite1;
  1386. it2 = vect_begin(l2);
  1387. }
  1388. it2 += i1 - it2.index();
  1389. for (; it1 != ite1; ++it1, ++it2) { *it2 += *it1; }
  1390. }
  1391. }
  1392. template <typename L1, typename L2>
  1393. void add(const L1& l1, L2& l2, abstract_skyline, abstract_dense) {
  1394. typename linalg_traits<L1>::const_iterator it1 = vect_const_begin(l1),
  1395. ite1 = vect_const_end(l1);
  1396. if (it1 != ite1) {
  1397. typename linalg_traits<L2>::iterator it2 = vect_begin(l2);
  1398. it2 += it1.index();
  1399. for (; it1 != ite1; ++it2, ++it1) *it2 += *it1;
  1400. }
  1401. }
  1402. template <typename L1, typename L2>
  1403. void add(const L1& l1, L2& l2, abstract_sparse, abstract_dense) {
  1404. typename linalg_traits<L1>::const_iterator
  1405. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1406. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1407. }
  1408. template <typename L1, typename L2>
  1409. void add(const L1& l1, L2& l2, abstract_sparse, abstract_sparse) {
  1410. typename linalg_traits<L1>::const_iterator
  1411. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1412. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1413. }
  1414. template <typename L1, typename L2>
  1415. void add(const L1& l1, L2& l2, abstract_sparse, abstract_skyline) {
  1416. typename linalg_traits<L1>::const_iterator
  1417. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1418. for (; it1 != ite1; ++it1) l2[it1.index()] += *it1;
  1419. }
  1420. template <typename L1, typename L2>
  1421. void add(const L1& l1, L2& l2, abstract_skyline, abstract_sparse) {
  1422. typename linalg_traits<L1>::const_iterator
  1423. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1424. for (; it1 != ite1; ++it1)
  1425. if (*it1 != typename linalg_traits<L1>::value_type(0))
  1426. l2[it1.index()] += *it1;
  1427. }
  1428. template <typename L1, typename L2>
  1429. void add(const L1& l1, L2& l2, abstract_skyline, abstract_skyline) {
  1430. typedef typename linalg_traits<L1>::const_iterator const_l1_iterator;
  1431. typedef typename linalg_traits<L2>::iterator l2_iterator;
  1432. typedef typename linalg_traits<L1>::value_type T1;
  1433. typedef typename linalg_traits<L2>::value_type T2;
  1434. const_l1_iterator it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1435. while (it1 != ite1 && *it1 == T1(0)) ++it1;
  1436. if (ite1 != it1) {
  1437. l2_iterator it2 = vect_begin(l2), ite2 = vect_end(l2);
  1438. while (*(ite1-1) == T1(0)) ite1--;
  1439. if (it2 == ite2 || it1.index() < it2.index()) {
  1440. l2[it1.index()] = T2(0);
  1441. it2 = vect_begin(l2); ite2 = vect_end(l2);
  1442. }
  1443. if (ite1.index() > ite2.index()) {
  1444. l2[ite1.index() - 1] = T2(0);
  1445. it2 = vect_begin(l2);
  1446. }
  1447. it2 += it1.index() - it2.index();
  1448. for (; it1 != ite1; ++it1, ++it2) *it2 += *it1;
  1449. }
  1450. }
  1451. template <typename L1, typename L2>
  1452. void add(const L1& l1, L2& l2, abstract_dense, abstract_sparse) {
  1453. typename linalg_traits<L1>::const_iterator
  1454. it1 = vect_const_begin(l1), ite1 = vect_const_end(l1);
  1455. for (size_type i = 0; it1 != ite1; ++it1, ++i)
  1456. if (*it1 != typename linalg_traits<L1>::value_type(0)) l2[i] += *it1;
  1457. }
  1458. /* ******************************************************************** */
  1459. /* Matrix-vector mult */
  1460. /* ******************************************************************** */
  1461. ///@endcond
  1462. /** matrix-vector or matrix-matrix product.
  1463. @param l1 a matrix.
  1464. @param l2 a vector or matrix.
  1465. @param l3 the product l1*l2.
  1466. */
  1467. template <typename L1, typename L2, typename L3> inline
  1468. void mult(const L1& l1, const L2& l2, L3& l3) {
  1469. mult_dispatch(l1, l2, l3, typename linalg_traits<L2>::linalg_type());
  1470. }
  1471. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1472. template <typename L1, typename L2, typename L3> inline
  1473. void mult(const L1& l1, const L2& l2, const L3& l3)
  1474. { mult(l1, l2, linalg_const_cast(l3)); }
  1475. template <typename L1, typename L2, typename L3> inline
  1476. void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_vector) {
  1477. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1478. if (!m || !n) { gmm::clear(l3); return; }
  1479. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
  1480. if (!same_origin(l2, l3))
  1481. mult_spec(l1, l2, l3, typename principal_orientation_type<typename
  1482. linalg_traits<L1>::sub_orientation>::potype());
  1483. else {
  1484. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1485. typename temporary_vector<L3>::vector_type temp(vect_size(l3));
  1486. mult_spec(l1, l2, temp, typename principal_orientation_type<typename
  1487. linalg_traits<L1>::sub_orientation>::potype());
  1488. copy(temp, l3);
  1489. }
  1490. }
  1491. template <typename L1, typename L2, typename L3>
  1492. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1493. typedef typename linalg_traits<L3>::value_type T;
  1494. clear(l3);
  1495. size_type nr = mat_nrows(l1);
  1496. for (size_type i = 0; i < nr; ++i) {
  1497. T aux = vect_sp(mat_const_row(l1, i), l2);
  1498. if (aux != T(0)) l3[i] = aux;
  1499. }
  1500. }
  1501. template <typename L1, typename L2, typename L3>
  1502. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1503. typedef typename linalg_traits<L3>::value_type T;
  1504. clear(l3);
  1505. size_type nr = mat_nrows(l1);
  1506. for (size_type i = 0; i < nr; ++i) {
  1507. T aux = vect_sp(mat_const_row(l1, i), l2);
  1508. if (aux != T(0)) l3[i] = aux;
  1509. }
  1510. }
  1511. #ifdef STORM_USE_TBB
  1512. /* Official Intel Hint on blocked_range vs. linear iterators: http://software.intel.com/en-us/forums/topic/289505
  1513. */
  1514. template <typename IT1, typename IT2>
  1515. class forward_range_mult {
  1516. IT1 my_begin;
  1517. IT1 my_end;
  1518. IT2 my_begin_row;
  1519. size_t my_size;
  1520. public:
  1521. IT1 begin() const {return my_begin;}
  1522. IT2 begin_row() const {return my_begin_row;}
  1523. IT1 end() const {return my_end;}
  1524. bool empty() const {return my_begin==my_end;}
  1525. bool is_divisible() const {return my_size>1;}
  1526. forward_range_mult( IT1 first, IT1 last, IT2 row_first, size_t size ) : my_begin(first), my_end(last), my_begin_row(row_first), my_size(size) {
  1527. assert( size==size_t(std::distance( first,last )));
  1528. }
  1529. forward_range_mult( IT1 first, IT1 last, IT2 row_first) : my_begin(first), my_end(last), my_begin_row(row_first) {
  1530. my_size = std::distance( first,last );
  1531. }
  1532. forward_range_mult( forward_range_mult& r, tbb::split ) {
  1533. size_t h = r.my_size/2;
  1534. my_end = r.my_end;
  1535. my_begin = r.my_begin;
  1536. my_begin_row = r.my_begin_row;
  1537. std::advance( my_begin, h ); // Might be scaling issue
  1538. std::advance( my_begin_row, h );
  1539. my_size = r.my_size-h;
  1540. r.my_end = my_begin;
  1541. r.my_size = h;
  1542. }
  1543. };
  1544. template <typename L1, typename L2, typename L3>
  1545. class tbbHelper_mult_by_row {
  1546. L2 const* my_l2;
  1547. // Typedefs for Iterator Types
  1548. typedef typename linalg_traits<L3>::iterator frm_IT1;
  1549. typedef typename linalg_traits<L1>::const_row_iterator frm_IT2;
  1550. public:
  1551. void operator()( const forward_range_mult<frm_IT1, frm_IT2>& r ) const {
  1552. L2 const& l2 = *my_l2;
  1553. frm_IT1 it = r.begin();
  1554. frm_IT1 ite = r.end();
  1555. frm_IT2 itr = r.begin_row();
  1556. for (; it != ite; ++it, ++itr) {
  1557. *it = vect_sp(linalg_traits<L1>::row(itr), l2,
  1558. typename linalg_traits<L1>::storage_type(),
  1559. typename linalg_traits<L2>::storage_type());
  1560. }
  1561. }
  1562. tbbHelper_mult_by_row(L2 const* l2) :
  1563. my_l2(l2)
  1564. {}
  1565. };
  1566. #endif
  1567. template <typename L1, typename L2, typename L3>
  1568. void mult_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1569. typename linalg_traits<L3>::iterator it=vect_begin(l3), ite=vect_end(l3);
  1570. typename linalg_traits<L1>::const_row_iterator
  1571. itr = mat_row_const_begin(l1);
  1572. #ifdef STORM_USE_TBB
  1573. tbb::parallel_for(forward_range_mult<typename linalg_traits<L3>::iterator, typename linalg_traits<L1>::const_row_iterator>(it, ite, itr), tbbHelper_mult_by_row<L1, L2, L3>(&l2));
  1574. #else
  1575. for (; it != ite; ++it, ++itr)
  1576. *it = vect_sp(linalg_traits<L1>::row(itr), l2,
  1577. typename linalg_traits<L1>::storage_type(),
  1578. typename linalg_traits<L2>::storage_type());
  1579. #endif
  1580. }
  1581. template <typename L1, typename L2, typename L3>
  1582. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1583. clear(l3);
  1584. size_type nc = mat_ncols(l1);
  1585. for (size_type i = 0; i < nc; ++i)
  1586. add(scaled(mat_const_col(l1, i), l2[i]), l3);
  1587. }
  1588. template <typename L1, typename L2, typename L3>
  1589. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1590. typedef typename linalg_traits<L2>::value_type T;
  1591. clear(l3);
  1592. typename linalg_traits<L2>::const_iterator it = vect_const_begin(l2),
  1593. ite = vect_const_end(l2);
  1594. for (; it != ite; ++it)
  1595. if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1596. }
  1597. template <typename L1, typename L2, typename L3>
  1598. void mult_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1599. typedef typename linalg_traits<L2>::value_type T;
  1600. clear(l3);
  1601. typename linalg_traits<L2>::const_iterator it = vect_const_begin(l2),
  1602. ite = vect_const_end(l2);
  1603. for (; it != ite; ++it)
  1604. if (*it != T(0)) add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1605. }
  1606. template <typename L1, typename L2, typename L3> inline
  1607. void mult_spec(const L1& l1, const L2& l2, L3& l3, row_major)
  1608. { mult_by_row(l1, l2, l3, typename linalg_traits<L3>::storage_type()); }
  1609. template <typename L1, typename L2, typename L3> inline
  1610. void mult_spec(const L1& l1, const L2& l2, L3& l3, col_major)
  1611. { mult_by_col(l1, l2, l3, typename linalg_traits<L2>::storage_type()); }
  1612. template <typename L1, typename L2, typename L3> inline
  1613. void mult_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
  1614. { mult_ind(l1, l2, l3, typename linalg_traits<L1>::storage_type()); }
  1615. template <typename L1, typename L2, typename L3>
  1616. void mult_ind(const L1& l1, const L2& l2, L3& l3, abstract_indirect) {
  1617. GMM_ASSERT1(false, "gmm::mult(m, ., .) undefined for this kind of matrix");
  1618. }
  1619. template <typename L1, typename L2, typename L3, typename L4> inline
  1620. void mult(const L1& l1, const L2& l2, const L3& l3, L4& l4) {
  1621. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1622. copy(l3, l4);
  1623. if (!m || !n) { gmm::copy(l3, l4); return; }
  1624. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l4), "dimensions mismatch");
  1625. if (!same_origin(l2, l4)) {
  1626. mult_add_spec(l1, l2, l4, typename principal_orientation_type<typename
  1627. linalg_traits<L1>::sub_orientation>::potype());
  1628. }
  1629. else {
  1630. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1631. typename temporary_vector<L2>::vector_type temp(vect_size(l2));
  1632. copy(l2, temp);
  1633. mult_add_spec(l1,temp, l4, typename principal_orientation_type<typename
  1634. linalg_traits<L1>::sub_orientation>::potype());
  1635. }
  1636. }
  1637. template <typename L1, typename L2, typename L3, typename L4> inline
  1638. void mult(const L1& l1, const L2& l2, const L3& l3, const L4& l4)
  1639. { mult(l1, l2, l3, linalg_const_cast(l4)); }
  1640. ///@endcond
  1641. /** Multiply-accumulate. l3 += l1*l2; */
  1642. template <typename L1, typename L2, typename L3> inline
  1643. void mult_add(const L1& l1, const L2& l2, L3& l3) {
  1644. size_type m = mat_nrows(l1), n = mat_ncols(l1);
  1645. if (!m || !n) return;
  1646. GMM_ASSERT2(n==vect_size(l2) && m==vect_size(l3), "dimensions mismatch");
  1647. if (!same_origin(l2, l3)) {
  1648. mult_add_spec(l1, l2, l3, typename principal_orientation_type<typename
  1649. linalg_traits<L1>::sub_orientation>::potype());
  1650. }
  1651. else {
  1652. GMM_WARNING2("Warning, A temporary is used for mult\n");
  1653. typename temporary_vector<L3>::vector_type temp(vect_size(l2));
  1654. copy(l2, temp);
  1655. mult_add_spec(l1,temp, l3, typename principal_orientation_type<typename
  1656. linalg_traits<L1>::sub_orientation>::potype());
  1657. }
  1658. }
  1659. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1660. template <typename L1, typename L2, typename L3> inline
  1661. void mult_add(const L1& l1, const L2& l2, const L3& l3)
  1662. { mult_add(l1, l2, linalg_const_cast(l3)); }
  1663. template <typename L1, typename L2, typename L3>
  1664. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1665. typedef typename linalg_traits<L3>::value_type T;
  1666. size_type nr = mat_nrows(l1);
  1667. for (size_type i = 0; i < nr; ++i) {
  1668. T aux = vect_sp(mat_const_row(l1, i), l2);
  1669. if (aux != T(0)) l3[i] += aux;
  1670. }
  1671. }
  1672. template <typename L1, typename L2, typename L3>
  1673. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1674. typedef typename linalg_traits<L3>::value_type T;
  1675. size_type nr = mat_nrows(l1);
  1676. for (size_type i = 0; i < nr; ++i) {
  1677. T aux = vect_sp(mat_const_row(l1, i), l2);
  1678. if (aux != T(0)) l3[i] += aux;
  1679. }
  1680. }
  1681. template <typename L1, typename L2, typename L3>
  1682. void mult_add_by_row(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1683. typename linalg_traits<L3>::iterator it=vect_begin(l3), ite=vect_end(l3);
  1684. typename linalg_traits<L1>::const_row_iterator
  1685. itr = mat_row_const_begin(l1);
  1686. for (; it != ite; ++it, ++itr)
  1687. *it += vect_sp(linalg_traits<L1>::row(itr), l2);
  1688. }
  1689. template <typename L1, typename L2, typename L3>
  1690. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_dense) {
  1691. size_type nc = mat_ncols(l1);
  1692. for (size_type i = 0; i < nc; ++i)
  1693. add(scaled(mat_const_col(l1, i), l2[i]), l3);
  1694. }
  1695. template <typename L1, typename L2, typename L3>
  1696. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_sparse) {
  1697. typename linalg_traits<L2>::const_iterator it = vect_const_begin(l2),
  1698. ite = vect_const_end(l2);
  1699. for (; it != ite; ++it)
  1700. if (*it != typename linalg_traits<L2>::value_type(0))
  1701. add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1702. }
  1703. template <typename L1, typename L2, typename L3>
  1704. void mult_add_by_col(const L1& l1, const L2& l2, L3& l3, abstract_skyline) {
  1705. typename linalg_traits<L2>::const_iterator it = vect_const_begin(l2),
  1706. ite = vect_const_end(l2);
  1707. for (; it != ite; ++it)
  1708. if (*it != typename linalg_traits<L2>::value_type(0))
  1709. add(scaled(mat_const_col(l1, it.index()), *it), l3);
  1710. }
  1711. template <typename L1, typename L2, typename L3> inline
  1712. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, row_major)
  1713. { mult_add_by_row(l1, l2, l3, typename linalg_traits<L3>::storage_type()); }
  1714. template <typename L1, typename L2, typename L3> inline
  1715. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, col_major)
  1716. { mult_add_by_col(l1, l2, l3, typename linalg_traits<L2>::storage_type()); }
  1717. template <typename L1, typename L2, typename L3> inline
  1718. void mult_add_spec(const L1& l1, const L2& l2, L3& l3, abstract_null_type)
  1719. { mult_ind(l1, l2, l3, typename linalg_traits<L1>::storage_type()); }
  1720. template <typename L1, typename L2, typename L3>
  1721. void transposed_mult(const L1& l1, const L2& l2, const L3& l3)
  1722. { mult(gmm::transposed(l1), l2, l3); }
  1723. /* ******************************************************************** */
  1724. /* Matrix-matrix mult */
  1725. /* ******************************************************************** */
  1726. struct g_mult {}; // generic mult, less optimized
  1727. struct c_mult {}; // col x col -> col mult
  1728. struct r_mult {}; // row x row -> row mult
  1729. struct rcmult {}; // row x col -> col mult
  1730. struct crmult {}; // col x row -> row mult
  1731. template<typename SO1, typename SO2, typename SO3> struct mult_t;
  1732. #define DEFMU__ template<> struct mult_t
  1733. DEFMU__<row_major , row_major , row_major > { typedef r_mult t; };
  1734. DEFMU__<row_major , row_major , col_major > { typedef g_mult t; };
  1735. DEFMU__<row_major , row_major , col_and_row> { typedef r_mult t; };
  1736. DEFMU__<row_major , row_major , row_and_col> { typedef r_mult t; };
  1737. DEFMU__<row_major , col_major , row_major > { typedef rcmult t; };
  1738. DEFMU__<row_major , col_major , col_major > { typedef rcmult t; };
  1739. DEFMU__<row_major , col_major , col_and_row> { typedef rcmult t; };
  1740. DEFMU__<row_major , col_major , row_and_col> { typedef rcmult t; };
  1741. DEFMU__<row_major , col_and_row, row_major > { typedef r_mult t; };
  1742. DEFMU__<row_major , col_and_row, col_major > { typedef rcmult t; };
  1743. DEFMU__<row_major , col_and_row, col_and_row> { typedef rcmult t; };
  1744. DEFMU__<row_major , col_and_row, row_and_col> { typedef rcmult t; };
  1745. DEFMU__<row_major , row_and_col, row_major > { typedef r_mult t; };
  1746. DEFMU__<row_major , row_and_col, col_major > { typedef rcmult t; };
  1747. DEFMU__<row_major , row_and_col, col_and_row> { typedef r_mult t; };
  1748. DEFMU__<row_major , row_and_col, row_and_col> { typedef r_mult t; };
  1749. DEFMU__<col_major , row_major , row_major > { typedef crmult t; };
  1750. DEFMU__<col_major , row_major , col_major > { typedef g_mult t; };
  1751. DEFMU__<col_major , row_major , col_and_row> { typedef crmult t; };
  1752. DEFMU__<col_major , row_major , row_and_col> { typedef crmult t; };
  1753. DEFMU__<col_major , col_major , row_major > { typedef g_mult t; };
  1754. DEFMU__<col_major , col_major , col_major > { typedef c_mult t; };
  1755. DEFMU__<col_major , col_major , col_and_row> { typedef c_mult t; };
  1756. DEFMU__<col_major , col_major , row_and_col> { typedef c_mult t; };
  1757. DEFMU__<col_major , col_and_row, row_major > { typedef crmult t; };
  1758. DEFMU__<col_major , col_and_row, col_major > { typedef c_mult t; };
  1759. DEFMU__<col_major , col_and_row, col_and_row> { typedef c_mult t; };
  1760. DEFMU__<col_major , col_and_row, row_and_col> { typedef c_mult t; };
  1761. DEFMU__<col_major , row_and_col, row_major > { typedef crmult t; };
  1762. DEFMU__<col_major , row_and_col, col_major > { typedef c_mult t; };
  1763. DEFMU__<col_major , row_and_col, col_and_row> { typedef c_mult t; };
  1764. DEFMU__<col_major , row_and_col, row_and_col> { typedef c_mult t; };
  1765. DEFMU__<col_and_row, row_major , row_major > { typedef r_mult t; };
  1766. DEFMU__<col_and_row, row_major , col_major > { typedef c_mult t; };
  1767. DEFMU__<col_and_row, row_major , col_and_row> { typedef r_mult t; };
  1768. DEFMU__<col_and_row, row_major , row_and_col> { typedef r_mult t; };
  1769. DEFMU__<col_and_row, col_major , row_major > { typedef rcmult t; };
  1770. DEFMU__<col_and_row, col_major , col_major > { typedef c_mult t; };
  1771. DEFMU__<col_and_row, col_major , col_and_row> { typedef c_mult t; };
  1772. DEFMU__<col_and_row, col_major , row_and_col> { typedef c_mult t; };
  1773. DEFMU__<col_and_row, col_and_row, row_major > { typedef r_mult t; };
  1774. DEFMU__<col_and_row, col_and_row, col_major > { typedef c_mult t; };
  1775. DEFMU__<col_and_row, col_and_row, col_and_row> { typedef c_mult t; };
  1776. DEFMU__<col_and_row, col_and_row, row_and_col> { typedef c_mult t; };
  1777. DEFMU__<col_and_row, row_and_col, row_major > { typedef r_mult t; };
  1778. DEFMU__<col_and_row, row_and_col, col_major > { typedef c_mult t; };
  1779. DEFMU__<col_and_row, row_and_col, col_and_row> { typedef c_mult t; };
  1780. DEFMU__<col_and_row, row_and_col, row_and_col> { typedef r_mult t; };
  1781. DEFMU__<row_and_col, row_major , row_major > { typedef r_mult t; };
  1782. DEFMU__<row_and_col, row_major , col_major > { typedef c_mult t; };
  1783. DEFMU__<row_and_col, row_major , col_and_row> { typedef r_mult t; };
  1784. DEFMU__<row_and_col, row_major , row_and_col> { typedef r_mult t; };
  1785. DEFMU__<row_and_col, col_major , row_major > { typedef rcmult t; };
  1786. DEFMU__<row_and_col, col_major , col_major > { typedef c_mult t; };
  1787. DEFMU__<row_and_col, col_major , col_and_row> { typedef c_mult t; };
  1788. DEFMU__<row_and_col, col_major , row_and_col> { typedef c_mult t; };
  1789. DEFMU__<row_and_col, col_and_row, row_major > { typedef rcmult t; };
  1790. DEFMU__<row_and_col, col_and_row, col_major > { typedef rcmult t; };
  1791. DEFMU__<row_and_col, col_and_row, col_and_row> { typedef rcmult t; };
  1792. DEFMU__<row_and_col, col_and_row, row_and_col> { typedef rcmult t; };
  1793. DEFMU__<row_and_col, row_and_col, row_major > { typedef r_mult t; };
  1794. DEFMU__<row_and_col, row_and_col, col_major > { typedef c_mult t; };
  1795. DEFMU__<row_and_col, row_and_col, col_and_row> { typedef r_mult t; };
  1796. DEFMU__<row_and_col, row_and_col, row_and_col> { typedef r_mult t; };
  1797. template <typename L1, typename L2, typename L3>
  1798. void mult_dispatch(const L1& l1, const L2& l2, L3& l3, abstract_matrix) {
  1799. typedef typename temporary_matrix<L3>::matrix_type temp_mat_type;
  1800. size_type m = mat_nrows(l1), n = mat_ncols(l1), k = mat_ncols(l2);
  1801. if (n == 0) { gmm::clear(l3); return; }
  1802. GMM_ASSERT2(n == mat_nrows(l2) && m == mat_nrows(l3) && k == mat_ncols(l3),
  1803. "dimensions mismatch");
  1804. if (same_origin(l2, l3) || same_origin(l1, l3)) {
  1805. GMM_WARNING2("A temporary is used for mult");
  1806. temp_mat_type temp(mat_nrows(l3), mat_ncols(l3));
  1807. mult_spec(l1, l2, temp, typename mult_t<
  1808. typename linalg_traits<L1>::sub_orientation,
  1809. typename linalg_traits<L2>::sub_orientation,
  1810. typename linalg_traits<temp_mat_type>::sub_orientation>::t());
  1811. copy(temp, l3);
  1812. }
  1813. else
  1814. mult_spec(l1, l2, l3, typename mult_t<
  1815. typename linalg_traits<L1>::sub_orientation,
  1816. typename linalg_traits<L2>::sub_orientation,
  1817. typename linalg_traits<L3>::sub_orientation>::t());
  1818. }
  1819. // Completely generic but inefficient
  1820. template <typename L1, typename L2, typename L3>
  1821. void mult_spec(const L1& l1, const L2& l2, L3& l3, g_mult) {
  1822. typedef typename linalg_traits<L3>::value_type T;
  1823. GMM_WARNING2("Inefficient generic matrix-matrix mult is used");
  1824. for (size_type i = 0; i < mat_nrows(l3) ; ++i)
  1825. for (size_type j = 0; j < mat_ncols(l3) ; ++j) {
  1826. T a(0);
  1827. for (size_type k = 0; k < mat_nrows(l2) ; ++k) a += l1(i, k)*l2(k, j);
  1828. l3(i, j) = a;
  1829. }
  1830. }
  1831. // row x col matrix-matrix mult
  1832. template <typename L1, typename L2, typename L3>
  1833. void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, col_major) {
  1834. typedef typename temporary_col_matrix<L1>::matrix_type temp_col_mat;
  1835. temp_col_mat temp(mat_nrows(l1), mat_ncols(l1));
  1836. copy(l1, temp);
  1837. mult(temp, l2, l3);
  1838. }
  1839. template <typename L1, typename L2, typename L3>
  1840. void mult_row_col_with_temp(const L1& l1, const L2& l2, L3& l3, row_major) {
  1841. typedef typename temporary_row_matrix<L2>::matrix_type temp_row_mat;
  1842. temp_row_mat temp(mat_nrows(l2), mat_ncols(l2));
  1843. copy(l2, temp);
  1844. mult(l1, temp, l3);
  1845. }
  1846. template <typename L1, typename L2, typename L3>
  1847. void mult_spec(const L1& l1, const L2& l2, L3& l3, rcmult) {
  1848. if (is_sparse(l1) && is_sparse(l2)) {
  1849. GMM_WARNING3("Inefficient row matrix - col matrix mult for "
  1850. "sparse matrices, using temporary");
  1851. mult_row_col_with_temp(l1, l2, l3,
  1852. typename principal_orientation_type<typename
  1853. linalg_traits<L3>::sub_orientation>::potype());
  1854. }
  1855. else {
  1856. typename linalg_traits<L2>::const_col_iterator
  1857. it2b = linalg_traits<L2>::col_begin(l2), it2,
  1858. ite = linalg_traits<L2>::col_end(l2);
  1859. size_type i,j, k = mat_nrows(l1);
  1860. for (i = 0; i < k; ++i) {
  1861. typename linalg_traits<L1>::const_sub_row_type r1=mat_const_row(l1, i);
  1862. for (it2 = it2b, j = 0; it2 != ite; ++it2, ++j)
  1863. l3(i,j) = vect_sp(r1, linalg_traits<L2>::col(it2));
  1864. }
  1865. }
  1866. }
  1867. // row - row matrix-matrix mult
  1868. template <typename L1, typename L2, typename L3> inline
  1869. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult) {
  1870. mult_spec(l1, l2, l3,r_mult(),typename linalg_traits<L1>::storage_type());
  1871. }
  1872. template <typename L1, typename L2, typename L3>
  1873. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_dense) {
  1874. // optimizable
  1875. clear(l3);
  1876. size_type nn = mat_nrows(l3), mm = mat_nrows(l2);
  1877. for (size_type i = 0; i < nn; ++i) {
  1878. for (size_type j = 0; j < mm; ++j) {
  1879. add(scaled(mat_const_row(l2, j), l1(i, j)), mat_row(l3, i));
  1880. }
  1881. }
  1882. }
  1883. template <typename L1, typename L2, typename L3>
  1884. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_sparse) {
  1885. // optimizable
  1886. clear(l3);
  1887. size_type nn = mat_nrows(l3);
  1888. for (size_type i = 0; i < nn; ++i) {
  1889. typename linalg_traits<L1>::const_sub_row_type rl1=mat_const_row(l1, i);
  1890. typename linalg_traits<typename linalg_traits<L1>::const_sub_row_type>::
  1891. const_iterator it = vect_const_begin(rl1), ite = vect_const_end(rl1);
  1892. for (; it != ite; ++it)
  1893. add(scaled(mat_const_row(l2, it.index()), *it), mat_row(l3, i));
  1894. }
  1895. }
  1896. template <typename L1, typename L2, typename L3>
  1897. void mult_spec(const L1& l1, const L2& l2, L3& l3, r_mult, abstract_skyline)
  1898. { mult_spec(l1, l2, l3, r_mult(), abstract_sparse()); }
  1899. // col - col matrix-matrix mult
  1900. template <typename L1, typename L2, typename L3> inline
  1901. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult) {
  1902. mult_spec(l1, l2,l3,c_mult(),typename linalg_traits<L2>::storage_type(),
  1903. typename linalg_traits<L2>::sub_orientation());
  1904. }
  1905. template <typename L1, typename L2, typename L3, typename ORIEN>
  1906. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1907. abstract_dense, ORIEN) {
  1908. typedef typename linalg_traits<L2>::value_type T;
  1909. size_type nn = mat_ncols(l3), mm = mat_ncols(l1);
  1910. for (size_type i = 0; i < nn; ++i) {
  1911. clear(mat_col(l3, i));
  1912. for (size_type j = 0; j < mm; ++j) {
  1913. T b = l2(j, i);
  1914. if (b != T(0)) add(scaled(mat_const_col(l1, j), b), mat_col(l3, i));
  1915. }
  1916. }
  1917. }
  1918. template <typename L1, typename L2, typename L3, typename ORIEN>
  1919. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1920. abstract_sparse, ORIEN) {
  1921. // optimizable
  1922. clear(l3);
  1923. size_type nn = mat_ncols(l3);
  1924. for (size_type i = 0; i < nn; ++i) {
  1925. typename linalg_traits<L2>::const_sub_col_type rc2=mat_const_col(l2, i);
  1926. typename linalg_traits<typename linalg_traits<L2>::const_sub_col_type>::
  1927. const_iterator it = vect_const_begin(rc2), ite = vect_const_end(rc2);
  1928. for (; it != ite; ++it)
  1929. add(scaled(mat_const_col(l1, it.index()), *it), mat_col(l3, i));
  1930. }
  1931. }
  1932. template <typename L1, typename L2, typename L3>
  1933. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1934. abstract_sparse, row_major) {
  1935. typedef typename linalg_traits<L2>::value_type T;
  1936. GMM_WARNING3("Inefficient matrix-matrix mult for sparse matrices");
  1937. clear(l3);
  1938. size_type mm = mat_nrows(l2), nn = mat_ncols(l3);
  1939. for (size_type i = 0; i < nn; ++i)
  1940. for (size_type j = 0; j < mm; ++j) {
  1941. T a = l2(i,j);
  1942. if (a != T(0)) add(scaled(mat_const_col(l1, j), a), mat_col(l3, i));
  1943. }
  1944. }
  1945. template <typename L1, typename L2, typename L3, typename ORIEN> inline
  1946. void mult_spec(const L1& l1, const L2& l2, L3& l3, c_mult,
  1947. abstract_skyline, ORIEN)
  1948. { mult_spec(l1, l2, l3, c_mult(), abstract_sparse(), ORIEN()); }
  1949. // col - row matrix-matrix mult
  1950. template <typename L1, typename L2, typename L3> inline
  1951. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult)
  1952. { mult_spec(l1,l2,l3,crmult(), typename linalg_traits<L1>::storage_type()); }
  1953. template <typename L1, typename L2, typename L3>
  1954. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_dense) {
  1955. // optimizable
  1956. clear(l3);
  1957. size_type nn = mat_ncols(l1), mm = mat_nrows(l1);
  1958. for (size_type i = 0; i < nn; ++i) {
  1959. for (size_type j = 0; j < mm; ++j)
  1960. add(scaled(mat_const_row(l2, i), l1(j, i)), mat_row(l3, j));
  1961. }
  1962. }
  1963. template <typename L1, typename L2, typename L3>
  1964. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_sparse) {
  1965. // optimizable
  1966. clear(l3);
  1967. size_type nn = mat_ncols(l1);
  1968. for (size_type i = 0; i < nn; ++i) {
  1969. typename linalg_traits<L1>::const_sub_col_type rc1=mat_const_col(l1, i);
  1970. typename linalg_traits<typename linalg_traits<L1>::const_sub_col_type>::
  1971. const_iterator it = vect_const_begin(rc1), ite = vect_const_end(rc1);
  1972. for (; it != ite; ++it)
  1973. add(scaled(mat_const_row(l2, i), *it), mat_row(l3, it.index()));
  1974. }
  1975. }
  1976. template <typename L1, typename L2, typename L3> inline
  1977. void mult_spec(const L1& l1, const L2& l2, L3& l3, crmult, abstract_skyline)
  1978. { mult_spec(l1, l2, l3, crmult(), abstract_sparse()); }
  1979. /* ******************************************************************** */
  1980. /* Symmetry test. */
  1981. /* ******************************************************************** */
  1982. ///@endcond
  1983. /** test if A is symmetric.
  1984. @param A a matrix.
  1985. @param tol a threshold.
  1986. */
  1987. template <typename MAT> inline
  1988. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol
  1989. = magnitude_of_linalg(MAT)(-1)) {
  1990. typedef magnitude_of_linalg(MAT) R;
  1991. if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
  1992. if (mat_nrows(A) != mat_ncols(A)) return false;
  1993. return is_symmetric(A, tol, typename linalg_traits<MAT>::storage_type());
  1994. }
  1995. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  1996. template <typename MAT>
  1997. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  1998. abstract_dense) {
  1999. size_type m = mat_nrows(A);
  2000. for (size_type i = 1; i < m; ++i)
  2001. for (size_type j = 0; j < i; ++j)
  2002. if (gmm::abs(A(i, j)-A(j, i)) > tol) return false;
  2003. return true;
  2004. }
  2005. template <typename MAT>
  2006. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2007. abstract_sparse) {
  2008. return is_symmetric(A, tol, typename principal_orientation_type<typename
  2009. linalg_traits<MAT>::sub_orientation>::potype());
  2010. }
  2011. template <typename MAT>
  2012. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2013. row_major) {
  2014. for (size_type i = 0; i < mat_nrows(A); ++i) {
  2015. typedef typename linalg_traits<MAT>::const_sub_row_type row_type;
  2016. row_type row = mat_const_row(A, i);
  2017. typename linalg_traits<row_type>::const_iterator
  2018. it = vect_const_begin(row), ite = vect_const_end(row);
  2019. for (; it != ite; ++it)
  2020. if (gmm::abs(*it - A(it.index(), i)) > tol) return false;
  2021. }
  2022. return true;
  2023. }
  2024. template <typename MAT>
  2025. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2026. col_major) {
  2027. for (size_type i = 0; i < mat_ncols(A); ++i) {
  2028. typedef typename linalg_traits<MAT>::const_sub_col_type col_type;
  2029. col_type col = mat_const_col(A, i);
  2030. typename linalg_traits<col_type>::const_iterator
  2031. it = vect_const_begin(col), ite = vect_const_end(col);
  2032. for (; it != ite; ++it)
  2033. if (gmm::abs(*it - A(i, it.index())) > tol) return false;
  2034. }
  2035. return true;
  2036. }
  2037. template <typename MAT>
  2038. bool is_symmetric(const MAT &A, magnitude_of_linalg(MAT) tol,
  2039. abstract_skyline)
  2040. { return is_symmetric(A, tol, abstract_sparse()); }
  2041. ///@endcond
  2042. /** test if A is Hermitian.
  2043. @param A a matrix.
  2044. @param tol a threshold.
  2045. */
  2046. template <typename MAT> inline
  2047. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol
  2048. = magnitude_of_linalg(MAT)(-1)) {
  2049. typedef magnitude_of_linalg(MAT) R;
  2050. if (tol < R(0)) tol = default_tol(R()) * mat_maxnorm(A);
  2051. if (mat_nrows(A) != mat_ncols(A)) return false;
  2052. return is_hermitian(A, tol, typename linalg_traits<MAT>::storage_type());
  2053. }
  2054. ///@cond DOXY_SHOW_ALL_FUNCTIONS
  2055. template <typename MAT>
  2056. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2057. abstract_dense) {
  2058. size_type m = mat_nrows(A);
  2059. for (size_type i = 1; i < m; ++i)
  2060. for (size_type j = 0; j < i; ++j)
  2061. if (gmm::abs(A(i, j)-gmm::conj(A(j, i))) > tol) return false;
  2062. return true;
  2063. }
  2064. template <typename MAT>
  2065. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2066. abstract_sparse) {
  2067. return is_hermitian(A, tol, typename principal_orientation_type<typename
  2068. linalg_traits<MAT>::sub_orientation>::potype());
  2069. }
  2070. template <typename MAT>
  2071. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2072. row_major) {
  2073. for (size_type i = 0; i < mat_nrows(A); ++i) {
  2074. typedef typename linalg_traits<MAT>::const_sub_row_type row_type;
  2075. row_type row = mat_const_row(A, i);
  2076. typename linalg_traits<row_type>::const_iterator
  2077. it = vect_const_begin(row), ite = vect_const_end(row);
  2078. for (; it != ite; ++it)
  2079. if (gmm::abs(gmm::conj(*it) - A(it.index(), i)) > tol) return false;
  2080. }
  2081. return true;
  2082. }
  2083. template <typename MAT>
  2084. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2085. col_major) {
  2086. for (size_type i = 0; i < mat_ncols(A); ++i) {
  2087. typedef typename linalg_traits<MAT>::const_sub_col_type col_type;
  2088. col_type col = mat_const_col(A, i);
  2089. typename linalg_traits<col_type>::const_iterator
  2090. it = vect_const_begin(col), ite = vect_const_end(col);
  2091. for (; it != ite; ++it)
  2092. if (gmm::abs(gmm::conj(*it) - A(i, it.index())) > tol) return false;
  2093. }
  2094. return true;
  2095. }
  2096. template <typename MAT>
  2097. bool is_hermitian(const MAT &A, magnitude_of_linalg(MAT) tol,
  2098. abstract_skyline)
  2099. { return is_hermitian(A, tol, abstract_sparse()); }
  2100. ///@endcond
  2101. }
  2102. #endif // GMM_BLAS_H__