You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

357 lines
16 KiB

  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // Copyright (C) 2015 Gael Guennebaud <gael.guennebaud@inria.fr>
  5. //
  6. // This Source Code Form is subject to the terms of the Mozilla
  7. // Public License v. 2.0. If a copy of the MPL was not distributed
  8. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  9. #ifdef STORMEIGEN_DEFAULT_TO_ROW_MAJOR
  10. #undef STORMEIGEN_DEFAULT_TO_ROW_MAJOR
  11. #endif
  12. #define STORMEIGEN_DEBUG_ASSIGN
  13. #include "main.h"
  14. #include <typeinfo>
  15. using internal::demangle_flags;
  16. using internal::demangle_traversal;
  17. using internal::demangle_unrolling;
  18. template<typename Dst, typename Src>
  19. bool test_assign(const Dst&, const Src&, int traversal, int unrolling)
  20. {
  21. typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
  22. bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
  23. if(!res)
  24. {
  25. std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
  26. std::cerr << " " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
  27. std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
  28. std::cerr << " " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
  29. traits::debug();
  30. std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
  31. << " got " << demangle_traversal(traits::Traversal) << "\n";
  32. std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
  33. << " got " << demangle_unrolling(traits::Unrolling) << "\n";
  34. }
  35. return res;
  36. }
  37. template<typename Dst, typename Src>
  38. bool test_assign(int traversal, int unrolling)
  39. {
  40. typedef internal::copy_using_evaluator_traits<internal::evaluator<Dst>,internal::evaluator<Src>, internal::assign_op<typename Dst::Scalar> > traits;
  41. bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
  42. if(!res)
  43. {
  44. std::cerr << "Src: " << demangle_flags(Src::Flags) << std::endl;
  45. std::cerr << " " << demangle_flags(internal::evaluator<Src>::Flags) << std::endl;
  46. std::cerr << "Dst: " << demangle_flags(Dst::Flags) << std::endl;
  47. std::cerr << " " << demangle_flags(internal::evaluator<Dst>::Flags) << std::endl;
  48. traits::debug();
  49. std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
  50. << " got " << demangle_traversal(traits::Traversal) << "\n";
  51. std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
  52. << " got " << demangle_unrolling(traits::Unrolling) << "\n";
  53. }
  54. return res;
  55. }
  56. template<typename Xpr>
  57. bool test_redux(const Xpr&, int traversal, int unrolling)
  58. {
  59. typedef internal::redux_traits<internal::scalar_sum_op<typename Xpr::Scalar>,internal::redux_evaluator<Xpr> > traits;
  60. bool res = traits::Traversal==traversal && traits::Unrolling==unrolling;
  61. if(!res)
  62. {
  63. std::cerr << demangle_flags(Xpr::Flags) << std::endl;
  64. std::cerr << demangle_flags(internal::evaluator<Xpr>::Flags) << std::endl;
  65. traits::debug();
  66. std::cerr << " Expected Traversal == " << demangle_traversal(traversal)
  67. << " got " << demangle_traversal(traits::Traversal) << "\n";
  68. std::cerr << " Expected Unrolling == " << demangle_unrolling(unrolling)
  69. << " got " << demangle_unrolling(traits::Unrolling) << "\n";
  70. }
  71. return res;
  72. }
  73. template<typename Scalar, bool Enable = internal::packet_traits<Scalar>::Vectorizable>
  74. struct vectorization_logic
  75. {
  76. typedef internal::packet_traits<Scalar> PacketTraits;
  77. typedef typename internal::packet_traits<Scalar>::type PacketType;
  78. typedef typename internal::unpacket_traits<PacketType>::half HalfPacketType;
  79. enum {
  80. PacketSize = internal::unpacket_traits<PacketType>::size,
  81. HalfPacketSize = internal::unpacket_traits<HalfPacketType>::size
  82. };
  83. static void run()
  84. {
  85. typedef Matrix<Scalar,PacketSize,1> Vector1;
  86. typedef Matrix<Scalar,Dynamic,1> VectorX;
  87. typedef Matrix<Scalar,Dynamic,Dynamic> MatrixXX;
  88. typedef Matrix<Scalar,PacketSize,PacketSize> Matrix11;
  89. typedef Matrix<Scalar,2*PacketSize,2*PacketSize> Matrix22;
  90. typedef Matrix<Scalar,(Matrix11::Flags&RowMajorBit)?16:4*PacketSize,(Matrix11::Flags&RowMajorBit)?4*PacketSize:16> Matrix44;
  91. typedef Matrix<Scalar,(Matrix11::Flags&RowMajorBit)?16:4*PacketSize,(Matrix11::Flags&RowMajorBit)?4*PacketSize:16,DontAlign|STORMEIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION> Matrix44u;
  92. typedef Matrix<Scalar,4*PacketSize,4*PacketSize,ColMajor> Matrix44c;
  93. typedef Matrix<Scalar,4*PacketSize,4*PacketSize,RowMajor> Matrix44r;
  94. typedef Matrix<Scalar,
  95. (PacketSize==8 ? 4 : PacketSize==4 ? 2 : PacketSize==2 ? 1 : /*PacketSize==1 ?*/ 1),
  96. (PacketSize==8 ? 2 : PacketSize==4 ? 2 : PacketSize==2 ? 2 : /*PacketSize==1 ?*/ 1)
  97. > Matrix1;
  98. typedef Matrix<Scalar,
  99. (PacketSize==8 ? 4 : PacketSize==4 ? 2 : PacketSize==2 ? 1 : /*PacketSize==1 ?*/ 1),
  100. (PacketSize==8 ? 2 : PacketSize==4 ? 2 : PacketSize==2 ? 2 : /*PacketSize==1 ?*/ 1),
  101. DontAlign|((Matrix1::Flags&RowMajorBit)?RowMajor:ColMajor)> Matrix1u;
  102. // this type is made such that it can only be vectorized when viewed as a linear 1D vector
  103. typedef Matrix<Scalar,
  104. (PacketSize==8 ? 4 : PacketSize==4 ? 6 : PacketSize==2 ? ((Matrix11::Flags&RowMajorBit)?2:3) : /*PacketSize==1 ?*/ 1),
  105. (PacketSize==8 ? 6 : PacketSize==4 ? 2 : PacketSize==2 ? ((Matrix11::Flags&RowMajorBit)?3:2) : /*PacketSize==1 ?*/ 3)
  106. > Matrix3;
  107. #if !STORMEIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
  108. VERIFY(test_assign(Vector1(),Vector1(),
  109. InnerVectorizedTraversal,CompleteUnrolling));
  110. VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
  111. InnerVectorizedTraversal,CompleteUnrolling));
  112. VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
  113. InnerVectorizedTraversal,CompleteUnrolling));
  114. VERIFY(test_assign(Vector1(),Vector1().template cast<Scalar>(),
  115. InnerVectorizedTraversal,CompleteUnrolling));
  116. VERIFY(test_assign(Vector1(),Vector1(),
  117. InnerVectorizedTraversal,CompleteUnrolling));
  118. VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
  119. InnerVectorizedTraversal,CompleteUnrolling));
  120. VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
  121. InnerVectorizedTraversal,CompleteUnrolling));
  122. VERIFY(test_assign(Matrix44(),Matrix44()+Matrix44(),
  123. InnerVectorizedTraversal,InnerUnrolling));
  124. VERIFY(test_assign(Matrix44u(),Matrix44()+Matrix44(),
  125. LinearTraversal,NoUnrolling));
  126. VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
  127. LinearTraversal,CompleteUnrolling));
  128. VERIFY(test_assign(Matrix44c().col(1),Matrix44c().col(2)+Matrix44c().col(3),
  129. InnerVectorizedTraversal,CompleteUnrolling));
  130. VERIFY(test_assign(Matrix44r().row(2),Matrix44r().row(1)+Matrix44r().row(1),
  131. InnerVectorizedTraversal,CompleteUnrolling));
  132. if(PacketSize>1)
  133. {
  134. typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
  135. VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
  136. LinearTraversal,CompleteUnrolling));
  137. VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
  138. LinearTraversal,CompleteUnrolling));
  139. VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
  140. PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
  141. VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
  142. HalfPacketSize==1 ? InnerVectorizedTraversal : LinearTraversal,NoUnrolling));
  143. VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
  144. DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
  145. }
  146. VERIFY(test_redux(Matrix3(),
  147. LinearVectorizedTraversal,CompleteUnrolling));
  148. VERIFY(test_redux(Matrix44(),
  149. LinearVectorizedTraversal,NoUnrolling));
  150. VERIFY(test_redux(Matrix44().template block<(Matrix1::Flags&RowMajorBit)?4:PacketSize,(Matrix1::Flags&RowMajorBit)?PacketSize:4>(1,2),
  151. DefaultTraversal,CompleteUnrolling));
  152. VERIFY(test_redux(Matrix44c().template block<2*PacketSize,1>(1,2),
  153. LinearVectorizedTraversal,CompleteUnrolling));
  154. VERIFY(test_redux(Matrix44r().template block<1,2*PacketSize>(2,1),
  155. LinearVectorizedTraversal,CompleteUnrolling));
  156. VERIFY((test_assign<
  157. Map<Matrix22, AlignedMax, OuterStride<3*PacketSize> >,
  158. Matrix22
  159. >(InnerVectorizedTraversal,CompleteUnrolling)));
  160. VERIFY((test_assign<
  161. Map<Matrix<Scalar,STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize),STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize)>, AlignedMax, InnerStride<3*PacketSize> >,
  162. Matrix<Scalar,STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize),STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize)>
  163. >(DefaultTraversal,CompleteUnrolling)));
  164. VERIFY((test_assign(Matrix11(), Matrix<Scalar,PacketSize,STORMEIGEN_PLAIN_ENUM_MIN(2,PacketSize)>()*Matrix<Scalar,STORMEIGEN_PLAIN_ENUM_MIN(2,PacketSize),PacketSize>(),
  165. InnerVectorizedTraversal, CompleteUnrolling)));
  166. #endif
  167. VERIFY(test_assign(MatrixXX(10,10),MatrixXX(20,20).block(10,10,2,3),
  168. SliceVectorizedTraversal,NoUnrolling));
  169. VERIFY(test_redux(VectorX(10),
  170. LinearVectorizedTraversal,NoUnrolling));
  171. }
  172. };
  173. template<typename Scalar> struct vectorization_logic<Scalar,false>
  174. {
  175. static void run() {}
  176. };
  177. template<typename Scalar, bool Enable = !internal::is_same<typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half,
  178. typename internal::packet_traits<Scalar>::type>::value >
  179. struct vectorization_logic_half
  180. {
  181. typedef internal::packet_traits<Scalar> PacketTraits;
  182. typedef typename internal::unpacket_traits<typename internal::packet_traits<Scalar>::type>::half PacketType;
  183. enum {
  184. PacketSize = internal::unpacket_traits<PacketType>::size
  185. };
  186. static void run()
  187. {
  188. typedef Matrix<Scalar,PacketSize,1> Vector1;
  189. typedef Matrix<Scalar,PacketSize,PacketSize> Matrix11;
  190. typedef Matrix<Scalar,5*PacketSize,7,ColMajor> Matrix57;
  191. typedef Matrix<Scalar,5*PacketSize,7,DontAlign|ColMajor> Matrix57u;
  192. // typedef Matrix<Scalar,(Matrix11::Flags&RowMajorBit)?16:4*PacketSize,(Matrix11::Flags&RowMajorBit)?4*PacketSize:16> Matrix44;
  193. // typedef Matrix<Scalar,(Matrix11::Flags&RowMajorBit)?16:4*PacketSize,(Matrix11::Flags&RowMajorBit)?4*PacketSize:16,DontAlign|STORMEIGEN_DEFAULT_MATRIX_STORAGE_ORDER_OPTION> Matrix44u;
  194. // typedef Matrix<Scalar,4*PacketSize,4*PacketSize,ColMajor> Matrix44c;
  195. // typedef Matrix<Scalar,4*PacketSize,4*PacketSize,RowMajor> Matrix44r;
  196. typedef Matrix<Scalar,
  197. (PacketSize==8 ? 4 : PacketSize==4 ? 2 : PacketSize==2 ? 1 : /*PacketSize==1 ?*/ 1),
  198. (PacketSize==8 ? 2 : PacketSize==4 ? 2 : PacketSize==2 ? 2 : /*PacketSize==1 ?*/ 1)
  199. > Matrix1;
  200. typedef Matrix<Scalar,
  201. (PacketSize==8 ? 4 : PacketSize==4 ? 2 : PacketSize==2 ? 1 : /*PacketSize==1 ?*/ 1),
  202. (PacketSize==8 ? 2 : PacketSize==4 ? 2 : PacketSize==2 ? 2 : /*PacketSize==1 ?*/ 1),
  203. DontAlign|((Matrix1::Flags&RowMajorBit)?RowMajor:ColMajor)> Matrix1u;
  204. // this type is made such that it can only be vectorized when viewed as a linear 1D vector
  205. typedef Matrix<Scalar,
  206. (PacketSize==8 ? 4 : PacketSize==4 ? 6 : PacketSize==2 ? ((Matrix11::Flags&RowMajorBit)?2:3) : /*PacketSize==1 ?*/ 1),
  207. (PacketSize==8 ? 6 : PacketSize==4 ? 2 : PacketSize==2 ? ((Matrix11::Flags&RowMajorBit)?3:2) : /*PacketSize==1 ?*/ 3)
  208. > Matrix3;
  209. #if !STORMEIGEN_GCC_AND_ARCH_DOESNT_WANT_STACK_ALIGNMENT
  210. VERIFY(test_assign(Vector1(),Vector1(),
  211. InnerVectorizedTraversal,CompleteUnrolling));
  212. VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
  213. InnerVectorizedTraversal,CompleteUnrolling));
  214. VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
  215. InnerVectorizedTraversal,CompleteUnrolling));
  216. VERIFY(test_assign(Vector1(),Vector1().template cast<Scalar>(),
  217. InnerVectorizedTraversal,CompleteUnrolling));
  218. VERIFY(test_assign(Vector1(),Vector1(),
  219. InnerVectorizedTraversal,CompleteUnrolling));
  220. VERIFY(test_assign(Vector1(),Vector1()+Vector1(),
  221. InnerVectorizedTraversal,CompleteUnrolling));
  222. VERIFY(test_assign(Vector1(),Vector1().cwiseProduct(Vector1()),
  223. InnerVectorizedTraversal,CompleteUnrolling));
  224. VERIFY(test_assign(Matrix57(),Matrix57()+Matrix57(),
  225. InnerVectorizedTraversal,InnerUnrolling));
  226. VERIFY(test_assign(Matrix57u(),Matrix57()+Matrix57(),
  227. LinearTraversal,NoUnrolling));
  228. VERIFY(test_assign(Matrix1u(),Matrix1()+Matrix1(),
  229. LinearTraversal,CompleteUnrolling));
  230. if(PacketSize>1)
  231. {
  232. typedef Matrix<Scalar,3,3,ColMajor> Matrix33c;
  233. VERIFY(test_assign(Matrix33c().row(2),Matrix33c().row(1)+Matrix33c().row(1),
  234. LinearTraversal,CompleteUnrolling));
  235. VERIFY(test_assign(Matrix33c().col(0),Matrix33c().col(1)+Matrix33c().col(1),
  236. LinearTraversal,CompleteUnrolling));
  237. VERIFY(test_assign(Matrix3(),Matrix3().cwiseQuotient(Matrix3()),
  238. PacketTraits::HasDiv ? LinearVectorizedTraversal : LinearTraversal,CompleteUnrolling));
  239. VERIFY(test_assign(Matrix<Scalar,17,17>(),Matrix<Scalar,17,17>()+Matrix<Scalar,17,17>(),
  240. LinearTraversal,NoUnrolling));
  241. VERIFY(test_assign(Matrix11(),Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(2,3)+Matrix<Scalar,17,17>().template block<PacketSize,PacketSize>(8,4),
  242. DefaultTraversal,PacketSize>4?InnerUnrolling:CompleteUnrolling));
  243. }
  244. VERIFY(test_redux(Matrix3(),
  245. LinearVectorizedTraversal,CompleteUnrolling));
  246. VERIFY(test_redux(Matrix57(),
  247. LinearVectorizedTraversal,CompleteUnrolling));
  248. VERIFY(test_redux(Matrix57().template block<PacketSize,3>(1,0),
  249. DefaultTraversal,CompleteUnrolling));
  250. VERIFY((test_assign<
  251. Map<Matrix<Scalar,STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize),STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize)>, AlignedMax, InnerStride<3*PacketSize> >,
  252. Matrix<Scalar,STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize),STORMEIGEN_PLAIN_ENUM_MAX(2,PacketSize)>
  253. >(DefaultTraversal,CompleteUnrolling)));
  254. VERIFY((test_assign(Matrix57(), Matrix<Scalar,5*PacketSize,3>()*Matrix<Scalar,3,7>(),
  255. InnerVectorizedTraversal, CompleteUnrolling)));
  256. #endif
  257. }
  258. };
  259. template<typename Scalar> struct vectorization_logic_half<Scalar,false>
  260. {
  261. static void run() {}
  262. };
  263. void test_vectorization_logic()
  264. {
  265. #ifdef STORMEIGEN_VECTORIZE
  266. CALL_SUBTEST( vectorization_logic<int>::run() );
  267. CALL_SUBTEST( vectorization_logic<float>::run() );
  268. CALL_SUBTEST( vectorization_logic<double>::run() );
  269. CALL_SUBTEST( vectorization_logic<std::complex<float> >::run() );
  270. CALL_SUBTEST( vectorization_logic<std::complex<double> >::run() );
  271. CALL_SUBTEST( vectorization_logic_half<int>::run() );
  272. CALL_SUBTEST( vectorization_logic_half<float>::run() );
  273. CALL_SUBTEST( vectorization_logic_half<double>::run() );
  274. CALL_SUBTEST( vectorization_logic_half<std::complex<float> >::run() );
  275. CALL_SUBTEST( vectorization_logic_half<std::complex<double> >::run() );
  276. if(internal::packet_traits<float>::Vectorizable)
  277. {
  278. VERIFY(test_assign(Matrix<float,3,3>(),Matrix<float,3,3>()+Matrix<float,3,3>(),
  279. LinearTraversal,CompleteUnrolling));
  280. VERIFY(test_redux(Matrix<float,5,2>(),
  281. DefaultTraversal,CompleteUnrolling));
  282. }
  283. if(internal::packet_traits<double>::Vectorizable)
  284. {
  285. VERIFY(test_assign(Matrix<double,3,3>(),Matrix<double,3,3>()+Matrix<double,3,3>(),
  286. LinearTraversal,CompleteUnrolling));
  287. VERIFY(test_redux(Matrix<double,7,3>(),
  288. DefaultTraversal,CompleteUnrolling));
  289. }
  290. #endif // STORMEIGEN_VECTORIZE
  291. }