You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

589 lines
22 KiB

  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // Copyright (C) 2008-2009 Gael Guennebaud <gael.guennebaud@inria.fr>
  5. // Copyright (C) 2006-2008 Benoit Jacob <jacob.benoit.1@gmail.com>
  6. //
  7. // This Source Code Form is subject to the terms of the Mozilla
  8. // Public License v. 2.0. If a copy of the MPL was not distributed
  9. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  10. #include "main.h"
  11. // using namespace StormEigen;
  12. namespace StormEigen {
  13. namespace internal {
  14. template<typename T> T negate(const T& x) { return -x; }
  15. }
  16. }
  17. // NOTE: we disbale inlining for this function to workaround a GCC issue when using -O3 and the i387 FPU.
  18. template<typename Scalar> STORMEIGEN_DONT_INLINE
  19. bool isApproxAbs(const Scalar& a, const Scalar& b, const typename NumTraits<Scalar>::Real& refvalue)
  20. {
  21. return internal::isMuchSmallerThan(a-b, refvalue);
  22. }
  23. template<typename Scalar> bool areApproxAbs(const Scalar* a, const Scalar* b, int size, const typename NumTraits<Scalar>::Real& refvalue)
  24. {
  25. for (int i=0; i<size; ++i)
  26. {
  27. if (!isApproxAbs(a[i],b[i],refvalue))
  28. {
  29. std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
  30. return false;
  31. }
  32. }
  33. return true;
  34. }
  35. template<typename Scalar> bool areApprox(const Scalar* a, const Scalar* b, int size)
  36. {
  37. for (int i=0; i<size; ++i)
  38. {
  39. if (a[i]!=b[i] && !internal::isApprox(a[i],b[i]))
  40. {
  41. std::cout << "ref: [" << Map<const Matrix<Scalar,1,Dynamic> >(a,size) << "]" << " != vec: [" << Map<const Matrix<Scalar,1,Dynamic> >(b,size) << "]\n";
  42. return false;
  43. }
  44. }
  45. return true;
  46. }
  47. #define CHECK_CWISE1(REFOP, POP) { \
  48. for (int i=0; i<PacketSize; ++i) \
  49. ref[i] = REFOP(data1[i]); \
  50. internal::pstore(data2, POP(internal::pload<Packet>(data1))); \
  51. VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
  52. }
  53. template<bool Cond,typename Packet>
  54. struct packet_helper
  55. {
  56. template<typename T>
  57. inline Packet load(const T* from) const { return internal::pload<Packet>(from); }
  58. template<typename T>
  59. inline void store(T* to, const Packet& x) const { internal::pstore(to,x); }
  60. };
  61. template<typename Packet>
  62. struct packet_helper<false,Packet>
  63. {
  64. template<typename T>
  65. inline T load(const T* from) const { return *from; }
  66. template<typename T>
  67. inline void store(T* to, const T& x) const { *to = x; }
  68. };
  69. #define CHECK_CWISE1_IF(COND, REFOP, POP) if(COND) { \
  70. packet_helper<COND,Packet> h; \
  71. for (int i=0; i<PacketSize; ++i) \
  72. ref[i] = REFOP(data1[i]); \
  73. h.store(data2, POP(h.load(data1))); \
  74. VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
  75. }
  76. #define CHECK_CWISE2_IF(COND, REFOP, POP) if(COND) { \
  77. packet_helper<COND,Packet> h; \
  78. for (int i=0; i<PacketSize; ++i) \
  79. ref[i] = REFOP(data1[i], data1[i+PacketSize]); \
  80. h.store(data2, POP(h.load(data1),h.load(data1+PacketSize))); \
  81. VERIFY(areApprox(ref, data2, PacketSize) && #POP); \
  82. }
  83. #define REF_ADD(a,b) ((a)+(b))
  84. #define REF_SUB(a,b) ((a)-(b))
  85. #define REF_MUL(a,b) ((a)*(b))
  86. #define REF_DIV(a,b) ((a)/(b))
  87. template<typename Scalar> void packetmath()
  88. {
  89. using std::abs;
  90. typedef internal::packet_traits<Scalar> PacketTraits;
  91. typedef typename PacketTraits::type Packet;
  92. const int PacketSize = PacketTraits::size;
  93. typedef typename NumTraits<Scalar>::Real RealScalar;
  94. const int max_size = PacketSize > 4 ? PacketSize : 4;
  95. const int size = PacketSize*max_size;
  96. STORMEIGEN_ALIGN_MAX Scalar data1[size];
  97. STORMEIGEN_ALIGN_MAX Scalar data2[size];
  98. STORMEIGEN_ALIGN_MAX Packet packets[PacketSize*2];
  99. STORMEIGEN_ALIGN_MAX Scalar ref[size];
  100. RealScalar refvalue = 0;
  101. for (int i=0; i<size; ++i)
  102. {
  103. data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
  104. data2[i] = internal::random<Scalar>()/RealScalar(PacketSize);
  105. refvalue = (std::max)(refvalue,abs(data1[i]));
  106. }
  107. internal::pstore(data2, internal::pload<Packet>(data1));
  108. VERIFY(areApprox(data1, data2, PacketSize) && "aligned load/store");
  109. for (int offset=0; offset<PacketSize; ++offset)
  110. {
  111. internal::pstore(data2, internal::ploadu<Packet>(data1+offset));
  112. VERIFY(areApprox(data1+offset, data2, PacketSize) && "internal::ploadu");
  113. }
  114. for (int offset=0; offset<PacketSize; ++offset)
  115. {
  116. internal::pstoreu(data2+offset, internal::pload<Packet>(data1));
  117. VERIFY(areApprox(data1, data2+offset, PacketSize) && "internal::pstoreu");
  118. }
  119. for (int offset=0; offset<PacketSize; ++offset)
  120. {
  121. packets[0] = internal::pload<Packet>(data1);
  122. packets[1] = internal::pload<Packet>(data1+PacketSize);
  123. if (offset==0) internal::palign<0>(packets[0], packets[1]);
  124. else if (offset==1) internal::palign<1>(packets[0], packets[1]);
  125. else if (offset==2) internal::palign<2>(packets[0], packets[1]);
  126. else if (offset==3) internal::palign<3>(packets[0], packets[1]);
  127. else if (offset==4) internal::palign<4>(packets[0], packets[1]);
  128. else if (offset==5) internal::palign<5>(packets[0], packets[1]);
  129. else if (offset==6) internal::palign<6>(packets[0], packets[1]);
  130. else if (offset==7) internal::palign<7>(packets[0], packets[1]);
  131. internal::pstore(data2, packets[0]);
  132. for (int i=0; i<PacketSize; ++i)
  133. ref[i] = data1[i+offset];
  134. VERIFY(areApprox(ref, data2, PacketSize) && "internal::palign");
  135. }
  136. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasAdd);
  137. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasSub);
  138. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMul);
  139. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasNegate);
  140. VERIFY((internal::is_same<Scalar,int>::value) || (!PacketTraits::Vectorizable) || PacketTraits::HasDiv);
  141. CHECK_CWISE2_IF(PacketTraits::HasAdd, REF_ADD, internal::padd);
  142. CHECK_CWISE2_IF(PacketTraits::HasSub, REF_SUB, internal::psub);
  143. CHECK_CWISE2_IF(PacketTraits::HasMul, REF_MUL, internal::pmul);
  144. CHECK_CWISE2_IF(PacketTraits::HasDiv, REF_DIV, internal::pdiv);
  145. CHECK_CWISE1(internal::negate, internal::pnegate);
  146. CHECK_CWISE1(numext::conj, internal::pconj);
  147. for(int offset=0;offset<3;++offset)
  148. {
  149. for (int i=0; i<PacketSize; ++i)
  150. ref[i] = data1[offset];
  151. internal::pstore(data2, internal::pset1<Packet>(data1[offset]));
  152. VERIFY(areApprox(ref, data2, PacketSize) && "internal::pset1");
  153. }
  154. {
  155. for (int i=0; i<PacketSize*4; ++i)
  156. ref[i] = data1[i/PacketSize];
  157. Packet A0, A1, A2, A3;
  158. internal::pbroadcast4<Packet>(data1, A0, A1, A2, A3);
  159. internal::pstore(data2+0*PacketSize, A0);
  160. internal::pstore(data2+1*PacketSize, A1);
  161. internal::pstore(data2+2*PacketSize, A2);
  162. internal::pstore(data2+3*PacketSize, A3);
  163. VERIFY(areApprox(ref, data2, 4*PacketSize) && "internal::pbroadcast4");
  164. }
  165. {
  166. for (int i=0; i<PacketSize*2; ++i)
  167. ref[i] = data1[i/PacketSize];
  168. Packet A0, A1;
  169. internal::pbroadcast2<Packet>(data1, A0, A1);
  170. internal::pstore(data2+0*PacketSize, A0);
  171. internal::pstore(data2+1*PacketSize, A1);
  172. VERIFY(areApprox(ref, data2, 2*PacketSize) && "internal::pbroadcast2");
  173. }
  174. VERIFY(internal::isApprox(data1[0], internal::pfirst(internal::pload<Packet>(data1))) && "internal::pfirst");
  175. if(PacketSize>1)
  176. {
  177. for(int offset=0;offset<4;++offset)
  178. {
  179. for(int i=0;i<PacketSize/2;++i)
  180. ref[2*i+0] = ref[2*i+1] = data1[offset+i];
  181. internal::pstore(data2,internal::ploaddup<Packet>(data1+offset));
  182. VERIFY(areApprox(ref, data2, PacketSize) && "ploaddup");
  183. }
  184. }
  185. if(PacketSize>2)
  186. {
  187. for(int offset=0;offset<4;++offset)
  188. {
  189. for(int i=0;i<PacketSize/4;++i)
  190. ref[4*i+0] = ref[4*i+1] = ref[4*i+2] = ref[4*i+3] = data1[offset+i];
  191. internal::pstore(data2,internal::ploadquad<Packet>(data1+offset));
  192. VERIFY(areApprox(ref, data2, PacketSize) && "ploadquad");
  193. }
  194. }
  195. ref[0] = 0;
  196. for (int i=0; i<PacketSize; ++i)
  197. ref[0] += data1[i];
  198. VERIFY(isApproxAbs(ref[0], internal::predux(internal::pload<Packet>(data1)), refvalue) && "internal::predux");
  199. {
  200. for (int i=0; i<4; ++i)
  201. ref[i] = 0;
  202. for (int i=0; i<PacketSize; ++i)
  203. ref[i%4] += data1[i];
  204. internal::pstore(data2, internal::predux4(internal::pload<Packet>(data1)));
  205. VERIFY(areApprox(ref, data2, PacketSize>4?PacketSize/2:PacketSize) && "internal::predux4");
  206. }
  207. ref[0] = 1;
  208. for (int i=0; i<PacketSize; ++i)
  209. ref[0] *= data1[i];
  210. VERIFY(internal::isApprox(ref[0], internal::predux_mul(internal::pload<Packet>(data1))) && "internal::predux_mul");
  211. for (int j=0; j<PacketSize; ++j)
  212. {
  213. ref[j] = 0;
  214. for (int i=0; i<PacketSize; ++i)
  215. ref[j] += data1[i+j*PacketSize];
  216. packets[j] = internal::pload<Packet>(data1+j*PacketSize);
  217. }
  218. internal::pstore(data2, internal::preduxp(packets));
  219. VERIFY(areApproxAbs(ref, data2, PacketSize, refvalue) && "internal::preduxp");
  220. for (int i=0; i<PacketSize; ++i)
  221. ref[i] = data1[PacketSize-i-1];
  222. internal::pstore(data2, internal::preverse(internal::pload<Packet>(data1)));
  223. VERIFY(areApprox(ref, data2, PacketSize) && "internal::preverse");
  224. internal::PacketBlock<Packet> kernel;
  225. for (int i=0; i<PacketSize; ++i) {
  226. kernel.packet[i] = internal::pload<Packet>(data1+i*PacketSize);
  227. }
  228. ptranspose(kernel);
  229. for (int i=0; i<PacketSize; ++i) {
  230. internal::pstore(data2, kernel.packet[i]);
  231. for (int j = 0; j < PacketSize; ++j) {
  232. VERIFY(isApproxAbs(data2[j], data1[i+j*PacketSize], refvalue) && "ptranspose");
  233. }
  234. }
  235. if (PacketTraits::HasBlend) {
  236. Packet thenPacket = internal::pload<Packet>(data1);
  237. Packet elsePacket = internal::pload<Packet>(data2);
  238. STORMEIGEN_ALIGN_MAX internal::Selector<PacketSize> selector;
  239. for (int i = 0; i < PacketSize; ++i) {
  240. selector.select[i] = i;
  241. }
  242. Packet blend = internal::pblend(selector, thenPacket, elsePacket);
  243. STORMEIGEN_ALIGN_MAX Scalar result[size];
  244. internal::pstore(result, blend);
  245. for (int i = 0; i < PacketSize; ++i) {
  246. VERIFY(isApproxAbs(result[i], (selector.select[i] ? data1[i] : data2[i]), refvalue));
  247. }
  248. }
  249. }
  250. template<typename Scalar> void packetmath_real()
  251. {
  252. using std::abs;
  253. typedef internal::packet_traits<Scalar> PacketTraits;
  254. typedef typename PacketTraits::type Packet;
  255. const int PacketSize = PacketTraits::size;
  256. const int size = PacketSize*4;
  257. STORMEIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4];
  258. STORMEIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
  259. STORMEIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
  260. for (int i=0; i<size; ++i)
  261. {
  262. data1[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
  263. data2[i] = internal::random<Scalar>(-1,1) * std::pow(Scalar(10), internal::random<Scalar>(-3,3));
  264. }
  265. CHECK_CWISE1_IF(PacketTraits::HasSin, std::sin, internal::psin);
  266. CHECK_CWISE1_IF(PacketTraits::HasCos, std::cos, internal::pcos);
  267. CHECK_CWISE1_IF(PacketTraits::HasTan, std::tan, internal::ptan);
  268. CHECK_CWISE1_IF(PacketTraits::HasRound, numext::round, internal::pround);
  269. CHECK_CWISE1_IF(PacketTraits::HasCeil, numext::ceil, internal::pceil);
  270. CHECK_CWISE1_IF(PacketTraits::HasFloor, numext::floor, internal::pfloor);
  271. for (int i=0; i<size; ++i)
  272. {
  273. data1[i] = internal::random<Scalar>(-1,1);
  274. data2[i] = internal::random<Scalar>(-1,1);
  275. }
  276. CHECK_CWISE1_IF(PacketTraits::HasASin, std::asin, internal::pasin);
  277. CHECK_CWISE1_IF(PacketTraits::HasACos, std::acos, internal::pacos);
  278. for (int i=0; i<size; ++i)
  279. {
  280. data1[i] = internal::random<Scalar>(-87,88);
  281. data2[i] = internal::random<Scalar>(-87,88);
  282. }
  283. CHECK_CWISE1_IF(PacketTraits::HasExp, std::exp, internal::pexp);
  284. if(PacketTraits::HasExp && PacketTraits::size>=2)
  285. {
  286. data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
  287. data1[1] = std::numeric_limits<Scalar>::epsilon();
  288. packet_helper<PacketTraits::HasExp,Packet> h;
  289. h.store(data2, internal::pexp(h.load(data1)));
  290. VERIFY((numext::isnan)(data2[0]));
  291. VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::epsilon()), data2[1]);
  292. data1[0] = -std::numeric_limits<Scalar>::epsilon();
  293. data1[1] = 0;
  294. h.store(data2, internal::pexp(h.load(data1)));
  295. VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::epsilon()), data2[0]);
  296. VERIFY_IS_EQUAL(std::exp(Scalar(0)), data2[1]);
  297. data1[0] = (std::numeric_limits<Scalar>::min)();
  298. data1[1] = -(std::numeric_limits<Scalar>::min)();
  299. h.store(data2, internal::pexp(h.load(data1)));
  300. VERIFY_IS_EQUAL(std::exp((std::numeric_limits<Scalar>::min)()), data2[0]);
  301. VERIFY_IS_EQUAL(std::exp(-(std::numeric_limits<Scalar>::min)()), data2[1]);
  302. data1[0] = std::numeric_limits<Scalar>::denorm_min();
  303. data1[1] = -std::numeric_limits<Scalar>::denorm_min();
  304. h.store(data2, internal::pexp(h.load(data1)));
  305. VERIFY_IS_EQUAL(std::exp(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
  306. VERIFY_IS_EQUAL(std::exp(-std::numeric_limits<Scalar>::denorm_min()), data2[1]);
  307. }
  308. #ifdef STORMEIGEN_HAS_C99_MATH
  309. {
  310. data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
  311. packet_helper<internal::packet_traits<Scalar>::HasLGamma,Packet> h;
  312. h.store(data2, internal::plgamma(h.load(data1)));
  313. VERIFY((numext::isnan)(data2[0]));
  314. }
  315. {
  316. data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
  317. packet_helper<internal::packet_traits<Scalar>::HasErf,Packet> h;
  318. h.store(data2, internal::perf(h.load(data1)));
  319. VERIFY((numext::isnan)(data2[0]));
  320. }
  321. {
  322. data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
  323. packet_helper<internal::packet_traits<Scalar>::HasErfc,Packet> h;
  324. h.store(data2, internal::perfc(h.load(data1)));
  325. VERIFY((numext::isnan)(data2[0]));
  326. }
  327. #endif // STORMEIGEN_HAS_C99_MATH
  328. for (int i=0; i<size; ++i)
  329. {
  330. data1[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
  331. data2[i] = internal::random<Scalar>(0,1) * std::pow(Scalar(10), internal::random<Scalar>(-6,6));
  332. }
  333. if(internal::random<float>(0,1)<0.1)
  334. data1[internal::random<int>(0, PacketSize)] = 0;
  335. CHECK_CWISE1_IF(PacketTraits::HasSqrt, std::sqrt, internal::psqrt);
  336. CHECK_CWISE1_IF(PacketTraits::HasLog, std::log, internal::plog);
  337. #if defined(STORMEIGEN_HAS_C99_MATH) && (__cplusplus > 199711L)
  338. CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasLGamma, std::lgamma, internal::plgamma);
  339. CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErf, std::erf, internal::perf);
  340. CHECK_CWISE1_IF(internal::packet_traits<Scalar>::HasErfc, std::erfc, internal::perfc);
  341. #endif
  342. if(PacketTraits::HasLog && PacketTraits::size>=2)
  343. {
  344. data1[0] = std::numeric_limits<Scalar>::quiet_NaN();
  345. data1[1] = std::numeric_limits<Scalar>::epsilon();
  346. packet_helper<PacketTraits::HasLog,Packet> h;
  347. h.store(data2, internal::plog(h.load(data1)));
  348. VERIFY((numext::isnan)(data2[0]));
  349. VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::epsilon()), data2[1]);
  350. data1[0] = -std::numeric_limits<Scalar>::epsilon();
  351. data1[1] = 0;
  352. h.store(data2, internal::plog(h.load(data1)));
  353. VERIFY((numext::isnan)(data2[0]));
  354. VERIFY_IS_EQUAL(std::log(Scalar(0)), data2[1]);
  355. data1[0] = (std::numeric_limits<Scalar>::min)();
  356. data1[1] = -(std::numeric_limits<Scalar>::min)();
  357. h.store(data2, internal::plog(h.load(data1)));
  358. VERIFY_IS_EQUAL(std::log((std::numeric_limits<Scalar>::min)()), data2[0]);
  359. VERIFY((numext::isnan)(data2[1]));
  360. data1[0] = std::numeric_limits<Scalar>::denorm_min();
  361. data1[1] = -std::numeric_limits<Scalar>::denorm_min();
  362. h.store(data2, internal::plog(h.load(data1)));
  363. // VERIFY_IS_EQUAL(std::log(std::numeric_limits<Scalar>::denorm_min()), data2[0]);
  364. VERIFY((numext::isnan)(data2[1]));
  365. data1[0] = -1.0f;
  366. h.store(data2, internal::plog(h.load(data1)));
  367. VERIFY((numext::isnan)(data2[0]));
  368. #if !STORMEIGEN_FAST_MATH
  369. h.store(data2, internal::psqrt(h.load(data1)));
  370. VERIFY((numext::isnan)(data2[0]));
  371. VERIFY((numext::isnan)(data2[1]));
  372. #endif
  373. }
  374. }
  375. template<typename Scalar> void packetmath_notcomplex()
  376. {
  377. using std::abs;
  378. typedef internal::packet_traits<Scalar> PacketTraits;
  379. typedef typename PacketTraits::type Packet;
  380. const int PacketSize = PacketTraits::size;
  381. STORMEIGEN_ALIGN_MAX Scalar data1[PacketTraits::size*4];
  382. STORMEIGEN_ALIGN_MAX Scalar data2[PacketTraits::size*4];
  383. STORMEIGEN_ALIGN_MAX Scalar ref[PacketTraits::size*4];
  384. Array<Scalar,Dynamic,1>::Map(data1, PacketTraits::size*4).setRandom();
  385. ref[0] = data1[0];
  386. for (int i=0; i<PacketSize; ++i)
  387. ref[0] = (std::min)(ref[0],data1[i]);
  388. VERIFY(internal::isApprox(ref[0], internal::predux_min(internal::pload<Packet>(data1))) && "internal::predux_min");
  389. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMin);
  390. VERIFY((!PacketTraits::Vectorizable) || PacketTraits::HasMax);
  391. CHECK_CWISE2_IF(PacketTraits::HasMin, (std::min), internal::pmin);
  392. CHECK_CWISE2_IF(PacketTraits::HasMax, (std::max), internal::pmax);
  393. CHECK_CWISE1(abs, internal::pabs);
  394. ref[0] = data1[0];
  395. for (int i=0; i<PacketSize; ++i)
  396. ref[0] = (std::max)(ref[0],data1[i]);
  397. VERIFY(internal::isApprox(ref[0], internal::predux_max(internal::pload<Packet>(data1))) && "internal::predux_max");
  398. for (int i=0; i<PacketSize; ++i)
  399. ref[i] = data1[0]+Scalar(i);
  400. internal::pstore(data2, internal::plset<Packet>(data1[0]));
  401. VERIFY(areApprox(ref, data2, PacketSize) && "internal::plset");
  402. }
  403. template<typename Scalar,bool ConjLhs,bool ConjRhs> void test_conj_helper(Scalar* data1, Scalar* data2, Scalar* ref, Scalar* pval)
  404. {
  405. typedef internal::packet_traits<Scalar> PacketTraits;
  406. typedef typename PacketTraits::type Packet;
  407. const int PacketSize = PacketTraits::size;
  408. internal::conj_if<ConjLhs> cj0;
  409. internal::conj_if<ConjRhs> cj1;
  410. internal::conj_helper<Scalar,Scalar,ConjLhs,ConjRhs> cj;
  411. internal::conj_helper<Packet,Packet,ConjLhs,ConjRhs> pcj;
  412. for(int i=0;i<PacketSize;++i)
  413. {
  414. ref[i] = cj0(data1[i]) * cj1(data2[i]);
  415. VERIFY(internal::isApprox(ref[i], cj.pmul(data1[i],data2[i])) && "conj_helper pmul");
  416. }
  417. internal::pstore(pval,pcj.pmul(internal::pload<Packet>(data1),internal::pload<Packet>(data2)));
  418. VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmul");
  419. for(int i=0;i<PacketSize;++i)
  420. {
  421. Scalar tmp = ref[i];
  422. ref[i] += cj0(data1[i]) * cj1(data2[i]);
  423. VERIFY(internal::isApprox(ref[i], cj.pmadd(data1[i],data2[i],tmp)) && "conj_helper pmadd");
  424. }
  425. internal::pstore(pval,pcj.pmadd(internal::pload<Packet>(data1),internal::pload<Packet>(data2),internal::pload<Packet>(pval)));
  426. VERIFY(areApprox(ref, pval, PacketSize) && "conj_helper pmadd");
  427. }
  428. template<typename Scalar> void packetmath_complex()
  429. {
  430. typedef internal::packet_traits<Scalar> PacketTraits;
  431. typedef typename PacketTraits::type Packet;
  432. const int PacketSize = PacketTraits::size;
  433. const int size = PacketSize*4;
  434. STORMEIGEN_ALIGN_MAX Scalar data1[PacketSize*4];
  435. STORMEIGEN_ALIGN_MAX Scalar data2[PacketSize*4];
  436. STORMEIGEN_ALIGN_MAX Scalar ref[PacketSize*4];
  437. STORMEIGEN_ALIGN_MAX Scalar pval[PacketSize*4];
  438. for (int i=0; i<size; ++i)
  439. {
  440. data1[i] = internal::random<Scalar>() * Scalar(1e2);
  441. data2[i] = internal::random<Scalar>() * Scalar(1e2);
  442. }
  443. test_conj_helper<Scalar,false,false> (data1,data2,ref,pval);
  444. test_conj_helper<Scalar,false,true> (data1,data2,ref,pval);
  445. test_conj_helper<Scalar,true,false> (data1,data2,ref,pval);
  446. test_conj_helper<Scalar,true,true> (data1,data2,ref,pval);
  447. {
  448. for(int i=0;i<PacketSize;++i)
  449. ref[i] = Scalar(std::imag(data1[i]),std::real(data1[i]));
  450. internal::pstore(pval,internal::pcplxflip(internal::pload<Packet>(data1)));
  451. VERIFY(areApprox(ref, pval, PacketSize) && "pcplxflip");
  452. }
  453. }
  454. template<typename Scalar> void packetmath_scatter_gather()
  455. {
  456. typedef internal::packet_traits<Scalar> PacketTraits;
  457. typedef typename PacketTraits::type Packet;
  458. typedef typename NumTraits<Scalar>::Real RealScalar;
  459. const int PacketSize = PacketTraits::size;
  460. STORMEIGEN_ALIGN_MAX Scalar data1[PacketSize];
  461. RealScalar refvalue = 0;
  462. for (int i=0; i<PacketSize; ++i) {
  463. data1[i] = internal::random<Scalar>()/RealScalar(PacketSize);
  464. }
  465. int stride = internal::random<int>(1,20);
  466. STORMEIGEN_ALIGN_MAX Scalar buffer[PacketSize*20];
  467. memset(buffer, 0, 20*sizeof(Packet));
  468. Packet packet = internal::pload<Packet>(data1);
  469. internal::pscatter<Scalar, Packet>(buffer, packet, stride);
  470. for (int i = 0; i < PacketSize*20; ++i) {
  471. if ((i%stride) == 0 && i<stride*PacketSize) {
  472. VERIFY(isApproxAbs(buffer[i], data1[i/stride], refvalue) && "pscatter");
  473. } else {
  474. VERIFY(isApproxAbs(buffer[i], Scalar(0), refvalue) && "pscatter");
  475. }
  476. }
  477. for (int i=0; i<PacketSize*7; ++i) {
  478. buffer[i] = internal::random<Scalar>()/RealScalar(PacketSize);
  479. }
  480. packet = internal::pgather<Scalar, Packet>(buffer, 7);
  481. internal::pstore(data1, packet);
  482. for (int i = 0; i < PacketSize; ++i) {
  483. VERIFY(isApproxAbs(data1[i], buffer[i*7], refvalue) && "pgather");
  484. }
  485. }
  486. void test_packetmath()
  487. {
  488. for(int i = 0; i < g_repeat; i++) {
  489. CALL_SUBTEST_1( packetmath<float>() );
  490. CALL_SUBTEST_2( packetmath<double>() );
  491. CALL_SUBTEST_3( packetmath<int>() );
  492. CALL_SUBTEST_4( packetmath<std::complex<float> >() );
  493. CALL_SUBTEST_5( packetmath<std::complex<double> >() );
  494. CALL_SUBTEST_1( packetmath_notcomplex<float>() );
  495. CALL_SUBTEST_2( packetmath_notcomplex<double>() );
  496. CALL_SUBTEST_3( packetmath_notcomplex<int>() );
  497. CALL_SUBTEST_1( packetmath_real<float>() );
  498. CALL_SUBTEST_2( packetmath_real<double>() );
  499. CALL_SUBTEST_4( packetmath_complex<std::complex<float> >() );
  500. CALL_SUBTEST_5( packetmath_complex<std::complex<double> >() );
  501. CALL_SUBTEST_1( packetmath_scatter_gather<float>() );
  502. CALL_SUBTEST_2( packetmath_scatter_gather<double>() );
  503. CALL_SUBTEST_3( packetmath_scatter_gather<int>() );
  504. CALL_SUBTEST_4( packetmath_scatter_gather<std::complex<float> >() );
  505. CALL_SUBTEST_5( packetmath_scatter_gather<std::complex<double> >() );
  506. }
  507. }