You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

445 lines
15 KiB

  1. // This file is part of Eigen, a lightweight C++ template library
  2. // for linear algebra.
  3. //
  4. // Copyright (C) 2008 Gael Guennebaud <gael.guennebaud@inria.fr>
  5. // Copyright (C) 2007-2011 Benoit Jacob <jacob.benoit.1@gmail.com>
  6. //
  7. // This Source Code Form is subject to the terms of the Mozilla
  8. // Public License v. 2.0. If a copy of the MPL was not distributed
  9. // with this file, You can obtain one at http://mozilla.org/MPL/2.0/.
  10. #ifndef STORMEIGEN_CORE_H
  11. #define STORMEIGEN_CORE_H
  12. // first thing Eigen does: stop the compiler from committing suicide
  13. #include "src/Core/util/DisableStupidWarnings.h"
  14. // Handle NVCC/CUDA
  15. #ifdef __CUDACC__
  16. // Do not try asserts on CUDA!
  17. #ifndef STORMEIGEN_NO_DEBUG
  18. #define STORMEIGEN_NO_DEBUG
  19. #endif
  20. #ifdef STORMEIGEN_INTERNAL_DEBUGGING
  21. #undef STORMEIGEN_INTERNAL_DEBUGGING
  22. #endif
  23. // Do not try to vectorize on CUDA!
  24. #ifndef STORMEIGEN_DONT_VECTORIZE
  25. #define STORMEIGEN_DONT_VECTORIZE
  26. #endif
  27. #ifdef STORMEIGEN_EXCEPTIONS
  28. #undef STORMEIGEN_EXCEPTIONS
  29. #endif
  30. // All functions callable from CUDA code must be qualified with __device__
  31. #define STORMEIGEN_DEVICE_FUNC __host__ __device__
  32. #else
  33. #define STORMEIGEN_DEVICE_FUNC
  34. #endif
  // STORMEIGEN_USING_STD_MATH(FUNC) expands to a using-declaration for the math function FUNC.
  // When __CUDA_ARCH__ is defined the function is taken from the global namespace (::FUNC);
  // otherwise it is taken from namespace std (std::FUNC).
  // Note that the expansion already ends with a semicolon.
  35. #if defined(__CUDA_ARCH__)
  36. #define STORMEIGEN_USING_STD_MATH(FUNC) using ::FUNC;
  37. #else
  38. #define STORMEIGEN_USING_STD_MATH(FUNC) using std::FUNC;
  39. #endif
  // Auto-detect exception support: define STORMEIGEN_EXCEPTIONS when the compiler advertises
  // exceptions (_CPPUNWIND or __EXCEPTIONS is defined), __CUDA_ARCH__ is not defined, and the
  // macro has not been defined already.
  40. #if (defined(_CPPUNWIND) || defined(__EXCEPTIONS)) && !defined(__CUDA_ARCH__) && !defined(STORMEIGEN_EXCEPTIONS)
  41. #define STORMEIGEN_EXCEPTIONS
  42. #endif
  // <new> is only pulled in when exceptions are enabled
  // (presumably for std::bad_alloc -- TODO confirm against the allocation helpers).
  43. #ifdef STORMEIGEN_EXCEPTIONS
  44. #include <new>
  45. #endif
  46. // then include this file where all our macros are defined. It's really important to do it first because
  47. // it's where we do all the alignment settings (platform detection and honoring the user's will if he
  48. // defined e.g. STORMEIGEN_DONT_ALIGN) so it needs to be done before we do anything with vectorization.
  49. #include "src/Core/util/Macros.h"
  50. // Disable the ipa-cp-clone optimization flag with MinGW GCC 4.6 or newer (enabled by default with -O3)
  51. // See http://eigen.tuxfamily.org/bz/show_bug.cgi?id=556 for details.
  52. #if STORMEIGEN_COMP_MINGW && STORMEIGEN_GNUC_AT_LEAST(4,6)
  53. #pragma GCC optimize ("-fno-ipa-cp-clone")
  54. #endif
  55. #include <complex>
  56. // this include file manages BLAS and MKL related macros
  57. // and inclusion of their respective header files
  58. #include "src/Core/util/MKL_support.h"
  59. // if alignment is disabled, then disable vectorization. Note: STORMEIGEN_MAX_ALIGN_BYTES is the proper check, it takes into
  60. // account both the user's will (STORMEIGEN_MAX_ALIGN_BYTES,STORMEIGEN_DONT_ALIGN) and our own platform checks
  61. #if STORMEIGEN_MAX_ALIGN_BYTES==0
  62. #ifndef STORMEIGEN_DONT_VECTORIZE
  63. #define STORMEIGEN_DONT_VECTORIZE
  64. #endif
  65. #endif
  66. #if STORMEIGEN_COMP_MSVC
  67. #include <malloc.h> // for _aligned_malloc -- need it regardless of whether vectorization is enabled
  68. #if (STORMEIGEN_COMP_MSVC >= 1500) // 2008 or later
  69. // Remember that usage of defined() in a #define is undefined by the standard.
  70. // a user reported that in 64-bit mode, MSVC doesn't care to define _M_IX86_FP.
  71. #if (defined(_M_IX86_FP) && (_M_IX86_FP >= 2)) || STORMEIGEN_ARCH_x86_64
  72. #define STORMEIGEN_SSE2_ON_MSVC_2008_OR_LATER
  73. #endif
  74. #endif
  75. #else
  76. // Remember that usage of defined() in a #define is undefined by the standard
  77. #if (defined __SSE2__) && ( (!STORMEIGEN_COMP_GNUC) || STORMEIGEN_COMP_ICC || STORMEIGEN_GNUC_AT_LEAST(4,2) )
  78. #define STORMEIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC
  79. #endif
  80. #endif
  81. #ifndef STORMEIGEN_DONT_VECTORIZE
  82. #if defined (STORMEIGEN_SSE2_ON_NON_MSVC_BUT_NOT_OLD_GCC) || defined(STORMEIGEN_SSE2_ON_MSVC_2008_OR_LATER)
  83. // Defines symbols for compile-time detection of which instructions are
  84. // used.
  85. // STORMEIGEN_VECTORIZE_YY is defined if and only if the instruction set YY is used
  86. #define STORMEIGEN_VECTORIZE
  87. #define STORMEIGEN_VECTORIZE_SSE
  88. #define STORMEIGEN_VECTORIZE_SSE2
  89. // Detect sse3/ssse3/sse4:
  90. // gcc and icc defines __SSE3__, ...
  91. // there is no way to know about this on msvc. You can define STORMEIGEN_VECTORIZE_SSE* if you
  92. // want to force the use of those instructions with msvc.
  93. #ifdef __SSE3__
  94. #define STORMEIGEN_VECTORIZE_SSE3
  95. #endif
  96. #ifdef __SSSE3__
  97. #define STORMEIGEN_VECTORIZE_SSSE3
  98. #endif
  99. #ifdef __SSE4_1__
  100. #define STORMEIGEN_VECTORIZE_SSE4_1
  101. #endif
  102. #ifdef __SSE4_2__
  103. #define STORMEIGEN_VECTORIZE_SSE4_2
  104. #endif
  105. #ifdef __AVX__
  106. #define STORMEIGEN_VECTORIZE_AVX
  107. #define STORMEIGEN_VECTORIZE_SSE3
  108. #define STORMEIGEN_VECTORIZE_SSSE3
  109. #define STORMEIGEN_VECTORIZE_SSE4_1
  110. #define STORMEIGEN_VECTORIZE_SSE4_2
  111. #endif
  112. #ifdef __AVX2__
  113. #define STORMEIGEN_VECTORIZE_AVX2
  114. #endif
  115. #ifdef __FMA__
  116. #define STORMEIGEN_VECTORIZE_FMA
  117. #endif
  118. // include files
  119. // This extern "C" works around a MINGW-w64 compilation issue
  120. // https://sourceforge.net/tracker/index.php?func=detail&aid=3018394&group_id=202880&atid=983354
  121. // In essence, intrin.h is included by windows.h and also declares intrinsics (just as emmintrin.h etc. below do).
  122. // However, intrin.h uses an extern "C" declaration, and g++ thus complains of duplicate declarations
  123. // with conflicting linkage. The linkage for intrinsics doesn't matter, but at that stage the compiler doesn't know;
  124. // so, to avoid compile errors when windows.h is included after StormEigen/Core, ensure intrinsics are extern "C" here too.
  125. // notice that since these are C headers, the extern "C" is theoretically needed anyways.
  126. extern "C" {
  127. // In theory we should only include immintrin.h and not the other *mmintrin.h header files directly.
  128. // Doing so triggers some issues with ICC. However, old gcc versions seem not to have this file, thus:
  129. #if STORMEIGEN_COMP_ICC >= 1110
  130. #include <immintrin.h>
  131. #else
  132. #include <emmintrin.h>
  133. #include <xmmintrin.h>
  134. #ifdef STORMEIGEN_VECTORIZE_SSE3
  135. #include <pmmintrin.h>
  136. #endif
  137. #ifdef STORMEIGEN_VECTORIZE_SSSE3
  138. #include <tmmintrin.h>
  139. #endif
  140. #ifdef STORMEIGEN_VECTORIZE_SSE4_1
  141. #include <smmintrin.h>
  142. #endif
  143. #ifdef STORMEIGEN_VECTORIZE_SSE4_2
  144. #include <nmmintrin.h>
  145. #endif
  146. #ifdef STORMEIGEN_VECTORIZE_AVX
  147. #include <immintrin.h>
  148. #endif
  149. #endif
  150. } // end extern "C"
  151. #elif defined __VSX__
  152. #define STORMEIGEN_VECTORIZE
  153. #define STORMEIGEN_VECTORIZE_VSX
  154. #include <altivec.h>
  155. // We need to #undef all these ugly tokens defined in <altivec.h>
  156. // => use __vector instead of vector
  157. #undef bool
  158. #undef vector
  159. #undef pixel
  160. #elif defined __ALTIVEC__
  161. #define STORMEIGEN_VECTORIZE
  162. #define STORMEIGEN_VECTORIZE_ALTIVEC
  163. #include <altivec.h>
  164. // We need to #undef all these ugly tokens defined in <altivec.h>
  165. // => use __vector instead of vector
  166. #undef bool
  167. #undef vector
  168. #undef pixel
  169. #elif (defined __ARM_NEON) || (defined __ARM_NEON__)
  170. #define STORMEIGEN_VECTORIZE
  171. #define STORMEIGEN_VECTORIZE_NEON
  172. #include <arm_neon.h>
  173. #endif
  174. #endif
  175. #if defined __CUDACC__
  176. #define STORMEIGEN_VECTORIZE_CUDA
  177. #include <vector_types.h>
  178. #endif
  // STORMEIGEN_HAS_OPENMP is defined when the compiler defines _OPENMP and the user has not
  // opted out by defining STORMEIGEN_DONT_PARALLELIZE.
  179. #if (defined _OPENMP) && (!defined STORMEIGEN_DONT_PARALLELIZE)
  180. #define STORMEIGEN_HAS_OPENMP
  181. #endif
  // The OpenMP runtime header is included only when STORMEIGEN_HAS_OPENMP is defined.
  182. #ifdef STORMEIGEN_HAS_OPENMP
  183. #include <omp.h>
  184. #endif
  185. // MSVC for windows mobile does not have the errno.h file
  186. #if !(STORMEIGEN_COMP_MSVC && STORMEIGEN_OS_WINCE) && !STORMEIGEN_COMP_ARM
  187. #define STORMEIGEN_HAS_ERRNO
  188. #endif
  189. #ifdef STORMEIGEN_HAS_ERRNO
  190. #include <cerrno>
  191. #endif
  192. #include <cstddef>
  193. #include <cstdlib>
  194. #include <cmath>
  195. #include <cassert>
  196. #include <functional>
  197. #include <iosfwd>
  198. #include <cstring>
  199. #include <string>
  200. #include <limits>
  201. #include <climits> // for CHAR_BIT
  202. // for min/max:
  203. #include <algorithm>
  204. // for outputting debug info
  205. #ifdef STORMEIGEN_DEBUG_ASSIGN
  206. #include <iostream>
  207. #endif
  208. // required for __cpuid, needs to be included after cmath
  209. #if STORMEIGEN_COMP_MSVC && STORMEIGEN_ARCH_i386_OR_x86_64 && !STORMEIGEN_OS_WINCE
  210. #include <intrin.h>
  211. #endif
  212. /** \brief Namespace containing all symbols from the %Eigen library. */
  213. namespace StormEigen {
  /**
   * \brief Reports which SIMD instruction sets this build was configured to use.
   *
   * The result is chosen at preprocessing time from the STORMEIGEN_VECTORIZE_* macros.
   * The branches are tested in the order AVX, SSE4.2, SSE4.1, SSSE3, SSE3, SSE2,
   * AltiVec, VSX, NEON; the first macro found to be defined selects the string.
   *
   * \returns a pointer to a string literal describing the instruction sets, or
   *          "None" when none of the macros tested here is defined. The pointer
   *          is never null and must not be freed or modified by the caller.
   *
   * \note This function does not test STORMEIGEN_VECTORIZE_AVX2 or
   *       STORMEIGEN_VECTORIZE_FMA, so those are not reflected in the result.
   */
  inline static const char *SimdInstructionSetsInUse(void) {
  #if defined(STORMEIGEN_VECTORIZE_AVX)
    // NOTE(review): there is no comma after "AVX", unlike the other entries.
    // Left unchanged so the returned text stays exactly the same for callers.
    return "AVX SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
  #elif defined(STORMEIGEN_VECTORIZE_SSE4_2)
    return "SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2";
  #elif defined(STORMEIGEN_VECTORIZE_SSE4_1)
    return "SSE, SSE2, SSE3, SSSE3, SSE4.1";
  #elif defined(STORMEIGEN_VECTORIZE_SSSE3)
    return "SSE, SSE2, SSE3, SSSE3";
  #elif defined(STORMEIGEN_VECTORIZE_SSE3)
    return "SSE, SSE2, SSE3";
  #elif defined(STORMEIGEN_VECTORIZE_SSE2)
    return "SSE, SSE2";
  #elif defined(STORMEIGEN_VECTORIZE_ALTIVEC)
    return "AltiVec";
  #elif defined(STORMEIGEN_VECTORIZE_VSX)
    return "VSX";
  #elif defined(STORMEIGEN_VECTORIZE_NEON)
    return "ARM NEON";
  #else
    // No vectorization macro tested above is defined.
    return "None";
  #endif
  }
  237. } // end namespace StormEigen
  238. #if defined STORMEIGEN2_SUPPORT_STAGE40_FULL_STORMEIGEN3_STRICTNESS || defined STORMEIGEN2_SUPPORT_STAGE30_FULL_STORMEIGEN3_API || defined STORMEIGEN2_SUPPORT_STAGE20_RESOLVE_API_CONFLICTS || defined STORMEIGEN2_SUPPORT_STAGE10_FULL_STORMEIGEN2_API || defined STORMEIGEN2_SUPPORT
  239. // This will generate an error message:
  240. #error Eigen2-support is only available up to version 3.2. Please go to "http://eigen.tuxfamily.org/index.php?title=Eigen2" for further information
  241. #endif
  242. // we use size_t frequently and we'll never remember to prepend it with std:: every time just to
  243. // ensure QNX/QCC support
  244. using std::size_t;
  245. // gcc 4.6.0 wants std:: for ptrdiff_t
  246. using std::ptrdiff_t;
  247. /** \defgroup Core_Module Core module
  248. * This is the main module of Eigen providing dense matrix and vector support
  249. * (both fixed and dynamic size) with all the features corresponding to a BLAS library
  250. * and much more...
  251. *
  252. * \code
  253. * #include <StormEigen/Core>
  254. * \endcode
  255. */
  256. #include "src/Core/util/Constants.h"
  257. #include "src/Core/util/Meta.h"
  258. #include "src/Core/util/ForwardDeclarations.h"
  259. #include "src/Core/util/StaticAssert.h"
  260. #include "src/Core/util/XprHelper.h"
  261. #include "src/Core/util/Memory.h"
  262. #include "src/Core/NumTraits.h"
  263. #include "src/Core/MathFunctions.h"
  264. #include "src/Core/SpecialFunctions.h"
  265. #include "src/Core/GenericPacketMath.h"
  266. #if defined STORMEIGEN_VECTORIZE_AVX
  267. // Use AVX for floats and doubles, SSE for integers
  268. #include "src/Core/arch/SSE/PacketMath.h"
  269. #include "src/Core/arch/SSE/Complex.h"
  270. #include "src/Core/arch/SSE/MathFunctions.h"
  271. #include "src/Core/arch/AVX/PacketMath.h"
  272. #include "src/Core/arch/AVX/MathFunctions.h"
  273. #include "src/Core/arch/AVX/Complex.h"
  274. #include "src/Core/arch/AVX/TypeCasting.h"
  275. #elif defined STORMEIGEN_VECTORIZE_SSE
  276. #include "src/Core/arch/SSE/PacketMath.h"
  277. #include "src/Core/arch/SSE/MathFunctions.h"
  278. #include "src/Core/arch/SSE/Complex.h"
  279. #include "src/Core/arch/SSE/TypeCasting.h"
  280. #elif defined(STORMEIGEN_VECTORIZE_ALTIVEC) || defined(STORMEIGEN_VECTORIZE_VSX)
  281. #include "src/Core/arch/AltiVec/PacketMath.h"
  282. #include "src/Core/arch/AltiVec/MathFunctions.h"
  283. #include "src/Core/arch/AltiVec/Complex.h"
  284. #elif defined STORMEIGEN_VECTORIZE_NEON
  285. #include "src/Core/arch/NEON/PacketMath.h"
  286. #include "src/Core/arch/NEON/MathFunctions.h"
  287. #include "src/Core/arch/NEON/Complex.h"
  288. #endif
  289. #if defined STORMEIGEN_VECTORIZE_CUDA
  290. #include "src/Core/arch/CUDA/PacketMath.h"
  291. #include "src/Core/arch/CUDA/MathFunctions.h"
  292. #endif
  293. #include "src/Core/arch/Default/Settings.h"
  294. #include "src/Core/functors/BinaryFunctors.h"
  295. #include "src/Core/functors/UnaryFunctors.h"
  296. #include "src/Core/functors/NullaryFunctors.h"
  297. #include "src/Core/functors/StlFunctors.h"
  298. #include "src/Core/functors/AssignmentFunctors.h"
  299. #include "src/Core/DenseCoeffsBase.h"
  300. #include "src/Core/DenseBase.h"
  301. #include "src/Core/MatrixBase.h"
  302. #include "src/Core/EigenBase.h"
  303. #include "src/Core/Product.h"
  304. #include "src/Core/CoreEvaluators.h"
  305. #include "src/Core/AssignEvaluator.h"
  306. #ifndef STORMEIGEN_PARSED_BY_DOXYGEN // work around Doxygen bug triggered by Assign.h r814874
  307. // at least confirmed with Doxygen 1.5.5 and 1.5.6
  308. #include "src/Core/Assign.h"
  309. #endif
  310. #include "src/Core/ArrayBase.h"
  311. #include "src/Core/util/BlasUtil.h"
  312. #include "src/Core/DenseStorage.h"
  313. #include "src/Core/NestByValue.h"
  314. // #include "src/Core/ForceAlignedAccess.h"
  315. #include "src/Core/ReturnByValue.h"
  316. #include "src/Core/NoAlias.h"
  317. #include "src/Core/PlainObjectBase.h"
  318. #include "src/Core/Matrix.h"
  319. #include "src/Core/Array.h"
  320. #include "src/Core/CwiseBinaryOp.h"
  321. #include "src/Core/CwiseUnaryOp.h"
  322. #include "src/Core/CwiseNullaryOp.h"
  323. #include "src/Core/CwiseUnaryView.h"
  324. #include "src/Core/SelfCwiseBinaryOp.h"
  325. #include "src/Core/Dot.h"
  326. #include "src/Core/StableNorm.h"
  327. #include "src/Core/Stride.h"
  328. #include "src/Core/MapBase.h"
  329. #include "src/Core/Map.h"
  330. #include "src/Core/Ref.h"
  331. #include "src/Core/Block.h"
  332. #include "src/Core/VectorBlock.h"
  333. #include "src/Core/Transpose.h"
  334. #include "src/Core/DiagonalMatrix.h"
  335. #include "src/Core/Diagonal.h"
  336. #include "src/Core/DiagonalProduct.h"
  337. #include "src/Core/Redux.h"
  338. #include "src/Core/Visitor.h"
  339. #include "src/Core/Fuzzy.h"
  340. #include "src/Core/IO.h"
  341. #include "src/Core/Swap.h"
  342. #include "src/Core/CommaInitializer.h"
  343. #include "src/Core/GeneralProduct.h"
  344. #include "src/Core/Solve.h"
  345. #include "src/Core/Inverse.h"
  346. #include "src/Core/SolverBase.h"
  347. #include "src/Core/PermutationMatrix.h"
  348. #include "src/Core/Transpositions.h"
  349. #include "src/Core/TriangularMatrix.h"
  350. #include "src/Core/SelfAdjointView.h"
  351. #include "src/Core/products/GeneralBlockPanelKernel.h"
  352. #include "src/Core/products/Parallelizer.h"
  353. #include "src/Core/ProductEvaluators.h"
  354. #include "src/Core/products/GeneralMatrixVector.h"
  355. #include "src/Core/products/GeneralMatrixMatrix.h"
  356. #include "src/Core/SolveTriangular.h"
  357. #include "src/Core/products/GeneralMatrixMatrixTriangular.h"
  358. #include "src/Core/products/SelfadjointMatrixVector.h"
  359. #include "src/Core/products/SelfadjointMatrixMatrix.h"
  360. #include "src/Core/products/SelfadjointProduct.h"
  361. #include "src/Core/products/SelfadjointRank2Update.h"
  362. #include "src/Core/products/TriangularMatrixVector.h"
  363. #include "src/Core/products/TriangularMatrixMatrix.h"
  364. #include "src/Core/products/TriangularSolverMatrix.h"
  365. #include "src/Core/products/TriangularSolverVector.h"
  366. #include "src/Core/BandMatrix.h"
  367. #include "src/Core/CoreIterators.h"
  368. #include "src/Core/BooleanRedux.h"
  369. #include "src/Core/Select.h"
  370. #include "src/Core/VectorwiseOp.h"
  371. #include "src/Core/Random.h"
  372. #include "src/Core/Replicate.h"
  373. #include "src/Core/Reverse.h"
  374. #include "src/Core/ArrayWrapper.h"
  375. #ifdef STORMEIGEN_USE_BLAS
  376. #include "src/Core/products/GeneralMatrixMatrix_MKL.h"
  377. #include "src/Core/products/GeneralMatrixVector_MKL.h"
  378. #include "src/Core/products/GeneralMatrixMatrixTriangular_MKL.h"
  379. #include "src/Core/products/SelfadjointMatrixMatrix_MKL.h"
  380. #include "src/Core/products/SelfadjointMatrixVector_MKL.h"
  381. #include "src/Core/products/TriangularMatrixMatrix_MKL.h"
  382. #include "src/Core/products/TriangularMatrixVector_MKL.h"
  383. #include "src/Core/products/TriangularSolverMatrix_MKL.h"
  384. #endif // STORMEIGEN_USE_BLAS
  385. #ifdef STORMEIGEN_USE_MKL_VML
  386. #include "src/Core/Assign_MKL.h"
  387. #endif
  388. #include "src/Core/GlobalFunctions.h"
  389. #include "src/Core/util/ReenableStupidWarnings.h"
  390. #endif // STORMEIGEN_CORE_H