You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

161 lines
4.8 KiB

  1. // workaround issue between gcc >= 4.7 and cuda 5.5
  2. #if (defined __GNUC__) && (__GNUC__>4 || __GNUC_MINOR__>=7)
  3. #undef _GLIBCXX_ATOMIC_BUILTINS
  4. #undef _GLIBCXX_USE_INT128
  5. #endif
  6. #define EIGEN_TEST_NO_LONGDOUBLE
  7. #define EIGEN_TEST_NO_COMPLEX
  8. #define EIGEN_TEST_FUNC cuda_basic
  9. #define EIGEN_DEFAULT_DENSE_INDEX_TYPE int
  10. #include <math_constants.h>
  11. #include "main.h"
  12. #include "cuda_common.h"
  13. #include <Eigen/Eigenvalues>
  14. // struct Foo{
  15. // EIGEN_DEVICE_FUNC
  16. // void operator()(int i, const float* mats, float* vecs) const {
  17. // using namespace StormEigen;
  18. // // Matrix3f M(data);
  19. // // Vector3f x(data+9);
  20. // // Map<Vector3f>(data+9) = M.inverse() * x;
  21. // Matrix3f M(mats+i/16);
  22. // Vector3f x(vecs+i*3);
  23. // // using std::min;
  24. // // using std::sqrt;
  25. // Map<Vector3f>(vecs+i*3) << x.minCoeff(), 1, 2;// / x.dot(x);//(M.inverse() * x) / x.x();
  26. // //x = x*2 + x.y() * x + x * x.maxCoeff() - x / x.sum();
  27. // }
  28. // };
  29. template<typename T>
  30. struct coeff_wise {
  31. EIGEN_DEVICE_FUNC
  32. void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  33. {
  34. using namespace StormEigen;
  35. T x1(in+i);
  36. T x2(in+i+1);
  37. T x3(in+i+2);
  38. Map<T> res(out+i*T::MaxSizeAtCompileTime);
  39. res.array() += (in[0] * x1 + x2).array() * x3.array();
  40. }
  41. };
  42. template<typename T>
  43. struct replicate {
  44. EIGEN_DEVICE_FUNC
  45. void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  46. {
  47. using namespace StormEigen;
  48. T x1(in+i);
  49. int step = x1.size() * 4;
  50. int stride = 3 * step;
  51. typedef Map<Array<typename T::Scalar,Dynamic,Dynamic> > MapType;
  52. MapType(out+i*stride+0*step, x1.rows()*2, x1.cols()*2) = x1.replicate(2,2);
  53. MapType(out+i*stride+1*step, x1.rows()*3, x1.cols()) = in[i] * x1.colwise().replicate(3);
  54. MapType(out+i*stride+2*step, x1.rows(), x1.cols()*3) = in[i] * x1.rowwise().replicate(3);
  55. }
  56. };
  57. template<typename T>
  58. struct redux {
  59. EIGEN_DEVICE_FUNC
  60. void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  61. {
  62. using namespace StormEigen;
  63. int N = 10;
  64. T x1(in+i);
  65. out[i*N+0] = x1.minCoeff();
  66. out[i*N+1] = x1.maxCoeff();
  67. out[i*N+2] = x1.sum();
  68. out[i*N+3] = x1.prod();
  69. out[i*N+4] = x1.matrix().squaredNorm();
  70. out[i*N+5] = x1.matrix().norm();
  71. out[i*N+6] = x1.colwise().sum().maxCoeff();
  72. out[i*N+7] = x1.rowwise().maxCoeff().sum();
  73. out[i*N+8] = x1.matrix().colwise().squaredNorm().sum();
  74. }
  75. };
  76. template<typename T1, typename T2>
  77. struct prod_test {
  78. EIGEN_DEVICE_FUNC
  79. void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
  80. {
  81. using namespace StormEigen;
  82. typedef Matrix<typename T1::Scalar, T1::RowsAtCompileTime, T2::ColsAtCompileTime> T3;
  83. T1 x1(in+i);
  84. T2 x2(in+i+1);
  85. Map<T3> res(out+i*T3::MaxSizeAtCompileTime);
  86. res += in[i] * x1 * x2;
  87. }
  88. };
  89. template<typename T1, typename T2>
  90. struct diagonal {
  91. EIGEN_DEVICE_FUNC
  92. void operator()(int i, const typename T1::Scalar* in, typename T1::Scalar* out) const
  93. {
  94. using namespace StormEigen;
  95. T1 x1(in+i);
  96. Map<T2> res(out+i*T2::MaxSizeAtCompileTime);
  97. res += x1.diagonal();
  98. }
  99. };
  100. template<typename T>
  101. struct eigenvalues {
  102. EIGEN_DEVICE_FUNC
  103. void operator()(int i, const typename T::Scalar* in, typename T::Scalar* out) const
  104. {
  105. using namespace StormEigen;
  106. typedef Matrix<typename T::Scalar, T::RowsAtCompileTime, 1> Vec;
  107. T M(in+i);
  108. Map<Vec> res(out+i*Vec::MaxSizeAtCompileTime);
  109. T A = M*M.adjoint();
  110. SelfAdjointEigenSolver<T> eig;
  111. eig.computeDirect(M);
  112. res = eig.eigenvalues();
  113. }
  114. };
  115. void test_cuda_basic()
  116. {
  117. ei_test_init_cuda();
  118. int nthreads = 100;
  119. StormEigen::VectorXf in, out;
  120. #ifndef __CUDA_ARCH__
  121. int data_size = nthreads * 512;
  122. in.setRandom(data_size);
  123. out.setRandom(data_size);
  124. #endif
  125. CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Vector3f>(), nthreads, in, out) );
  126. CALL_SUBTEST( run_and_compare_to_cuda(coeff_wise<Array44f>(), nthreads, in, out) );
  127. CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array4f>(), nthreads, in, out) );
  128. CALL_SUBTEST( run_and_compare_to_cuda(replicate<Array33f>(), nthreads, in, out) );
  129. CALL_SUBTEST( run_and_compare_to_cuda(redux<Array4f>(), nthreads, in, out) );
  130. CALL_SUBTEST( run_and_compare_to_cuda(redux<Matrix3f>(), nthreads, in, out) );
  131. CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix3f,Matrix3f>(), nthreads, in, out) );
  132. CALL_SUBTEST( run_and_compare_to_cuda(prod_test<Matrix4f,Vector4f>(), nthreads, in, out) );
  133. CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix3f,Vector3f>(), nthreads, in, out) );
  134. CALL_SUBTEST( run_and_compare_to_cuda(diagonal<Matrix4f,Vector4f>(), nthreads, in, out) );
  135. CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix3f>(), nthreads, in, out) );
  136. CALL_SUBTEST( run_and_compare_to_cuda(eigenvalues<Matrix2f>(), nthreads, in, out) );
  137. }