You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

181 lines
7.5 KiB

  1. #include "basicValueIteration.h"
  2. #include <iostream>
  3. #include <chrono>
  4. #include <cuda_runtime.h>
  5. #include "cusparse_v2.h"
  // Kernel that copies the single int A points to into the int B points to.
  // Neither threadIdx nor blockIdx is consulted, so every launched thread
  // performs the same one-element read and write.
  __global__ void cuda_kernel_basicValueIteration_mvReduce(int const * const A, int * const B) {
      int const value = A[0];
      B[0] = value;
  }
  // Writes the line "Cuda for Storm: a + b = <sum>" to std::cout, where <sum>
  // is the integer sum of the two arguments, and flushes the stream.
  void cudaForStormTestFunction(int a, int b) {
      int const sum = a + b;
      std::cout << "Cuda for Storm: a + b = ";
      std::cout << sum;
      std::cout << std::endl;
  }
  // Allocates device memory for elementCount objects of type T and stores the
  // resulting address in *devicePointer. On failure, a message naming
  // `description` together with the numeric CUDA error code is written to
  // std::cout and false is returned; otherwise true is returned.
  template <typename T>
  static bool basicValueIteration_allocateOnDevice(T** devicePointer, size_t elementCount, char const* description) {
      // cudaMalloc expects a size in BYTES, so the element count must be scaled by sizeof(T).
      cudaError_t const result = cudaMalloc<T>(devicePointer, elementCount * sizeof(T));
      if (result != cudaSuccess) {
          std::cout << "Could not allocate memory for " << description << ", Error Code " << result << "." << std::endl;
          return false;
      }
      return true;
  }

  // Copies byteCount bytes from hostPointer to devicePointer (host-to-device).
  // On failure, a message naming `description` together with the numeric CUDA
  // error code is written to std::cout and false is returned.
  static bool basicValueIteration_copyToDevice(void* devicePointer, void const* hostPointer, size_t byteCount, char const* description) {
      cudaError_t const result = cudaMemcpy(devicePointer, hostPointer, byteCount, cudaMemcpyHostToDevice);
      if (result != cudaSuccess) {
          std::cout << "Could not copy data for " << description << ", Error Code " << result << std::endl;
          return false;
      }
      return true;
  }

  // Frees devicePointer if it is non-null and resets it to nullptr. A failing
  // cudaFree is reported on std::cout but otherwise ignored, so that the
  // remaining buffers still get released.
  template <typename T>
  static void basicValueIteration_freeOnDevice(T*& devicePointer, char const* description) {
      if (devicePointer == nullptr) {
          return;
      }
      cudaError_t const result = cudaFree(devicePointer);
      if (result != cudaSuccess) {
          std::cout << "Could not free Memory of " << description << ", Error Code " << result << "." << std::endl;
      }
      devicePointer = nullptr;
  }

  // Uploads the sparse matrix data and the vectors x and b to the device and
  // releases all device buffers again.
  //
  // Steps, in order:
  //   1. Refuse to run (message on std::cout, early return) if double and
  //      uint_fast64_t differ in size, because column indices and values are
  //      stored interleaved in one device buffer of uint_fast64_t-sized slots.
  //   2. Allocate six device buffers: row indices, interleaved column
  //      indices/values, x, b, a multiply-result buffer of b.size() doubles, and
  //      the nondeterministic choice indices.
  //   3. Copy the five input containers to the device.
  //   4. Free every buffer that was allocated.
  // The first failing allocation or copy is reported on std::cout and skips
  // straight to step 4.
  //
  // No kernel is launched yet: maxIterationCount is not read and x is never
  // written back, so the host data is left unchanged.
  //
  // columnIndicesAndValues is taken by value (the vector is copied on every
  // call); the signature is kept as-is to match the declaration callers use.
  void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
      if (sizeof(double) != sizeof(uint_fast64_t)) {
          std::cout << "FATAL ERROR - Internal Sizes of Double and uint_fast64_t do NOT match, CUDA acceleration not possible!" << std::endl;
          return;
      }

      uint_fast64_t* device_matrixRowIndices = nullptr;
      uint_fast64_t* device_matrixColIndicesAndValues = nullptr;
      double* device_x = nullptr;
      double* device_b = nullptr;
      double* device_multiplyResult = nullptr;
      uint_fast64_t* device_nondeterministicChoiceIndices = nullptr;

      size_t const entryCount = columnIndicesAndValues.size();

      // Short-circuit evaluation keeps the original order and stops at the first failure.
      bool const allocated =
          basicValueIteration_allocateOnDevice(&device_matrixRowIndices, matrixRowIndices.size(), "Matrix Row Indices")
          // Each matrix entry occupies two slots: the column index and the value.
          && basicValueIteration_allocateOnDevice(&device_matrixColIndicesAndValues, entryCount * 2, "Matrix Column Indices and Values")
          && basicValueIteration_allocateOnDevice(&device_x, x.size(), "Vector x")
          && basicValueIteration_allocateOnDevice(&device_b, b.size(), "Vector b")
          && basicValueIteration_allocateOnDevice(&device_multiplyResult, b.size(), "Vector multiplyResult")
          && basicValueIteration_allocateOnDevice(&device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.size(), "Nondeterministic Choice Indices");

      // Memory allocated, copy data to device
      bool const uploaded = allocated
          && basicValueIteration_copyToDevice(device_matrixRowIndices, matrixRowIndices.data(), sizeof(uint_fast64_t) * matrixRowIndices.size(), "Matrix Row Indices")
          // NOTE(review): copies the pairs as raw bytes, which assumes std::pair<uint_fast64_t, double>
          // is laid out as index followed by value without padding - confirm for the target ABI.
          && basicValueIteration_copyToDevice(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(uint_fast64_t) * entryCount) + (sizeof(double) * entryCount), "Matrix Column Indices and Values")
          && basicValueIteration_copyToDevice(device_x, x.data(), sizeof(double) * x.size(), "Vector x")
          && basicValueIteration_copyToDevice(device_b, b.data(), sizeof(double) * b.size(), "Vector b")
          && basicValueIteration_copyToDevice(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), sizeof(uint_fast64_t) * nondeterministicChoiceIndices.size(), "Nondeterministic Choice Indices");

      if (uploaded) {
          // Data is on device, start Kernel
      }

      // All code related to freeing memory and clearing up the device
      basicValueIteration_freeOnDevice(device_matrixRowIndices, "Matrix Row Indices");
      basicValueIteration_freeOnDevice(device_matrixColIndicesAndValues, "Matrix Column Indices and Values");
      basicValueIteration_freeOnDevice(device_x, "Vector x");
      basicValueIteration_freeOnDevice(device_b, "Vector b");
      basicValueIteration_freeOnDevice(device_multiplyResult, "Vector multiplyResult");
      basicValueIteration_freeOnDevice(device_nondeterministicChoiceIndices, "Nondeterministic Choice Indices");
  }
  127. /*
  128. void kernelSwitchTest(size_t N) {
  129. int* deviceIntA;
  130. int* deviceIntB;
  131. if (cudaMalloc((void**)&deviceIntA, sizeof(int)) != cudaSuccess) {
  132. std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
  133. return;
  134. }
  135. if (cudaMalloc((void**)&deviceIntB, sizeof(int)) != cudaSuccess) {
  136. std::cout << "Error in cudaMalloc while allocating " << sizeof(int) << " Bytes!" << std::endl;
  137. return;
  138. }
  139. // Allocate space on the device
  140. auto start_time = std::chrono::high_resolution_clock::now();
  141. for (int i = 0; i < N; ++i) {
  142. cuda_kernel_kernelSwitchTest<<<1,1>>>(deviceIntA, deviceIntB);
  143. }
  144. auto end_time = std::chrono::high_resolution_clock::now();
  145. std::cout << "Switching the Kernel " << N << " times took " << std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() << "micros" << std::endl;
  146. std::cout << "Resulting in " << (std::chrono::duration_cast<std::chrono::microseconds>(end_time - start_time).count() / ((double)(N))) << "Microseconds per Kernel Switch" << std::endl;
  147. // Free memory on device
  148. if (cudaFree(deviceIntA) != cudaSuccess) {
  149. std::cout << "Error in cudaFree!" << std::endl;
  150. return;
  151. }
  152. if (cudaFree(deviceIntB) != cudaSuccess) {
  153. std::cout << "Error in cudaFree!" << std::endl;
  154. return;
  155. }
  156. }*/