diff --git a/CMakeLists.txt b/CMakeLists.txt index 8d0e87995..60f3426e9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ option(ENABLE_INTELTBB "Sets whether the Intel TBB is available." OFF) option(STORM_USE_COTIRE "Sets whether Cotire should be used (for building precompiled headers)." OFF) option(LINK_LIBCXXABI "Sets whether libc++abi should be linked." OFF) option(USE_LIBCXX "Sets whether the standard library is libc++." OFF) -option(ENABLE_CUDAFORSTORM "Sets whether StoRM is built with its CUDA extension." OFF) +option(STORM_USE_CUDAFORSTORM "Sets whether StoRM is built with its CUDA extension." OFF) set(GUROBI_ROOT "" CACHE STRING "The root directory of Gurobi (if available).") set(Z3_ROOT "" CACHE STRING "The root directory of Z3 (if available).") set(ADDITIONAL_INCLUDE_DIRS "" CACHE STRING "Additional directories added to the include directories.") @@ -180,7 +180,7 @@ endif() set(STORM_CPP_GLPK_DEF "define") # CUDA Defines -if (ENABLE_CUDAFORSTORM) +if (STORM_USE_CUDAFORSTORM) set(STORM_CPP_CUDAFORSTORM_DEF "define") else() set(STORM_CPP_CUDAFORSTORM_DEF "undef") @@ -289,7 +289,7 @@ endif() if (ENABLE_Z3) link_directories("${Z3_ROOT}/bin") endif() -if (ENABLE_CUDAFORSTORM) +if (STORM_USE_CUDAFORSTORM) link_directories("${PROJECT_SOURCE_DIR}/build/cudaForStorm/lib") endif() if ((NOT Boost_LIBRARY_DIRS) OR ("${Boost_LIBRARY_DIRS}" STREQUAL "")) @@ -328,14 +328,14 @@ target_link_libraries(storm-performance-tests ${Boost_LIBRARIES}) ## CUDA For Storm ## ############################################################# -if (ENABLE_CUDAFORSTORM) +if (STORM_USE_CUDAFORSTORM) message (STATUS "StoRM - Linking with CudaForStorm") include_directories("${PROJECT_BINARY_DIR}/cudaForStorm/include") include_directories("${PROJECT_SOURCE_DIR}/resources/cudaForStorm") target_link_libraries(storm cudaForStorm) target_link_libraries(storm-functional-tests cudaForStorm) target_link_libraries(storm-performance-tests cudaForStorm) -endif(ENABLE_CUDAFORSTORM) +endif(STORM_USE_CUDAFORSTORM) ############################################################# ## diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.cu b/resources/cudaForStorm/srcCuda/basicValueIteration.cu index 18f1f7b02..80bb87e57 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu @@ -15,8 +15,136 @@ void cudaForStormTestFunction(int a, int b) { std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl; } -void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector const& matrixColumnIndices, std::vector const& matrixValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { +void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { + if (sizeof(double) != sizeof(uint_fast64_t)) { + std::cout << "FATAL ERROR - Internal Sizes of Double and uint_fast64_t do NOT match, CUDA acceleration not possible!" << std::endl; + return; + } + uint_fast64_t* device_matrixRowIndices = nullptr; + uint_fast64_t* device_matrixColIndicesAndValues = nullptr; + double* device_x = nullptr; + double* device_b = nullptr; + double* device_multiplyResult = nullptr; + uint_fast64_t* device_nondeterministicChoiceIndices = nullptr; + + cudaError_t cudaMallocResult; + + cudaMallocResult = cudaMalloc(&device_matrixRowIndices, matrixRowIndices.size()); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + cudaMallocResult = cudaMalloc(&device_matrixColIndicesAndValues, columnIndicesAndValues.size() * 2); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + cudaMallocResult = cudaMalloc(&device_x, x.size()); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + cudaMallocResult = cudaMalloc(&device_b, b.size()); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Vector b, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + cudaMallocResult = cudaMalloc(&device_multiplyResult, b.size()); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + cudaMallocResult = cudaMalloc(&device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.size()); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Nondeterministic Choice Indices, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + // Memory allocated, copy data to device + cudaError_t cudaCopyResult; + + cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(uint_fast64_t) * matrixRowIndices.size(), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(uint_fast64_t) * columnIndicesAndValues.size()) + (sizeof(double) * columnIndicesAndValues.size()), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(double) * x.size(), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + cudaCopyResult = cudaMemcpy(device_b, b.data(), sizeof(double) * b.size(), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + cudaCopyResult = cudaMemcpy(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), sizeof(uint_fast64_t) * nondeterministicChoiceIndices.size(), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + // Data is on device, start Kernel + + // All code related to freeing memory and clearing up the device +cleanup: + if (device_matrixRowIndices != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl; + } + device_matrixRowIndices = nullptr; + } + if (device_matrixColIndicesAndValues != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl; + } + device_matrixColIndicesAndValues = nullptr; + } + if (device_x != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_x); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl; + } + device_x = nullptr; + } + if (device_b != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_b); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Vector b, Error Code " << cudaFreeResult << "." << std::endl; + } + device_b = nullptr; + } + if (device_multiplyResult != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_multiplyResult); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl; + } + device_multiplyResult = nullptr; + } + if (device_nondeterministicChoiceIndices != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_nondeterministicChoiceIndices); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Nondeterministic Choice Indices, Error Code " << cudaFreeResult << "." << std::endl; + } + device_nondeterministicChoiceIndices = nullptr; + } } /* diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.h b/resources/cudaForStorm/srcCuda/basicValueIteration.h index 88e8a92e5..fc5a8a322 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.h +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h @@ -1,8 +1,9 @@ #include #include +#include // Library exports #include "cudaForStorm_Export.h" cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b); -cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector const& matrixColumnIndices, std::vector const& matrixValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); \ No newline at end of file +cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector const& matrixRowIndices, std::vector> columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); \ No newline at end of file