Implementations, implementations.

Former-commit-id: e203636fac
12 years ago · 9388cd158c
3 changed files with 136 additions and 7 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -32,7 +32,7 @@ option(ENABLE_INTELTBB "Sets whether the Intel TBB is available." OFF)
 option(STORM_USE_COTIRE "Sets whether Cotire should be used (for building precompiled headers)." OFF)
 option(LINK_LIBCXXABI "Sets whether libc++abi should be linked." OFF)
 option(USE_LIBCXX "Sets whether the standard library is libc++." OFF)
-option(ENABLE_CUDAFORSTORM "Sets whether StoRM is built with its CUDA extension." OFF)
+option(STORM_USE_CUDAFORSTORM "Sets whether StoRM is built with its CUDA extension." OFF)
 set(GUROBI_ROOT "" CACHE STRING "The root directory of Gurobi (if available).")
 set(Z3_ROOT "" CACHE STRING "The root directory of Z3 (if available).")
 set(ADDITIONAL_INCLUDE_DIRS "" CACHE STRING "Additional directories added to the include directories.")
@ -180,7 +180,7 @@ endif()
 set(STORM_CPP_GLPK_DEF "define")

 # CUDA Defines
-if (ENABLE_CUDAFORSTORM)
+if (STORM_USE_CUDAFORSTORM)
 	set(STORM_CPP_CUDAFORSTORM_DEF "define")
 else()
 	set(STORM_CPP_CUDAFORSTORM_DEF "undef")
@ -289,7 +289,7 @@ endif()
 if (ENABLE_Z3)
    link_directories("${Z3_ROOT}/bin")
 endif()
-if (ENABLE_CUDAFORSTORM)
+if (STORM_USE_CUDAFORSTORM)
 	link_directories("${PROJECT_SOURCE_DIR}/build/cudaForStorm/lib")
 endif()
 if ((NOT Boost_LIBRARY_DIRS) OR ("${Boost_LIBRARY_DIRS}" STREQUAL ""))
@ -328,14 +328,14 @@ target_link_libraries(storm-performance-tests ${Boost_LIBRARIES})
 ##	CUDA For Storm
 ##
 #############################################################
-if (ENABLE_CUDAFORSTORM)
+if (STORM_USE_CUDAFORSTORM)
    message (STATUS "StoRM - Linking with CudaForStorm")
 	include_directories("${PROJECT_BINARY_DIR}/cudaForStorm/include")
 	include_directories("${PROJECT_SOURCE_DIR}/resources/cudaForStorm")
    target_link_libraries(storm cudaForStorm)
    target_link_libraries(storm-functional-tests cudaForStorm)
    target_link_libraries(storm-performance-tests cudaForStorm)
-endif(ENABLE_CUDAFORSTORM)
+endif(STORM_USE_CUDAFORSTORM)

 #############################################################
 ##
--- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu
+++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu
@ -15,8 +15,136 @@ void cudaForStormTestFunction(int a, int b) {
 	std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl;
 }

-void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
+void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
+	if (sizeof(double) != sizeof(uint_fast64_t)) {
+		std::cout << "FATAL ERROR - Internal Sizes of Double and uint_fast64_t do NOT match, CUDA acceleration not possible!" << std::endl;
+		return;
+	}
+	
+	uint_fast64_t* device_matrixRowIndices = nullptr;
+	uint_fast64_t* device_matrixColIndicesAndValues = nullptr;
+	double* device_x = nullptr;
+	double* device_b = nullptr;
+	double* device_multiplyResult = nullptr;
+	uint_fast64_t* device_nondeterministicChoiceIndices = nullptr;
+
+	cudaError_t cudaMallocResult;
+
+	cudaMallocResult = cudaMalloc<uint_fast64_t>(&device_matrixRowIndices, matrixRowIndices.size());
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	cudaMallocResult = cudaMalloc<uint_fast64_t>(&device_matrixColIndicesAndValues, columnIndicesAndValues.size() * 2);
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	cudaMallocResult = cudaMalloc<double>(&device_x, x.size());
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	cudaMallocResult = cudaMalloc<double>(&device_b, b.size());
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Vector b, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	cudaMallocResult = cudaMalloc<double>(&device_multiplyResult, b.size());
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	cudaMallocResult = cudaMalloc<uint_fast64_t>(&device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.size());
+	if (cudaMallocResult != cudaSuccess) {
+		std::cout << "Could not allocate memory for Nondeterministic Choice Indices, Error Code " << cudaMallocResult << "." << std::endl;
+		goto cleanup;
+	}
+
+	// Memory allocated, copy data to device
+	cudaError_t cudaCopyResult;

+	cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(uint_fast64_t) * matrixRowIndices.size(), cudaMemcpyHostToDevice);
+	if (cudaCopyResult != cudaSuccess) {
+		std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl;
+		goto cleanup;
+	}
+
+	cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(uint_fast64_t) * columnIndicesAndValues.size()) + (sizeof(double) * columnIndicesAndValues.size()), cudaMemcpyHostToDevice);
+	if (cudaCopyResult != cudaSuccess) {
+		std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl;
+		goto cleanup;
+	}
+
+	cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(double) * x.size(), cudaMemcpyHostToDevice);
+	if (cudaCopyResult != cudaSuccess) {
+		std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl;
+		goto cleanup;
+	}
+
+	cudaCopyResult = cudaMemcpy(device_b, b.data(), sizeof(double) * b.size(), cudaMemcpyHostToDevice);
+	if (cudaCopyResult != cudaSuccess) {
+		std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl;
+		goto cleanup;
+	}
+
+	cudaCopyResult = cudaMemcpy(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), sizeof(uint_fast64_t) * nondeterministicChoiceIndices.size(), cudaMemcpyHostToDevice);
+	if (cudaCopyResult != cudaSuccess) {
+		std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl;
+		goto cleanup;
+	}
+
+	// Data is on device, start Kernel
+
+	// All code related to freeing memory and clearing up the device
+cleanup:
+	if (device_matrixRowIndices != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_matrixRowIndices = nullptr;
+	}
+	if (device_matrixColIndicesAndValues != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_matrixColIndicesAndValues = nullptr;
+	}
+	if (device_x != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_x);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_x = nullptr;
+	}
+	if (device_b != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_b);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Vector b, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_b = nullptr;
+	}
+	if (device_multiplyResult != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_multiplyResult);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_multiplyResult = nullptr;
+	}
+	if (device_nondeterministicChoiceIndices != nullptr) {
+		cudaError_t cudaFreeResult = cudaFree(device_nondeterministicChoiceIndices);
+		if (cudaFreeResult != cudaSuccess) {
+			std::cout << "Could not free Memory of Nondeterministic Choice Indices, Error Code " << cudaFreeResult << "." << std::endl;
+		}
+		device_nondeterministicChoiceIndices = nullptr;
+	}
 }

 /*
--- a/resources/cudaForStorm/srcCuda/basicValueIteration.h
+++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h
@ -1,8 +1,9 @@
 #include <cstdint>
 #include <vector>
+#include <utility>

 // Library exports
 #include "cudaForStorm_Export.h"

 cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);
-cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<uint_fast64_t> const& matrixColumnIndices, std::vector<double> const& matrixValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
+cudaForStorm_EXPORT void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);