
Added a Test for the CUDA Plugin.

Added accessors for the SparseMatrix as I need access to the internal vectors.
Added a pure SPMV Kernel interface to check the kernel for errors.
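
The new accessors expose the matrix's internal CSR vectors so they can be handed straight to the new pure SPMV kernel interface. Below is a minimal sketch of the intended call flow, mirroring the test added in this commit; it assumes the CUDA plugin is built and available, and the function name spmvSketch is only a placeholder:

#include <vector>
#include "src/storage/SparseMatrix.h"
#include "cudaForStorm.h"

void spmvSketch() {
    // Build a small 2x2 matrix: diag(1.0, 2.0).
    storm::storage::SparseMatrixBuilder<double> builder(2, 2, 2);
    builder.addNextValue(0, 0, 1.0);
    builder.addNextValue(1, 1, 2.0);
    storm::storage::SparseMatrix<double> matrix = builder.build();

    std::vector<double> x({ 4.0, 8.0 });
    std::vector<double> b(matrix.getRowCount(), 0.0);

    // The new accessors hand the internal CSR vectors directly to the
    // pure SPMV kernel interface, which computes b = matrix * x on the device.
    basicValueIteration_spmv_uint64_double(matrix.getColumnCount(),
        matrix.__internal_getRowIndications(),
        matrix.__internal_getColumnsAndValues(), x, b);

    // b now holds { 4.0, 16.0 }.
}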


Former-commit-id: 46e1449eeb
PBerger committed 11 years ago · commit e45fa5a82c
Changed files:
  1. resources/cudaForStorm/srcCuda/basicValueIteration.cu (123)
  2. resources/cudaForStorm/srcCuda/basicValueIteration.h (1)
  3. resources/cudaForStorm/srcCuda/cuspExtension.h (2)
  4. src/storage/SparseMatrix.cpp (16)
  5. src/storage/SparseMatrix.h (9)
  6. test/functional/solver/CudaPluginTest.cpp (65)

resources/cudaForStorm/srcCuda/basicValueIteration.cu (123)

@@ -267,6 +267,125 @@ cleanup:
}
}
template <typename IndexType, typename ValueType>
void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector<IndexType> const& matrixRowIndices, std::vector<std::pair<IndexType, ValueType>> const& columnIndicesAndValues, std::vector<ValueType> const& x, std::vector<ValueType>& b) {
IndexType* device_matrixRowIndices = nullptr;
IndexType* device_matrixColIndicesAndValues = nullptr;
ValueType* device_x = nullptr;
ValueType* device_multiplyResult = nullptr;
std::cout.sync_with_stdio(true);
std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << " Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl;
size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size();
std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
const IndexType matrixRowCount = matrixRowIndices.size() - 1;
const IndexType matrixNnzCount = columnIndicesAndValues.size();
cudaError_t cudaMallocResult;
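// Allocate device memory for the CSR row indices, the packed column-index/value pairs, the vector x and the multiplication result.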
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_matrixRowIndices), sizeof(IndexType) * (matrixRowCount + 1));
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_matrixColIndicesAndValues), sizeof(IndexType) * matrixNnzCount + sizeof(ValueType) * matrixNnzCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_x), sizeof(ValueType) * matrixColCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_multiplyResult), sizeof(ValueType) * matrixRowCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
// Memory allocated, copy data to device
cudaError_t cudaCopyResult;
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(IndexType) * (matrixRowCount + 1), cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(IndexType) * matrixNnzCount) + (sizeof(ValueType) * matrixNnzCount), cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(ValueType) * matrixColCount, cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
// Preset the multiplyResult to zeros...
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemset(device_multiplyResult, 0, sizeof(ValueType) * matrixRowCount);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not zero the multiply Result, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
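// Run the CSR vector SpMV kernel (the StoRM-adapted CUSP kernel from cuspExtension.h) on the device data.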
cusp::detail::device::storm_cuda_opt_spmv_csr_vector<IndexType, ValueType>(matrixRowCount, matrixNnzCount, device_matrixRowIndices, device_matrixColIndicesAndValues, device_x, device_multiplyResult);
CUDA_CHECK_ALL_ERRORS();
// Get result back from the device
cudaCopyResult = cudaMemcpy(b.data(), device_multiplyResult, sizeof(ValueType) * matrixRowCount, cudaMemcpyDeviceToHost);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy back data for result vector, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
// All code related to freeing memory and clearing up the device
cleanup:
if (device_matrixRowIndices != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl;
}
device_matrixRowIndices = nullptr;
}
if (device_matrixColIndicesAndValues != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl;
}
device_matrixColIndicesAndValues = nullptr;
}
if (device_x != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_x);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl;
}
device_x = nullptr;
}
if (device_multiplyResult != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_multiplyResult);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl;
}
device_multiplyResult = nullptr;
}
}
/*
* Declare and implement all exported functions for these Kernels here
*
@@ -276,6 +395,10 @@ void cudaForStormTestFunction(int a, int b) {
std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl;
}
void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double> const& x, std::vector<double>& b) {
basicValueIteration_spmv(matrixColCount, matrixRowIndices, columnIndicesAndValues, x, b);
}
void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
if (relativePrecisionCheck) {
basicValueIteration_mvReduce<true, true, uint_fast64_t, double>(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices);

resources/cudaForStorm/srcCuda/basicValueIteration.h (1)

@@ -11,5 +11,6 @@
cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);
cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
cudaForStorm_EXPORT void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double> const& x, std::vector<double>& b);
#endif // STORM_CUDAFORSTORM_BASICVALUEITERATION_H_

resources/cudaForStorm/srcCuda/cuspExtension.h (2)

@@ -2,7 +2,7 @@
* This is an extension of the original CUSP csr_vector.h SPMV implementation.
* It is based on the Code and incorporates changes as to cope with the details
* of the StoRM code.
- * As this is mostly copy & past, the original license still applies.
+ * As this is mostly copy & paste, the original license still applies.
*/
/*

src/storage/SparseMatrix.cpp (16)

@@ -797,6 +797,22 @@ namespace storm {
}
return true;
}
/*!
* Returns a const reference to the internal rowIndications vector
*/
template<typename T>
std::vector<uint_fast64_t> const& SparseMatrix<T>::__internal_getRowIndications() {
return this->rowIndications;
}
/*!
* Returns a const reference to the internal columnsAndValues vector
*/
template<typename T>
std::vector<std::pair<uint_fast64_t, T>> const& SparseMatrix<T>::__internal_getColumnsAndValues() {
return this->columnsAndValues;
}
template<typename T>
std::ostream& operator<<(std::ostream& out, SparseMatrix<T> const& matrix) {

src/storage/SparseMatrix.h (9)

@@ -583,6 +583,15 @@ namespace storm {
* @return size_t A hash value for this matrix.
*/
std::size_t hash() const;
/*!
* Returns a const reference to the internal rowIndications vector
*/
std::vector<uint_fast64_t> const& __internal_getRowIndications();
/*!
* Returns a const reference to the internal columnsAndValues vector
*/
std::vector<std::pair<uint_fast64_t, T>> const& __internal_getColumnsAndValues();
private:
// The number of rows of the matrix.

test/functional/solver/CudaPluginTest.cpp (65)

@@ -0,0 +1,65 @@
#include "gtest/gtest.h"
#include "src/storage/SparseMatrix.h"
#include "src/exceptions/InvalidStateException.h"
#include "src/exceptions/OutOfRangeException.h"
#include "storm-config.h"
#ifdef STORM_HAVE_CUDAFORSTORM
#include "cudaForStorm.h"
TEST(CudaPlugin, CreationWithDimensions) {
storm::storage::SparseMatrixBuilder<double> matrixBuilder(4, 4, 10);
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 1, 1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 3, -1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 0, 8.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 7.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 2, -5.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 3, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 0, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 1, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 2, 4.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 3, 4.0));
storm::storage::SparseMatrix<double> matrix;
ASSERT_NO_THROW(matrix = matrixBuilder.build());
ASSERT_EQ(4, matrix.getRowCount());
ASSERT_EQ(4, matrix.getColumnCount());
ASSERT_EQ(10, matrix.getEntryCount());
std::vector<double> x({0, 4, 1, 1});
std::vector<double> b({0, 0, 0, 0});
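// Compute b = matrix * x on the device; for x = (0, 4, 1, 1) the expected result is (3, 25, 16, 0).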
ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b));
ASSERT_EQ(b.at(0), 3);
ASSERT_EQ(b.at(1), 25);
ASSERT_EQ(b.at(2), 16);
ASSERT_EQ(b.at(3), 0);
}
TEST(CudaPlugin, VerySmall) {
storm::storage::SparseMatrixBuilder<double> matrixBuilder(2, 2, 2);
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 0, 1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 2.0));
storm::storage::SparseMatrix<double> matrix;
ASSERT_NO_THROW(matrix = matrixBuilder.build());
ASSERT_EQ(2, matrix.getRowCount());
ASSERT_EQ(2, matrix.getColumnCount());
ASSERT_EQ(2, matrix.getEntryCount());
std::vector<double> x({ 4.0, 8.0 });
std::vector<double> b({ 0.0, 0.0 });
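// Compute b = matrix * x on the device; with matrix = diag(1, 2) and x = (4, 8) the expected result is (4, 16).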
ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b));
ASSERT_EQ(b.at(0), 4.0);
ASSERT_EQ(b.at(1), 16.0);
}
#endif