From e45fa5a82ccd6c8f6608ca7b6caf063c08b2f6b5 Mon Sep 17 00:00:00 2001 From: PBerger Date: Wed, 12 Mar 2014 23:31:24 +0100 Subject: [PATCH] Added a Test for the CUDA Plugin. Added accessors for the SparseMatrix as I need access to the internal vectors. Added a pure SPMV Kernel interface to check the kernel for errors. Former-commit-id: 46e1449eeb4993de24a753d07a08b240a1465021 --- .../srcCuda/basicValueIteration.cu | 123 ++++++++++++++++++ .../srcCuda/basicValueIteration.h | 1 + .../cudaForStorm/srcCuda/cuspExtension.h | 2 +- src/storage/SparseMatrix.cpp | 16 +++ src/storage/SparseMatrix.h | 9 ++ test/functional/solver/CudaPluginTest.cpp | 65 +++++++++ 6 files changed, 215 insertions(+), 1 deletion(-) create mode 100644 test/functional/solver/CudaPluginTest.cpp diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.cu b/resources/cudaForStorm/srcCuda/basicValueIteration.cu index 31fb8d4ba..712dfac09 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu @@ -267,6 +267,125 @@ cleanup: } } +template +void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector const& x, std::vector& b) { + IndexType* device_matrixRowIndices = nullptr; + IndexType* device_matrixColIndicesAndValues = nullptr; + ValueType* device_x = nullptr; + ValueType* device_multiplyResult = nullptr; + + std::cout.sync_with_stdio(true); + std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast(getFreeCudaMemory()) / static_cast(getTotalCudaMemory()))*100 << "%)." << std::endl; + size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size(); + std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl; + + const IndexType matrixRowCount = matrixRowIndices.size() - 1; + const IndexType matrixNnzCount = columnIndicesAndValues.size(); + + cudaError_t cudaMallocResult; + + CUDA_CHECK_ALL_ERRORS(); + cudaMallocResult = cudaMalloc(reinterpret_cast(&device_matrixRowIndices), sizeof(IndexType) * (matrixRowCount + 1)); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + CUDA_CHECK_ALL_ERRORS(); + cudaMallocResult = cudaMalloc(reinterpret_cast(&device_matrixColIndicesAndValues), sizeof(IndexType) * matrixNnzCount + sizeof(ValueType) * matrixNnzCount); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + CUDA_CHECK_ALL_ERRORS(); + cudaMallocResult = cudaMalloc(reinterpret_cast(&device_x), sizeof(ValueType) * matrixColCount); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + CUDA_CHECK_ALL_ERRORS(); + cudaMallocResult = cudaMalloc(reinterpret_cast(&device_multiplyResult), sizeof(ValueType) * matrixRowCount); + if (cudaMallocResult != cudaSuccess) { + std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl; + goto cleanup; + } + + // Memory allocated, copy data to device + cudaError_t cudaCopyResult; + + CUDA_CHECK_ALL_ERRORS(); + cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(IndexType) * (matrixRowCount + 1), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + CUDA_CHECK_ALL_ERRORS(); + cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(IndexType) * matrixNnzCount) + (sizeof(ValueType) * matrixNnzCount), cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + CUDA_CHECK_ALL_ERRORS(); + cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(ValueType) * matrixColCount, cudaMemcpyHostToDevice); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + // Preset the multiplyResult to zeros... + CUDA_CHECK_ALL_ERRORS(); + cudaCopyResult = cudaMemset(device_multiplyResult, 0, sizeof(ValueType) * matrixRowCount); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not zero the multiply Result, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + cusp::detail::device::storm_cuda_opt_spmv_csr_vector(matrixRowCount, matrixNnzCount, device_matrixRowIndices, device_matrixColIndicesAndValues, device_x, device_multiplyResult); + CUDA_CHECK_ALL_ERRORS(); + + // Get result back from the device + cudaCopyResult = cudaMemcpy(b.data(), device_multiplyResult, sizeof(ValueType) * matrixRowCount, cudaMemcpyDeviceToHost); + if (cudaCopyResult != cudaSuccess) { + std::cout << "Could not copy back data for result vector, Error Code " << cudaCopyResult << std::endl; + goto cleanup; + } + + // All code related to freeing memory and clearing up the device +cleanup: + if (device_matrixRowIndices != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl; + } + device_matrixRowIndices = nullptr; + } + if (device_matrixColIndicesAndValues != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl; + } + device_matrixColIndicesAndValues = nullptr; + } + if (device_x != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_x); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl; + } + device_x = nullptr; + } + if (device_multiplyResult != nullptr) { + cudaError_t cudaFreeResult = cudaFree(device_multiplyResult); + if (cudaFreeResult != cudaSuccess) { + std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl; + } + device_multiplyResult = nullptr; + } +} + /* * Declare and implement all exported functions for these Kernels here * @@ -276,6 +395,10 @@ void cudaForStormTestFunction(int a, int b) { std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl; } +void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector const& x, std::vector& b) { + basicValueIteration_spmv(matrixColCount, matrixRowIndices, columnIndicesAndValues, x, b); +} + void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { if (relativePrecisionCheck) { basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.h b/resources/cudaForStorm/srcCuda/basicValueIteration.h index 61529d963..2395c0311 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.h +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h @@ -11,5 +11,6 @@ cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b); cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); +cudaForStorm_EXPORT void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector const& x, std::vector& b); #endif // STORM_CUDAFORSTORM_BASICVALUEITERATION_H_ \ No newline at end of file diff --git a/resources/cudaForStorm/srcCuda/cuspExtension.h b/resources/cudaForStorm/srcCuda/cuspExtension.h index 4b13005f3..34e6e6e14 100644 --- a/resources/cudaForStorm/srcCuda/cuspExtension.h +++ b/resources/cudaForStorm/srcCuda/cuspExtension.h @@ -2,7 +2,7 @@ * This is an extension of the original CUSP csr_vector.h SPMV implementation. * It is based on the Code and incorporates changes as to cope with the details * of the StoRM code. - * As this is mostly copy & past, the original license still applies. + * As this is mostly copy & paste, the original license still applies. */ /* diff --git a/src/storage/SparseMatrix.cpp b/src/storage/SparseMatrix.cpp index 1ba121550..b831ae9df 100644 --- a/src/storage/SparseMatrix.cpp +++ b/src/storage/SparseMatrix.cpp @@ -797,6 +797,22 @@ namespace storm { } return true; } + + /*! + * Returns a reference to the internal rowMapping vector + */ + template + std::vector const& SparseMatrix::__internal_getRowIndications() { + return this->rowIndications; + } + + /*! + * Returns a reference to the internal columnMapping vector + */ + template + std::vector> const& SparseMatrix::__internal_getColumnsAndValues() { + return this->columnsAndValues; + } template std::ostream& operator<<(std::ostream& out, SparseMatrix const& matrix) { diff --git a/src/storage/SparseMatrix.h b/src/storage/SparseMatrix.h index cac8ae586..973fa79dd 100644 --- a/src/storage/SparseMatrix.h +++ b/src/storage/SparseMatrix.h @@ -583,6 +583,15 @@ namespace storm { * @return size_t A hash value for this matrix. */ std::size_t hash() const; + + /*! + * Returns a reference to the internal rowMapping vector + */ + std::vector const& __internal_getRowIndications(); + /*! + * Returns a reference to the internal columnMapping vector + */ + std::vector> const& __internal_getColumnsAndValues(); private: // The number of rows of the matrix. diff --git a/test/functional/solver/CudaPluginTest.cpp b/test/functional/solver/CudaPluginTest.cpp new file mode 100644 index 000000000..a59697af8 --- /dev/null +++ b/test/functional/solver/CudaPluginTest.cpp @@ -0,0 +1,65 @@ +#include "gtest/gtest.h" +#include "src/storage/SparseMatrix.h" +#include "src/exceptions/InvalidStateException.h" +#include "src/exceptions/OutOfRangeException.h" + +#include "storm-config.h" + +#ifdef STORM_HAVE_CUDAFORSTORM + +#include "cudaForStorm.h" + +TEST(CudaPlugin, CreationWithDimensions) { + storm::storage::SparseMatrixBuilder matrixBuilder(4, 4, 10); + ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 1, 1.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 3, -1.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 0, 8.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 7.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 2, -5.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 3, 2.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 0, 2.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 1, 2.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 2, 4.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 3, 4.0)); + + + storm::storage::SparseMatrix matrix; + ASSERT_NO_THROW(matrix = matrixBuilder.build()); + + ASSERT_EQ(4, matrix.getRowCount()); + ASSERT_EQ(4, matrix.getColumnCount()); + ASSERT_EQ(10, matrix.getEntryCount()); + + std::vector x({0, 4, 1, 1}); + std::vector b({0, 0, 0, 0}); + + ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b)); + + ASSERT_EQ(b.at(0), 3); + ASSERT_EQ(b.at(1), 25); + ASSERT_EQ(b.at(2), 16); + ASSERT_EQ(b.at(3), 0); +} + +TEST(CudaPlugin, VerySmall) { + storm::storage::SparseMatrixBuilder matrixBuilder(2, 2, 2); + ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 0, 1.0)); + ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 2.0)); + + storm::storage::SparseMatrix matrix; + ASSERT_NO_THROW(matrix = matrixBuilder.build()); + + ASSERT_EQ(2, matrix.getRowCount()); + ASSERT_EQ(2, matrix.getColumnCount()); + ASSERT_EQ(2, matrix.getEntryCount()); + + std::vector x({ 4.0, 8.0 }); + std::vector b({ 0.0, 0.0 }); + + ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b)); + + ASSERT_EQ(b.at(0), 4.0); + ASSERT_EQ(b.at(1), 16.0); +} + +#endif \ No newline at end of file