
Added a Test for the CUDA Plugin.

Added accessors for the SparseMatrix as I need access to the internal vectors.
Added a pure SPMV Kernel interface to check the kernel for errors.
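
The new accessors expose the matrix's internal CSR vectors so they can be handed straight to the new pure SPMV kernel interface. Below is a minimal sketch of the intended call flow, mirroring the test added in this commit; it assumes the CUDA plugin is built and available, and the function name spmvSketch is only a placeholder:

#include <vector>
#include "src/storage/SparseMatrix.h"
#include "cudaForStorm.h"

void spmvSketch() {
    // Build a small 2x2 matrix: diag(1.0, 2.0).
    storm::storage::SparseMatrixBuilder<double> builder(2, 2, 2);
    builder.addNextValue(0, 0, 1.0);
    builder.addNextValue(1, 1, 2.0);
    storm::storage::SparseMatrix<double> matrix = builder.build();

    std::vector<double> x({ 4.0, 8.0 });
    std::vector<double> b(matrix.getRowCount(), 0.0);

    // The new accessors hand the internal CSR vectors directly to the
    // pure SPMV kernel interface, which computes b = matrix * x on the device.
    basicValueIteration_spmv_uint64_double(matrix.getColumnCount(),
        matrix.__internal_getRowIndications(),
        matrix.__internal_getColumnsAndValues(), x, b);

    // b now holds { 4.0, 16.0 }.
}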


Former-commit-id: 46e1449eeb
PBerger committed 11 years ago · commit e45fa5a82c
Changed files:
  1. resources/cudaForStorm/srcCuda/basicValueIteration.cu (123)
  2. resources/cudaForStorm/srcCuda/basicValueIteration.h (1)
  3. resources/cudaForStorm/srcCuda/cuspExtension.h (2)
  4. src/storage/SparseMatrix.cpp (16)
  5. src/storage/SparseMatrix.h (9)
  6. test/functional/solver/CudaPluginTest.cpp (65)

resources/cudaForStorm/srcCuda/basicValueIteration.cu (123)

@@ -267,6 +267,125 @@ cleanup:
}
}
template <typename IndexType, typename ValueType>
void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector<IndexType> const& matrixRowIndices, std::vector<std::pair<IndexType, ValueType>> const& columnIndicesAndValues, std::vector<ValueType> const& x, std::vector<ValueType>& b) {
IndexType* device_matrixRowIndices = nullptr;
IndexType* device_matrixColIndicesAndValues = nullptr;
ValueType* device_x = nullptr;
ValueType* device_multiplyResult = nullptr;
std::cout.sync_with_stdio(true);
std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << " Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl;
size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size();
std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
const IndexType matrixRowCount = matrixRowIndices.size() - 1;
const IndexType matrixNnzCount = columnIndicesAndValues.size();
cudaError_t cudaMallocResult;
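// Allocate device memory for the CSR row indices, the packed column-index/value pairs, the vector x and the multiplication result.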
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_matrixRowIndices), sizeof(IndexType) * (matrixRowCount + 1));
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_matrixColIndicesAndValues), sizeof(IndexType) * matrixNnzCount + sizeof(ValueType) * matrixNnzCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_x), sizeof(ValueType) * matrixColCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaMallocResult = cudaMalloc(reinterpret_cast<void**>(&device_multiplyResult), sizeof(ValueType) * matrixRowCount);
if (cudaMallocResult != cudaSuccess) {
std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl;
goto cleanup;
}
// Memory allocated, copy data to device
cudaError_t cudaCopyResult;
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(IndexType) * (matrixRowCount + 1), cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(IndexType) * matrixNnzCount) + (sizeof(ValueType) * matrixNnzCount), cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(ValueType) * matrixColCount, cudaMemcpyHostToDevice);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
// Preset the multiplyResult to zeros...
CUDA_CHECK_ALL_ERRORS();
cudaCopyResult = cudaMemset(device_multiplyResult, 0, sizeof(ValueType) * matrixRowCount);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not zero the multiply Result, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
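// Run the CSR vector SpMV kernel (the StoRM-adapted CUSP kernel from cuspExtension.h) on the device data.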
cusp::detail::device::storm_cuda_opt_spmv_csr_vector<IndexType, ValueType>(matrixRowCount, matrixNnzCount, device_matrixRowIndices, device_matrixColIndicesAndValues, device_x, device_multiplyResult);
CUDA_CHECK_ALL_ERRORS();
// Get result back from the device
cudaCopyResult = cudaMemcpy(b.data(), device_multiplyResult, sizeof(ValueType) * matrixRowCount, cudaMemcpyDeviceToHost);
if (cudaCopyResult != cudaSuccess) {
std::cout << "Could not copy back data for result vector, Error Code " << cudaCopyResult << std::endl;
goto cleanup;
}
// All code related to freeing memory and clearing up the device
cleanup:
if (device_matrixRowIndices != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl;
}
device_matrixRowIndices = nullptr;
}
if (device_matrixColIndicesAndValues != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl;
}
device_matrixColIndicesAndValues = nullptr;
}
if (device_x != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_x);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl;
}
device_x = nullptr;
}
if (device_multiplyResult != nullptr) {
cudaError_t cudaFreeResult = cudaFree(device_multiplyResult);
if (cudaFreeResult != cudaSuccess) {
std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl;
}
device_multiplyResult = nullptr;
}
}
/*
* Declare and implement all exported functions for these Kernels here
*
@@ -276,6 +395,10 @@ void cudaForStormTestFunction(int a, int b) {
std::cout << "Cuda for Storm: a + b = " << (a+b) << std::endl;
}
void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double> const& x, std::vector<double>& b) {
basicValueIteration_spmv(matrixColCount, matrixRowIndices, columnIndicesAndValues, x, b);
}
void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices) {
if (relativePrecisionCheck) {
basicValueIteration_mvReduce<true, true, uint_fast64_t, double>(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices);

resources/cudaForStorm/srcCuda/basicValueIteration.h (1)

@@ -11,5 +11,6 @@
cudaForStorm_EXPORT void cudaForStormTestFunction(int a, int b);
cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double>& x, std::vector<double> const& b, std::vector<uint_fast64_t> const& nondeterministicChoiceIndices);
cudaForStorm_EXPORT void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector<uint_fast64_t> const& matrixRowIndices, std::vector<std::pair<uint_fast64_t, double>> const& columnIndicesAndValues, std::vector<double> const& x, std::vector<double>& b);
#endif // STORM_CUDAFORSTORM_BASICVALUEITERATION_H_

resources/cudaForStorm/srcCuda/cuspExtension.h (2)

@@ -2,7 +2,7 @@
* This is an extension of the original CUSP csr_vector.h SPMV implementation.
* It is based on the Code and incorporates changes as to cope with the details
* of the StoRM code.
- * As this is mostly copy & past, the original license still applies.
+ * As this is mostly copy & paste, the original license still applies.
*/
/*

src/storage/SparseMatrix.cpp (16)

@@ -797,6 +797,22 @@ namespace storm {
}
return true;
}
/*!
* Returns a const reference to the internal rowIndications vector
*/
template<typename T>
std::vector<uint_fast64_t> const& SparseMatrix<T>::__internal_getRowIndications() {
return this->rowIndications;
}
/*!
* Returns a const reference to the internal columnsAndValues vector
*/
template<typename T>
std::vector<std::pair<uint_fast64_t, T>> const& SparseMatrix<T>::__internal_getColumnsAndValues() {
return this->columnsAndValues;
}
template<typename T>
std::ostream& operator<<(std::ostream& out, SparseMatrix<T> const& matrix) {

src/storage/SparseMatrix.h (9)

@@ -583,6 +583,15 @@ namespace storm {
* @return size_t A hash value for this matrix.
*/
std::size_t hash() const;
/*!
* Returns a const reference to the internal rowIndications vector
*/
std::vector<uint_fast64_t> const& __internal_getRowIndications();
/*!
* Returns a const reference to the internal columnsAndValues vector
*/
std::vector<std::pair<uint_fast64_t, T>> const& __internal_getColumnsAndValues();
private:
// The number of rows of the matrix.

test/functional/solver/CudaPluginTest.cpp (65)

@@ -0,0 +1,65 @@
#include "gtest/gtest.h"
#include "src/storage/SparseMatrix.h"
#include "src/exceptions/InvalidStateException.h"
#include "src/exceptions/OutOfRangeException.h"
#include "storm-config.h"
#ifdef STORM_HAVE_CUDAFORSTORM
#include "cudaForStorm.h"
TEST(CudaPlugin, CreationWithDimensions) {
storm::storage::SparseMatrixBuilder<double> matrixBuilder(4, 4, 10);
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 1, 1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 3, -1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 0, 8.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 7.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 2, -5.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 3, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 0, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 1, 2.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 2, 4.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(2, 3, 4.0));
storm::storage::SparseMatrix<double> matrix;
ASSERT_NO_THROW(matrix = matrixBuilder.build());
ASSERT_EQ(4, matrix.getRowCount());
ASSERT_EQ(4, matrix.getColumnCount());
ASSERT_EQ(10, matrix.getEntryCount());
std::vector<double> x({0, 4, 1, 1});
std::vector<double> b({0, 0, 0, 0});
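// Compute b = matrix * x on the device; for x = (0, 4, 1, 1) the expected result is (3, 25, 16, 0).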
ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b));
ASSERT_EQ(b.at(0), 3);
ASSERT_EQ(b.at(1), 25);
ASSERT_EQ(b.at(2), 16);
ASSERT_EQ(b.at(3), 0);
}
TEST(CudaPlugin, VerySmall) {
storm::storage::SparseMatrixBuilder<double> matrixBuilder(2, 2, 2);
ASSERT_NO_THROW(matrixBuilder.addNextValue(0, 0, 1.0));
ASSERT_NO_THROW(matrixBuilder.addNextValue(1, 1, 2.0));
storm::storage::SparseMatrix<double> matrix;
ASSERT_NO_THROW(matrix = matrixBuilder.build());
ASSERT_EQ(2, matrix.getRowCount());
ASSERT_EQ(2, matrix.getColumnCount());
ASSERT_EQ(2, matrix.getEntryCount());
std::vector<double> x({ 4.0, 8.0 });
std::vector<double> b({ 0.0, 0.0 });
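// Compute b = matrix * x on the device; with matrix = diag(1, 2) and x = (4, 8) the expected result is (4, 16).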
ASSERT_NO_THROW(basicValueIteration_spmv_uint64_double(matrix.getColumnCount(), matrix.__internal_getRowIndications(), matrix.__internal_getColumnsAndValues(), x, b));
ASSERT_EQ(b.at(0), 4.0);
ASSERT_EQ(b.at(1), 16.0);
}
#endif