diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.cu b/resources/cudaForStorm/srcCuda/basicValueIteration.cu index 33ee6218c..6e84d5e70 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu @@ -57,10 +57,11 @@ void exploadVector(std::vector> const& inputVect } template -void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueType const precision, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { +bool basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueType const precision, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices, size_t& iterationCount) { //std::vector matrixColumnIndices; //std::vector matrixValues; //exploadVector(columnIndicesAndValues, matrixColumnIndices, matrixValues); + bool errorOccured = false; IndexType* device_matrixRowIndices = nullptr; IndexType* device_matrixColIndicesAndValues = nullptr; @@ -85,12 +86,13 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaError_t cudaMallocResult; bool converged = false; - uint_fast64_t iterationCount = 0; + iterationCount = 0; CUDA_CHECK_ALL_ERRORS(); cudaMallocResult = cudaMalloc(reinterpret_cast(&device_matrixRowIndices), sizeof(IndexType) * (matrixRowCount + 1)); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Matrix Row Indices, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -98,6 +100,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_matrixColIndicesAndValues), sizeof(IndexType) * matrixNnzCount + sizeof(ValueType) * matrixNnzCount); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Matrix Column Indices and Values, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -105,6 +108,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_x), sizeof(ValueType) * matrixColCount); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector x, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -112,6 +116,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_xSwap), sizeof(ValueType) * matrixColCount); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector x swap, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -119,6 +124,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_b), sizeof(ValueType) * matrixRowCount); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector b, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -126,6 +132,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_multiplyResult), sizeof(ValueType) * matrixRowCount); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Vector multiplyResult, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -133,6 +140,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaMallocResult = cudaMalloc(reinterpret_cast(&device_nondeterministicChoiceIndices), sizeof(IndexType) * (matrixColCount + 1)); if (cudaMallocResult != cudaSuccess) { std::cout << "Could not allocate memory for Nondeterministic Choice Indices, Error Code " << cudaMallocResult << "." << std::endl; + errorOccured = true; goto cleanup; } @@ -147,6 +155,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(device_matrixRowIndices, matrixRowIndices.data(), sizeof(IndexType) * (matrixRowCount + 1), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Matrix Row Indices, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -154,6 +163,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(device_matrixColIndicesAndValues, columnIndicesAndValues.data(), (sizeof(IndexType) * matrixNnzCount) + (sizeof(ValueType) * matrixNnzCount), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Matrix Column Indices and Values, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -161,6 +171,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(device_x, x.data(), sizeof(ValueType) * matrixColCount, cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector x, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -169,6 +180,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemset(device_xSwap, 0, sizeof(ValueType) * matrixColCount); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not zero the Swap Vector x, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -176,6 +188,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(device_b, b.data(), sizeof(ValueType) * matrixRowCount, cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -184,6 +197,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemset(device_multiplyResult, 0, sizeof(ValueType) * matrixRowCount); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not zero the multiply Result, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -191,6 +205,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(device_nondeterministicChoiceIndices, nondeterministicChoiceIndices.data(), sizeof(IndexType) * (matrixColCount + 1), cudaMemcpyHostToDevice); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy data for Vector b, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -232,6 +247,12 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy // Swap pointers, device_x always contains the most current result std::swap(device_x, device_xSwap); } + + if (!converged && (iterationCount == maxIterationCount)) { + iterationCount = 0; + errorOccured = true; + } + #ifdef DEBUG std::cout << "(DLL) Finished kernel execution." << std::endl; std::cout << "(DLL) Executed " << iterationCount << " of max. " << maxIterationCount << " Iterations." << std::endl; @@ -241,6 +262,7 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy cudaCopyResult = cudaMemcpy(x.data(), device_x, sizeof(ValueType) * matrixColCount, cudaMemcpyDeviceToHost); if (cudaCopyResult != cudaSuccess) { std::cout << "Could not copy back data for result vector x, Error Code " << cudaCopyResult << std::endl; + errorOccured = true; goto cleanup; } @@ -254,6 +276,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_matrixRowIndices); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Matrix Row Indices, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_matrixRowIndices = nullptr; } @@ -261,6 +284,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_matrixColIndicesAndValues); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Matrix Column Indices and Values, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_matrixColIndicesAndValues = nullptr; } @@ -268,6 +292,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_x); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Vector x, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_x = nullptr; } @@ -275,6 +300,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_xSwap); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Vector x swap, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_xSwap = nullptr; } @@ -282,6 +308,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_b); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Vector b, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_b = nullptr; } @@ -289,6 +316,7 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_multiplyResult); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Vector multiplyResult, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_multiplyResult = nullptr; } @@ -296,12 +324,15 @@ cleanup: cudaError_t cudaFreeResult = cudaFree(device_nondeterministicChoiceIndices); if (cudaFreeResult != cudaSuccess) { std::cout << "Could not free Memory of Nondeterministic Choice Indices, Error Code " << cudaFreeResult << "." << std::endl; + errorOccured = true; } device_nondeterministicChoiceIndices = nullptr; } #ifdef DEBUG std::cout << "(DLL) Finished cleanup." << std::endl; #endif + + return !errorOccured; } template @@ -705,19 +736,19 @@ void basicValueIteration_equalModuloPrecision_double_NonRelative(std::vector(x, y, maxElement); } -void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { +bool basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices, size_t& iterationCount) { if (relativePrecisionCheck) { - basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); + return basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices, iterationCount); } else { - basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); + return basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices, iterationCount); } } -void basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { +bool basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices, size_t& iterationCount) { if (relativePrecisionCheck) { - basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); + return basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices, iterationCount); } else { - basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices); + return basicValueIteration_mvReduce(maxIterationCount, precision, matrixRowIndices, columnIndicesAndValues, x, b, nondeterministicChoiceIndices, iterationCount); } } diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.h b/resources/cudaForStorm/srcCuda/basicValueIteration.h index 1f1f88060..728ba07f9 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.h +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.h @@ -83,8 +83,8 @@ template #endif cudaForStorm_EXPORT size_t basicValueIteration_mvReduce_uint64_double_calculateMemorySize(size_t const rowCount, size_t const rowGroupCount, size_t const nnzCount); -cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); -cudaForStorm_EXPORT void basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices); +cudaForStorm_EXPORT bool basicValueIteration_mvReduce_uint64_double_minimize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices, size_t& iterationCount); +cudaForStorm_EXPORT bool basicValueIteration_mvReduce_uint64_double_maximize(uint_fast64_t const maxIterationCount, double const precision, bool const relativePrecisionCheck, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices, size_t& iterationCount); cudaForStorm_EXPORT void basicValueIteration_spmv_uint64_double(uint_fast64_t const matrixColCount, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector const& x, std::vector& b); cudaForStorm_EXPORT void basicValueIteration_addVectorsInplace_double(std::vector& a, std::vector const& b); cudaForStorm_EXPORT void basicValueIteration_reduceGroupedVector_uint64_double_minimize(std::vector const& groupedVector, std::vector const& grouping, std::vector& targetVector); diff --git a/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp b/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp index dce426ec2..ee2a59bc7 100644 --- a/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp +++ b/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp @@ -64,9 +64,9 @@ namespace storm { std::vector* currentX = nullptr; std::vector* swap = nullptr; - uint_fast64_t currentMaxLocalIterations = 0; - uint_fast64_t localIterations = 0; - uint_fast64_t globalIterations = 0; + size_t currentMaxLocalIterations = 0; + size_t localIterations = 0; + size_t globalIterations = 0; bool converged = true; // Iterate over all SCCs of the MDP as specified by the topological sort. This guarantees that an SCC is only @@ -128,13 +128,30 @@ namespace storm { LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes."); LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion()); + bool result = false; + localIterations = 0; if (minimize) { - basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices); + result = basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices, localIterations); } else { - basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices); + result = basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices, localIterations); + } + LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations on GPU."); + + if (!result) { + converged = false; + LOG4CPLUS_ERROR(logger, "An error occurred in the CUDA Plugin. Can not continue."); + throw storm::exceptions::InvalidStateException() << "An error occurred in the CUDA Plugin. Can not continue."; + } else { + converged = true; } - converged = true; + + // As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep + // track of the maximum. + if (localIterations > currentMaxLocalIterations) { + currentMaxLocalIterations = localIterations; + } + #else LOG4CPLUS_ERROR(logger, "The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error!"); throw storm::exceptions::InvalidStateException() << "The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error!"; diff --git a/src/storage/SparseMatrix.cpp b/src/storage/SparseMatrix.cpp index 4ffcbe991..c300e6858 100644 --- a/src/storage/SparseMatrix.cpp +++ b/src/storage/SparseMatrix.cpp @@ -787,8 +787,8 @@ namespace storm { const_iterator ite; std::vector::const_iterator rowIterator = this->rowIndications.begin() + startRow; std::vector::const_iterator rowIteratorEnd = this->rowIndications.begin() + endRow; - typename std::vector::iterator resultIterator = result.begin() + startRow; - typename std::vector::iterator resultIteratorEnd = result.begin() + endRow; + std::vector::iterator resultIterator = result.begin() + startRow; + std::vector::iterator resultIteratorEnd = result.begin() + endRow; for (; resultIterator != resultIteratorEnd; ++rowIterator, ++resultIterator) { *resultIterator = storm::utility::constantZero(); diff --git a/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp b/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp index 2f0e35393..441c4a3bb 100644 --- a/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp +++ b/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp @@ -8,11 +8,7 @@ TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, AsynchronousLeader) { storm::settings::Settings* s = storm::settings::Settings::getInstance(); - storm::parser::AutoParser parser(STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.tra", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.lab", "", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.trans.rew"); - - ASSERT_EQ(parser.getType(), storm::models::MDP); - - std::shared_ptr> mdp = parser.getModel>(); + std::shared_ptr> mdp = storm::parser::AutoParser::parseModel(STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.tra", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.lab", "", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.trans.rew")->as>(); ASSERT_EQ(mdp->getNumberOfStates(), 2095783ull); ASSERT_EQ(mdp->getNumberOfTransitions(), 7714385ull); @@ -79,11 +75,7 @@ TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, Consensus) { // Increase the maximal number of iterations, because the solver does not converge otherwise. // This is done in the main cpp unit - storm::parser::AutoParser parser(STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.tra", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.lab", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.steps.state.rew", ""); - - ASSERT_EQ(parser.getType(), storm::models::MDP); - - std::shared_ptr> mdp = parser.getModel>(); + std::shared_ptr> mdp = storm::parser::AutoParser::parseModel(STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.tra", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.lab", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.steps.state.rew", "")->as>(); ASSERT_EQ(mdp->getNumberOfStates(), 63616ull); ASSERT_EQ(mdp->getNumberOfTransitions(), 213472ull);