From d3f513b0a0a62484e438bbd157d1241fb734deba Mon Sep 17 00:00:00 2001 From: PBerger Date: Sat, 15 Mar 2014 18:28:56 +0100 Subject: [PATCH] Added debug output to CUDA Kernel. Added a performance test for the CUDA stuff. Former-commit-id: 9953befdea6d5235fe6dc8cec95028fcc062d46c --- .../srcCuda/basicValueIteration.cu | 11 +- ...onNondeterministicLinearEquationSolver.cpp | 80 ++++++-- ...ValueIterationMdpPrctlModelCheckerTest.cpp | 171 ++++++++++++++++++ 3 files changed, 243 insertions(+), 19 deletions(-) create mode 100644 test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp diff --git a/resources/cudaForStorm/srcCuda/basicValueIteration.cu b/resources/cudaForStorm/srcCuda/basicValueIteration.cu index 25541adbc..d3c412ddf 100644 --- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu +++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu @@ -58,11 +58,10 @@ void exploadVector(std::vector> const& inputVect template void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueType const precision, std::vector const& matrixRowIndices, std::vector> const& columnIndicesAndValues, std::vector& x, std::vector const& b, std::vector const& nondeterministicChoiceIndices) { - std::vector matrixColumnIndices; std::vector matrixValues; exploadVector(columnIndicesAndValues, matrixColumnIndices, matrixValues); - + IndexType* device_matrixRowIndices = nullptr; IndexType* device_matrixColIndices = nullptr; ValueType* device_matrixValues = nullptr; @@ -72,10 +71,12 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy ValueType* device_multiplyResult = nullptr; IndexType* device_nondeterministicChoiceIndices = nullptr; +#ifdef DEBUG std::cout.sync_with_stdio(true); std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast(getFreeCudaMemory()) / static_cast(getTotalCudaMemory()))*100 << "%)." << std::endl; size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size() + sizeof(ValueType) * b.size() + sizeof(IndexType) * nondeterministicChoiceIndices.size(); std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl; +#endif const IndexType matrixRowCount = matrixRowIndices.size() - 1; const IndexType matrixColCount = nondeterministicChoiceIndices.size() - 1; @@ -237,7 +238,9 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy // Swap pointers, device_x always contains the most current result std::swap(device_x, device_xSwap); } +#ifdef DEBUG std::cout << "(DLL) Executed " << iterationCount << " of max. " << maxIterationCount << " Iterations." << std::endl; +#endif // Get x back from the device cudaCopyResult = cudaMemcpy(x.data(), device_x, sizeof(ValueType) * matrixColCount, cudaMemcpyDeviceToHost); @@ -311,17 +314,19 @@ void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector matrixColumnIndices; std::vector matrixValues; exploadVector(columnIndicesAndValues, matrixColumnIndices, matrixValues); - + IndexType* device_matrixRowIndices = nullptr; IndexType* device_matrixColIndices = nullptr; ValueType* device_matrixValues = nullptr; ValueType* device_x = nullptr; ValueType* device_multiplyResult = nullptr; +#ifdef DEBUG std::cout.sync_with_stdio(true); std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast(getFreeCudaMemory()) / static_cast(getTotalCudaMemory()))*100 << "%)." << std::endl; size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size(); std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl; +#endif const IndexType matrixRowCount = matrixRowIndices.size() - 1; const IndexType matrixNnzCount = columnIndicesAndValues.size(); diff --git a/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp b/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp index 14f94f807..f50e8caf0 100644 --- a/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp +++ b/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp @@ -8,6 +8,7 @@ #include "src/models/PseudoModel.h" #include "src/storage/StronglyConnectedComponentDecomposition.h" #include "src/exceptions/IllegalArgumentException.h" +#include "src/exceptions/InvalidStateException.h" #include "log4cplus/logger.h" #include "log4cplus/loggingmacros.h" @@ -83,7 +84,6 @@ namespace storm { for (auto sccIndexIt = topologicalSort.begin(); sccIndexIt != topologicalSort.end() && converged; ++sccIndexIt) { storm::storage::StateBlock const& scc = sccDecomposition[*sccIndexIt]; - std::cout << "SCC Index: " << *sccIndexIt << std::endl; // Generate a submatrix storm::storage::BitVector subMatrixIndices(A.getColumnCount(), scc.cbegin(), scc.cend()); @@ -125,7 +125,24 @@ namespace storm { } // For the current SCC, we need to perform value iteration until convergence. -#ifndef STORM_HAVE_CUDAFORSTORM +#ifdef STORM_HAVE_CUDAFORSTORM + if (!resetCudaDevice()) { + LOG4CPLUS_ERROR(logger, "Could not reset CUDA Device, can not use CUDA Equation Solver."); + throw storm::exceptions::InvalidStateException() << "Could not reset CUDA Device, can not use CUDA Equation Solver."; + } + + LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast(getFreeCudaMemory()) / static_cast(getTotalCudaMemory())) * 100 << "%)."); + LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes."); + LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion()); + + std::vector copyX(*currentX); + if (minimize) { + basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices); + } + else { + basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices); + } + localIterations = 0; converged = false; while (!converged && localIterations < this->maximalNumberOfIterations) { @@ -139,7 +156,7 @@ namespace storm { /* Versus: A.multiplyWithVector(*currentX, *multiplyResult); - storm::utility::vector::addVectorsInPlace(*multiplyResult, b); + storm::utility::vector::addVectorsInPlace(*multiplyResult, b); */ // Reduce the vector x' by applying min/max for all non-deterministic choices. @@ -162,22 +179,53 @@ namespace storm { ++localIterations; ++globalIterations; } - std::cout << "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations." << std::endl; -#else - if (!resetCudaDevice()) { - std::cout << "Could not reset CUDA Device!" << std::endl; - } - std::cout << "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast(getFreeCudaMemory()) / static_cast(getTotalCudaMemory())) * 100 << "%)." << std::endl; - size_t memSize = sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size(); - std::cout << "We will allocate " << memSize << " Bytes." << std::endl; - std::cout << "The CUDA Runtime Version is " << getRuntimeCudaVersion() << std::endl; + LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations."); - if (minimize) { - basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices); + uint_fast64_t diffCount = 0; + for (size_t i = 0; i < currentX->size(); ++i) { + if (currentX->at(i) != copyX.at(i)) { + LOG4CPLUS_WARN(logger, "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i)); + std::cout << "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i) << std::endl; + } } - else { - basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices); +#else + localIterations = 0; + converged = false; + while (!converged && localIterations < this->maximalNumberOfIterations) { + // Compute x' = A*x + b. + sccSubmatrix.multiplyWithVector(*currentX, sccMultiplyResult); + storm::utility::vector::addVectorsInPlace(sccMultiplyResult, sccSubB); + + //A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult); + //storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b); + + /* + Versus: + A.multiplyWithVector(*currentX, *multiplyResult); + storm::utility::vector::addVectorsInPlace(*multiplyResult, b); + */ + + // Reduce the vector x' by applying min/max for all non-deterministic choices. + if (minimize) { + storm::utility::vector::reduceVectorMin(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices); + } + else { + storm::utility::vector::reduceVectorMax(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices); + } + + // Determine whether the method converged. + // TODO: It seems that the equalModuloPrecision call that compares all values should have a higher + // running time. In fact, it is faster. This has to be investigated. + // converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative); + converged = storm::utility::vector::equalModuloPrecision(*currentX, *swap, this->precision, this->relative); + + // Update environment variables. + std::swap(currentX, swap); + + ++localIterations; + ++globalIterations; } + LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations."); #endif // The Result of this SCC has to be taken back into the main result vector diff --git a/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp b/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp new file mode 100644 index 000000000..2f0e35393 --- /dev/null +++ b/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp @@ -0,0 +1,171 @@ +#include "gtest/gtest.h" +#include "storm-config.h" + +#include "src/settings/Settings.h" +#include "src/modelchecker/prctl/TopologicalValueIterationMdpPrctlModelChecker.h" +#include "src/solver/NativeNondeterministicLinearEquationSolver.h" +#include "src/parser/AutoParser.h" + +TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, AsynchronousLeader) { + storm::settings::Settings* s = storm::settings::Settings::getInstance(); + storm::parser::AutoParser parser(STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.tra", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.lab", "", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.trans.rew"); + + ASSERT_EQ(parser.getType(), storm::models::MDP); + + std::shared_ptr> mdp = parser.getModel>(); + + ASSERT_EQ(mdp->getNumberOfStates(), 2095783ull); + ASSERT_EQ(mdp->getNumberOfTransitions(), 7714385ull); + + storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker mc(*mdp); + + storm::property::prctl::Ap* apFormula = new storm::property::prctl::Ap("elected"); + storm::property::prctl::Eventually* eventuallyFormula = new storm::property::prctl::Eventually(apFormula); + storm::property::prctl::ProbabilisticNoBoundOperator* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, true); + + std::vector result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("elected"); + eventuallyFormula = new storm::property::prctl::Eventually(apFormula); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, false); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("elected"); + storm::property::prctl::BoundedEventually* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually(apFormula, 25); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(boundedEventuallyFormula, true); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("elected"); + boundedEventuallyFormula = new storm::property::prctl::BoundedEventually(apFormula, 25); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(boundedEventuallyFormula, false); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("elected"); + storm::property::prctl::ReachabilityReward* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward(apFormula); + storm::property::prctl::RewardNoBoundOperator* rewardFormula = new storm::property::prctl::RewardNoBoundOperator(reachabilityRewardFormula, true); + + result = mc.checkNoBoundOperator(*rewardFormula); + + ASSERT_LT(std::abs(result[0] - 6.172433512), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete rewardFormula; + + apFormula = new storm::property::prctl::Ap("elected"); + reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward(apFormula); + rewardFormula = new storm::property::prctl::RewardNoBoundOperator(reachabilityRewardFormula, false); + + result = mc.checkNoBoundOperator(*rewardFormula); + + ASSERT_LT(std::abs(result[0] - 6.1724344), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete rewardFormula; +} + +TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, Consensus) { + storm::settings::Settings* s = storm::settings::Settings::getInstance(); + // Increase the maximal number of iterations, because the solver does not converge otherwise. + // This is done in the main cpp unit + + storm::parser::AutoParser parser(STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.tra", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.lab", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.steps.state.rew", ""); + + ASSERT_EQ(parser.getType(), storm::models::MDP); + + std::shared_ptr> mdp = parser.getModel>(); + + ASSERT_EQ(mdp->getNumberOfStates(), 63616ull); + ASSERT_EQ(mdp->getNumberOfTransitions(), 213472ull); + + storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker mc(*mdp); + + storm::property::prctl::Ap* apFormula = new storm::property::prctl::Ap("finished"); + storm::property::prctl::Eventually* eventuallyFormula = new storm::property::prctl::Eventually(apFormula); + storm::property::prctl::ProbabilisticNoBoundOperator* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, true); + + std::vector result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + storm::property::prctl::Ap* apFormula2 = new storm::property::prctl::Ap("all_coins_equal_0"); + storm::property::prctl::And* andFormula = new storm::property::prctl::And(apFormula, apFormula2); + eventuallyFormula = new storm::property::prctl::Eventually(andFormula); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, true); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 0.4374282832), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + apFormula2 = new storm::property::prctl::Ap("all_coins_equal_1"); + andFormula = new storm::property::prctl::And(apFormula, apFormula2); + eventuallyFormula = new storm::property::prctl::Eventually(andFormula); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, false); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 0.5293286369), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + apFormula2 = new storm::property::prctl::Ap("agree"); + storm::property::prctl::Not* notFormula = new storm::property::prctl::Not(apFormula2); + andFormula = new storm::property::prctl::And(apFormula, notFormula); + eventuallyFormula = new storm::property::prctl::Eventually(andFormula); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(eventuallyFormula, false); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 0.10414097), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + storm::property::prctl::BoundedEventually* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually(apFormula, 50ull); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(boundedEventuallyFormula, true); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + boundedEventuallyFormula = new storm::property::prctl::BoundedEventually(apFormula, 50ull); + probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator(boundedEventuallyFormula, false); + + result = mc.checkNoBoundOperator(*probFormula); + + ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete probFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + storm::property::prctl::ReachabilityReward* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward(apFormula); + storm::property::prctl::RewardNoBoundOperator* rewardFormula = new storm::property::prctl::RewardNoBoundOperator(reachabilityRewardFormula, true); + + result = mc.checkNoBoundOperator(*rewardFormula); + + ASSERT_LT(std::abs(result[31168] - 1725.593313), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete rewardFormula; + + apFormula = new storm::property::prctl::Ap("finished"); + reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward(apFormula); + rewardFormula = new storm::property::prctl::RewardNoBoundOperator(reachabilityRewardFormula, false); + + result = mc.checkNoBoundOperator(*rewardFormula); + + ASSERT_LT(std::abs(result[31168] - 2183.142422), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble()); + delete rewardFormula; +} \ No newline at end of file