Browse Source

Added debug output to CUDA Kernel.

Added a performance test for the CUDA stuff.


Former-commit-id: 9953befdea
tempestpy_adaptions
PBerger 11 years ago
parent
commit
d3f513b0a0
  1. 11
      resources/cudaForStorm/srcCuda/basicValueIteration.cu
  2. 80
      src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp
  3. 171
      test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp

11
resources/cudaForStorm/srcCuda/basicValueIteration.cu

@ -58,11 +58,10 @@ void exploadVector(std::vector<std::pair<IndexType, ValueType>> const& inputVect
template <bool Minimize, bool Relative, typename IndexType, typename ValueType>
void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueType const precision, std::vector<IndexType> const& matrixRowIndices, std::vector<std::pair<IndexType, ValueType>> const& columnIndicesAndValues, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<IndexType> const& nondeterministicChoiceIndices) {
std::vector<IndexType> matrixColumnIndices;
std::vector<ValueType> matrixValues;
exploadVector<IndexType, ValueType>(columnIndicesAndValues, matrixColumnIndices, matrixValues);
IndexType* device_matrixRowIndices = nullptr;
IndexType* device_matrixColIndices = nullptr;
ValueType* device_matrixValues = nullptr;
@ -72,10 +71,12 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy
ValueType* device_multiplyResult = nullptr;
IndexType* device_nondeterministicChoiceIndices = nullptr;
#ifdef DEBUG
std::cout.sync_with_stdio(true);
std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl;
size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size() + sizeof(ValueType) * b.size() + sizeof(IndexType) * nondeterministicChoiceIndices.size();
std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
#endif
const IndexType matrixRowCount = matrixRowIndices.size() - 1;
const IndexType matrixColCount = nondeterministicChoiceIndices.size() - 1;
@ -237,7 +238,9 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy
// Swap pointers, device_x always contains the most current result
std::swap(device_x, device_xSwap);
}
#ifdef DEBUG
std::cout << "(DLL) Executed " << iterationCount << " of max. " << maxIterationCount << " Iterations." << std::endl;
#endif
// Get x back from the device
cudaCopyResult = cudaMemcpy(x.data(), device_x, sizeof(ValueType) * matrixColCount, cudaMemcpyDeviceToHost);
@ -311,17 +314,19 @@ void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector<In
std::vector<IndexType> matrixColumnIndices;
std::vector<ValueType> matrixValues;
exploadVector<IndexType, ValueType>(columnIndicesAndValues, matrixColumnIndices, matrixValues);
IndexType* device_matrixRowIndices = nullptr;
IndexType* device_matrixColIndices = nullptr;
ValueType* device_matrixValues = nullptr;
ValueType* device_x = nullptr;
ValueType* device_multiplyResult = nullptr;
#ifdef DEBUG
std::cout.sync_with_stdio(true);
std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl;
size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size();
std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
#endif
const IndexType matrixRowCount = matrixRowIndices.size() - 1;
const IndexType matrixNnzCount = columnIndicesAndValues.size();

80
src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp

@ -8,6 +8,7 @@
#include "src/models/PseudoModel.h"
#include "src/storage/StronglyConnectedComponentDecomposition.h"
#include "src/exceptions/IllegalArgumentException.h"
#include "src/exceptions/InvalidStateException.h"
#include "log4cplus/logger.h"
#include "log4cplus/loggingmacros.h"
@ -83,7 +84,6 @@ namespace storm {
for (auto sccIndexIt = topologicalSort.begin(); sccIndexIt != topologicalSort.end() && converged; ++sccIndexIt) {
storm::storage::StateBlock const& scc = sccDecomposition[*sccIndexIt];
std::cout << "SCC Index: " << *sccIndexIt << std::endl;
// Generate a submatrix
storm::storage::BitVector subMatrixIndices(A.getColumnCount(), scc.cbegin(), scc.cend());
@ -125,7 +125,24 @@ namespace storm {
}
// For the current SCC, we need to perform value iteration until convergence.
#ifndef STORM_HAVE_CUDAFORSTORM
#ifdef STORM_HAVE_CUDAFORSTORM
if (!resetCudaDevice()) {
LOG4CPLUS_ERROR(logger, "Could not reset CUDA Device, can not use CUDA Equation Solver.");
throw storm::exceptions::InvalidStateException() << "Could not reset CUDA Device, can not use CUDA Equation Solver.";
}
LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%).");
LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes.");
LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion());
std::vector<ValueType> copyX(*currentX);
if (minimize) {
basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices);
}
else {
basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices);
}
localIterations = 0;
converged = false;
while (!converged && localIterations < this->maximalNumberOfIterations) {
@ -139,7 +156,7 @@ namespace storm {
/*
Versus:
A.multiplyWithVector(*currentX, *multiplyResult);
storm::utility::vector::addVectorsInPlace(*multiplyResult, b);
storm::utility::vector::addVectorsInPlace(*multiplyResult, b);
*/
// Reduce the vector x' by applying min/max for all non-deterministic choices.
@ -162,22 +179,53 @@ namespace storm {
++localIterations;
++globalIterations;
}
std::cout << "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations." << std::endl;
#else
if (!resetCudaDevice()) {
std::cout << "Could not reset CUDA Device!" << std::endl;
}
std::cout << "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%)." << std::endl;
size_t memSize = sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size();
std::cout << "We will allocate " << memSize << " Bytes." << std::endl;
std::cout << "The CUDA Runtime Version is " << getRuntimeCudaVersion() << std::endl;
LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations.");
if (minimize) {
basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices);
uint_fast64_t diffCount = 0;
for (size_t i = 0; i < currentX->size(); ++i) {
if (currentX->at(i) != copyX.at(i)) {
LOG4CPLUS_WARN(logger, "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i));
std::cout << "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i) << std::endl;
}
}
else {
basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices);
#else
localIterations = 0;
converged = false;
while (!converged && localIterations < this->maximalNumberOfIterations) {
// Compute x' = A*x + b.
sccSubmatrix.multiplyWithVector(*currentX, sccMultiplyResult);
storm::utility::vector::addVectorsInPlace<ValueType>(sccMultiplyResult, sccSubB);
//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
/*
Versus:
A.multiplyWithVector(*currentX, *multiplyResult);
storm::utility::vector::addVectorsInPlace(*multiplyResult, b);
*/
// Reduce the vector x' by applying min/max for all non-deterministic choices.
if (minimize) {
storm::utility::vector::reduceVectorMin<ValueType>(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices);
}
else {
storm::utility::vector::reduceVectorMax<ValueType>(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices);
}
// Determine whether the method converged.
// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
// running time. In fact, it is faster. This has to be investigated.
// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
converged = storm::utility::vector::equalModuloPrecision<ValueType>(*currentX, *swap, this->precision, this->relative);
// Update environment variables.
std::swap(currentX, swap);
++localIterations;
++globalIterations;
}
LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations.");
#endif
// The Result of this SCC has to be taken back into the main result vector

171
test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp

@ -0,0 +1,171 @@
#include "gtest/gtest.h"
#include "storm-config.h"
#include "src/settings/Settings.h"
#include "src/modelchecker/prctl/TopologicalValueIterationMdpPrctlModelChecker.h"
#include "src/solver/NativeNondeterministicLinearEquationSolver.h"
#include "src/parser/AutoParser.h"
TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, AsynchronousLeader) {
storm::settings::Settings* s = storm::settings::Settings::getInstance();
storm::parser::AutoParser<double> parser(STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.tra", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.lab", "", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.trans.rew");
ASSERT_EQ(parser.getType(), storm::models::MDP);
std::shared_ptr<storm::models::Mdp<double>> mdp = parser.getModel<storm::models::Mdp<double>>();
ASSERT_EQ(mdp->getNumberOfStates(), 2095783ull);
ASSERT_EQ(mdp->getNumberOfTransitions(), 7714385ull);
storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker<double> mc(*mdp);
storm::property::prctl::Ap<double>* apFormula = new storm::property::prctl::Ap<double>("elected");
storm::property::prctl::Eventually<double>* eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
storm::property::prctl::ProbabilisticNoBoundOperator<double>* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
std::vector<double> result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("elected");
eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("elected");
storm::property::prctl::BoundedEventually<double>* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 25);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, true);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("elected");
boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 25);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, false);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("elected");
storm::property::prctl::ReachabilityReward<double>* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
storm::property::prctl::RewardNoBoundOperator<double>* rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, true);
result = mc.checkNoBoundOperator(*rewardFormula);
ASSERT_LT(std::abs(result[0] - 6.172433512), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete rewardFormula;
apFormula = new storm::property::prctl::Ap<double>("elected");
reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, false);
result = mc.checkNoBoundOperator(*rewardFormula);
ASSERT_LT(std::abs(result[0] - 6.1724344), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete rewardFormula;
}
TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, Consensus) {
storm::settings::Settings* s = storm::settings::Settings::getInstance();
// Increase the maximal number of iterations, because the solver does not converge otherwise.
// This is done in the main cpp unit
storm::parser::AutoParser<double> parser(STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.tra", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.lab", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.steps.state.rew", "");
ASSERT_EQ(parser.getType(), storm::models::MDP);
std::shared_ptr<storm::models::Mdp<double>> mdp = parser.getModel<storm::models::Mdp<double>>();
ASSERT_EQ(mdp->getNumberOfStates(), 63616ull);
ASSERT_EQ(mdp->getNumberOfTransitions(), 213472ull);
storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker<double> mc(*mdp);
storm::property::prctl::Ap<double>* apFormula = new storm::property::prctl::Ap<double>("finished");
storm::property::prctl::Eventually<double>* eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
storm::property::prctl::ProbabilisticNoBoundOperator<double>* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
std::vector<double> result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
storm::property::prctl::Ap<double>* apFormula2 = new storm::property::prctl::Ap<double>("all_coins_equal_0");
storm::property::prctl::And<double>* andFormula = new storm::property::prctl::And<double>(apFormula, apFormula2);
eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 0.4374282832), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
apFormula2 = new storm::property::prctl::Ap<double>("all_coins_equal_1");
andFormula = new storm::property::prctl::And<double>(apFormula, apFormula2);
eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 0.5293286369), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
apFormula2 = new storm::property::prctl::Ap<double>("agree");
storm::property::prctl::Not<double>* notFormula = new storm::property::prctl::Not<double>(apFormula2);
andFormula = new storm::property::prctl::And<double>(apFormula, notFormula);
eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 0.10414097), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
storm::property::prctl::BoundedEventually<double>* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 50ull);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, true);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 50ull);
probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, false);
result = mc.checkNoBoundOperator(*probFormula);
ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete probFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
storm::property::prctl::ReachabilityReward<double>* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
storm::property::prctl::RewardNoBoundOperator<double>* rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, true);
result = mc.checkNoBoundOperator(*rewardFormula);
ASSERT_LT(std::abs(result[31168] - 1725.593313), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete rewardFormula;
apFormula = new storm::property::prctl::Ap<double>("finished");
reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, false);
result = mc.checkNoBoundOperator(*rewardFormula);
ASSERT_LT(std::abs(result[31168] - 2183.142422), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
delete rewardFormula;
}
Loading…
Cancel
Save