Added debug output to CUDA Kernel.

Added a performance test for the CUDA stuff. Former-commit-id: 9953befdea
12 years ago · d3f513b0a0
3 changed files with 243 additions and 19 deletions
--- a/resources/cudaForStorm/srcCuda/basicValueIteration.cu
+++ b/resources/cudaForStorm/srcCuda/basicValueIteration.cu
@ -58,11 +58,10 @@ void exploadVector(std::vector<std::pair<IndexType, ValueType>> const& inputVect

 template <bool Minimize, bool Relative, typename IndexType, typename ValueType>
 void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueType const precision, std::vector<IndexType> const& matrixRowIndices, std::vector<std::pair<IndexType, ValueType>> const& columnIndicesAndValues, std::vector<ValueType>& x, std::vector<ValueType> const& b, std::vector<IndexType> const& nondeterministicChoiceIndices) {
-	
 	std::vector<IndexType> matrixColumnIndices;
 	std::vector<ValueType> matrixValues;
 	exploadVector<IndexType, ValueType>(columnIndicesAndValues, matrixColumnIndices, matrixValues);
-	
+
 	IndexType* device_matrixRowIndices = nullptr;
 	IndexType* device_matrixColIndices = nullptr;
 	ValueType* device_matrixValues = nullptr;
@ -72,10 +71,12 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy
 	ValueType* device_multiplyResult = nullptr;
 	IndexType* device_nondeterministicChoiceIndices = nullptr;

+#ifdef DEBUG
 	std::cout.sync_with_stdio(true);
 	std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl; 
 	size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size() + sizeof(ValueType) * b.size() + sizeof(IndexType) * nondeterministicChoiceIndices.size();
 	std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
+#endif

 	const IndexType matrixRowCount = matrixRowIndices.size() - 1;
 	const IndexType matrixColCount = nondeterministicChoiceIndices.size() - 1;
@ -237,7 +238,9 @@ void basicValueIteration_mvReduce(uint_fast64_t const maxIterationCount, ValueTy
 		// Swap pointers, device_x always contains the most current result
 		std::swap(device_x, device_xSwap);
 	}
+#ifdef DEBUG
 	std::cout << "(DLL) Executed " << iterationCount << " of max. " << maxIterationCount << " Iterations." << std::endl;
+#endif

 	// Get x back from the device
 	cudaCopyResult = cudaMemcpy(x.data(), device_x, sizeof(ValueType) * matrixColCount, cudaMemcpyDeviceToHost);
@ -311,17 +314,19 @@ void basicValueIteration_spmv(uint_fast64_t const matrixColCount, std::vector<In
 	std::vector<IndexType> matrixColumnIndices;
 	std::vector<ValueType> matrixValues;
 	exploadVector<IndexType, ValueType>(columnIndicesAndValues, matrixColumnIndices, matrixValues);
-	
+
 	IndexType* device_matrixRowIndices = nullptr;
 	IndexType* device_matrixColIndices = nullptr;
 	ValueType* device_matrixValues = nullptr;
 	ValueType* device_x = nullptr;
 	ValueType* device_multiplyResult = nullptr;

+#ifdef DEBUG
 	std::cout.sync_with_stdio(true);
 	std::cout << "(DLL) Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory()))*100 << "%)." << std::endl; 
 	size_t memSize = sizeof(IndexType) * matrixRowIndices.size() + sizeof(IndexType) * columnIndicesAndValues.size() * 2 + sizeof(ValueType) * x.size() + sizeof(ValueType) * b.size();
 	std::cout << "(DLL) We will allocate " << memSize << " Bytes." << std::endl;
+#endif

 	const IndexType matrixRowCount = matrixRowIndices.size() - 1;
 	const IndexType matrixNnzCount = columnIndicesAndValues.size();
--- a/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp
+++ b/src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.cpp
@ -8,6 +8,7 @@
 #include "src/models/PseudoModel.h"
 #include "src/storage/StronglyConnectedComponentDecomposition.h"
 #include "src/exceptions/IllegalArgumentException.h"
+#include "src/exceptions/InvalidStateException.h"

 #include "log4cplus/logger.h"
 #include "log4cplus/loggingmacros.h"
@ -83,7 +84,6 @@ namespace storm {

 			for (auto sccIndexIt = topologicalSort.begin(); sccIndexIt != topologicalSort.end() && converged; ++sccIndexIt) {
 				storm::storage::StateBlock const& scc = sccDecomposition[*sccIndexIt];
-				std::cout << "SCC Index: " << *sccIndexIt << std::endl;

 				// Generate a submatrix
 				storm::storage::BitVector subMatrixIndices(A.getColumnCount(), scc.cbegin(), scc.cend());
@ -125,7 +125,24 @@ namespace storm {
 				}

 				// For the current SCC, we need to perform value iteration until convergence.
-#ifndef STORM_HAVE_CUDAFORSTORM
+#ifdef STORM_HAVE_CUDAFORSTORM
+				if (!resetCudaDevice()) {
+					LOG4CPLUS_ERROR(logger, "Could not reset CUDA Device, can not use CUDA Equation Solver.");
+					throw storm::exceptions::InvalidStateException() << "Could not reset CUDA Device, can not use CUDA Equation Solver.";
+				}
+
+				LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%).");
+				LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes.");
+				LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion());
+				
+				std::vector<ValueType> copyX(*currentX);
+				if (minimize) {
+					basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices);
+				}
+				else {
+					basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, copyX, sccSubB, sccSubNondeterministicChoiceIndices);
+				}
+
 				localIterations = 0;
 				converged = false;
 				while (!converged && localIterations < this->maximalNumberOfIterations) {
@ -139,7 +156,7 @@ namespace storm {
 					/*
 					Versus:
 					A.multiplyWithVector(*currentX, *multiplyResult);
-					storm::utility::vector::addVectorsInPlace(*multiplyResult, b);					
+					storm::utility::vector::addVectorsInPlace(*multiplyResult, b);
 					*/

 					// Reduce the vector x' by applying min/max for all non-deterministic choices.
@ -162,22 +179,53 @@ namespace storm {
 					++localIterations;
 					++globalIterations;
 				}
-				std::cout << "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations." << std::endl;
-#else
-				if (!resetCudaDevice()) {
-					std::cout << "Could not reset CUDA Device!" << std::endl;
-				}
-				std::cout << "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%)." << std::endl;
-				size_t memSize = sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size();
-				std::cout << "We will allocate " << memSize << " Bytes." << std::endl;
-				std::cout << "The CUDA Runtime Version is " << getRuntimeCudaVersion() << std::endl;
+				LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations.");

-				if (minimize) {
-					basicValueIteration_mvReduce_uint64_double_minimize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices);
+				uint_fast64_t diffCount = 0;
+				for (size_t i = 0; i < currentX->size(); ++i) {
+					if (currentX->at(i) != copyX.at(i)) {
+						LOG4CPLUS_WARN(logger, "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i));
+						std::cout << "CUDA solution differs on index " << i << " diff. " << std::abs(currentX->at(i) - copyX.at(i)) << ", CPU: " << currentX->at(i) << ", CUDA: " << copyX.at(i) << std::endl;
+					}
 				}
-				else {
-					basicValueIteration_mvReduce_uint64_double_maximize(this->maximalNumberOfIterations, this->precision, this->relative, sccSubmatrix.rowIndications, sccSubmatrix.columnsAndValues, *currentX, sccSubB, sccSubNondeterministicChoiceIndices);
+#else
+				localIterations = 0;
+				converged = false;
+				while (!converged && localIterations < this->maximalNumberOfIterations) {
+					// Compute x' = A*x + b.
+					sccSubmatrix.multiplyWithVector(*currentX, sccMultiplyResult);
+					storm::utility::vector::addVectorsInPlace<ValueType>(sccMultiplyResult, sccSubB);
+
+					//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
+					//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
+
+					/*
+					Versus:
+					A.multiplyWithVector(*currentX, *multiplyResult);
+					storm::utility::vector::addVectorsInPlace(*multiplyResult, b);
+					*/
+
+					// Reduce the vector x' by applying min/max for all non-deterministic choices.
+					if (minimize) {
+						storm::utility::vector::reduceVectorMin<ValueType>(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices);
+					}
+					else {
+						storm::utility::vector::reduceVectorMax<ValueType>(sccMultiplyResult, *swap, sccSubNondeterministicChoiceIndices);
+					}
+
+					// Determine whether the method converged.
+					// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
+					// running time. In fact, it is faster. This has to be investigated.
+					// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
+					converged = storm::utility::vector::equalModuloPrecision<ValueType>(*currentX, *swap, this->precision, this->relative);
+
+					// Update environment variables.
+					std::swap(currentX, swap);
+
+					++localIterations;
+					++globalIterations;
 				}
+				LOG4CPLUS_INFO(logger, "Executed " << localIterations << " of max. " << maximalNumberOfIterations << " Iterations.");
 #endif

 				// The Result of this SCC has to be taken back into the main result vector
--- a/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp
+++ b/test/performance/modelchecker/TopologicalValueIterationMdpPrctlModelCheckerTest.cpp
@ -0,0 +1,171 @@
+#include "gtest/gtest.h"
+#include "storm-config.h"
+
+#include "src/settings/Settings.h"
+#include "src/modelchecker/prctl/TopologicalValueIterationMdpPrctlModelChecker.h"
+#include "src/solver/NativeNondeterministicLinearEquationSolver.h"
+#include "src/parser/AutoParser.h"
+
+TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, AsynchronousLeader) {
+	storm::settings::Settings* s = storm::settings::Settings::getInstance();
+	storm::parser::AutoParser<double> parser(STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.tra", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.lab", "", STORM_CPP_BASE_PATH "/examples/mdp/asynchronous_leader/leader7.trans.rew");
+
+	ASSERT_EQ(parser.getType(), storm::models::MDP);
+
+	std::shared_ptr<storm::models::Mdp<double>> mdp = parser.getModel<storm::models::Mdp<double>>();
+
+	ASSERT_EQ(mdp->getNumberOfStates(), 2095783ull);
+	ASSERT_EQ(mdp->getNumberOfTransitions(), 7714385ull);
+
+	storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker<double> mc(*mdp);
+
+	storm::property::prctl::Ap<double>* apFormula = new storm::property::prctl::Ap<double>("elected");
+	storm::property::prctl::Eventually<double>* eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
+	storm::property::prctl::ProbabilisticNoBoundOperator<double>* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
+
+	std::vector<double> result = mc.checkNoBoundOperator(*probFormula);
+
+	ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete probFormula;
+
+	apFormula = new storm::property::prctl::Ap<double>("elected");
+	eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
+
+	result = mc.checkNoBoundOperator(*probFormula);
+
+	ASSERT_LT(std::abs(result[0] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete probFormula;
+
+	apFormula = new storm::property::prctl::Ap<double>("elected");
+	storm::property::prctl::BoundedEventually<double>* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 25);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, true);
+
+	result = mc.checkNoBoundOperator(*probFormula);
+
+	ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete probFormula;
+
+	apFormula = new storm::property::prctl::Ap<double>("elected");
+	boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 25);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, false);
+
+	result = mc.checkNoBoundOperator(*probFormula);
+
+	ASSERT_LT(std::abs(result[0] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete probFormula;
+
+	apFormula = new storm::property::prctl::Ap<double>("elected");
+	storm::property::prctl::ReachabilityReward<double>* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
+	storm::property::prctl::RewardNoBoundOperator<double>* rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, true);
+
+	result = mc.checkNoBoundOperator(*rewardFormula);
+
+	ASSERT_LT(std::abs(result[0] - 6.172433512), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete rewardFormula;
+
+	apFormula = new storm::property::prctl::Ap<double>("elected");
+	reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
+	rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, false);
+
+	result = mc.checkNoBoundOperator(*rewardFormula);
+
+	ASSERT_LT(std::abs(result[0] - 6.1724344), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete rewardFormula;
+}
+
+TEST(TopologicalValueIterationMdpPrctlModelCheckerTest, Consensus) {
+	storm::settings::Settings* s = storm::settings::Settings::getInstance();
+    // Increase the maximal number of iterations, because the solver does not converge otherwise.
+	// This is done in the main cpp unit
+    
+	storm::parser::AutoParser<double> parser(STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.tra", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.lab", STORM_CPP_BASE_PATH "/examples/mdp/consensus/coin4_6.steps.state.rew", "");
+    
+	ASSERT_EQ(parser.getType(), storm::models::MDP);
+    
+	std::shared_ptr<storm::models::Mdp<double>> mdp = parser.getModel<storm::models::Mdp<double>>();
+    
+	ASSERT_EQ(mdp->getNumberOfStates(), 63616ull);
+	ASSERT_EQ(mdp->getNumberOfTransitions(), 213472ull);
+    
+	storm::modelchecker::prctl::TopologicalValueIterationMdpPrctlModelChecker<double> mc(*mdp);
+    
+    storm::property::prctl::Ap<double>* apFormula = new storm::property::prctl::Ap<double>("finished");
+	storm::property::prctl::Eventually<double>* eventuallyFormula = new storm::property::prctl::Eventually<double>(apFormula);
+	storm::property::prctl::ProbabilisticNoBoundOperator<double>* probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
+    
+	std::vector<double> result = mc.checkNoBoundOperator(*probFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 1.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+    
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+    storm::property::prctl::Ap<double>* apFormula2 = new storm::property::prctl::Ap<double>("all_coins_equal_0");
+    storm::property::prctl::And<double>* andFormula = new storm::property::prctl::And<double>(apFormula, apFormula2);
+	eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, true);
+    
+	result = mc.checkNoBoundOperator(*probFormula);
+
+	ASSERT_LT(std::abs(result[31168] - 0.4374282832), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+    
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+    apFormula2 = new storm::property::prctl::Ap<double>("all_coins_equal_1");
+    andFormula = new storm::property::prctl::And<double>(apFormula, apFormula2);
+    eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
+    
+	result = mc.checkNoBoundOperator(*probFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 0.5293286369), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+    
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+    apFormula2 = new storm::property::prctl::Ap<double>("agree");
+    storm::property::prctl::Not<double>* notFormula = new storm::property::prctl::Not<double>(apFormula2);
+    andFormula = new storm::property::prctl::And<double>(apFormula, notFormula);
+    eventuallyFormula = new storm::property::prctl::Eventually<double>(andFormula);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(eventuallyFormula, false);
+    
+	result = mc.checkNoBoundOperator(*probFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 0.10414097), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+    
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+	storm::property::prctl::BoundedEventually<double>* boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 50ull);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, true);
+    
+	result = mc.checkNoBoundOperator(*probFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+    
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+	boundedEventuallyFormula = new storm::property::prctl::BoundedEventually<double>(apFormula, 50ull);
+	probFormula = new storm::property::prctl::ProbabilisticNoBoundOperator<double>(boundedEventuallyFormula, false);
+    
+	result = mc.checkNoBoundOperator(*probFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 0.0), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+    delete probFormula;
+
+    apFormula = new storm::property::prctl::Ap<double>("finished");
+	storm::property::prctl::ReachabilityReward<double>* reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
+	storm::property::prctl::RewardNoBoundOperator<double>* rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, true);
+    
+	result = mc.checkNoBoundOperator(*rewardFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 1725.593313), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete rewardFormula;
+    
+	apFormula = new storm::property::prctl::Ap<double>("finished");
+	reachabilityRewardFormula = new storm::property::prctl::ReachabilityReward<double>(apFormula);
+	rewardFormula = new storm::property::prctl::RewardNoBoundOperator<double>(reachabilityRewardFormula, false);
+    
+	result = mc.checkNoBoundOperator(*rewardFormula);
+    
+	ASSERT_LT(std::abs(result[31168] - 2183.142422), s->getOptionByLongName("precision").getArgument(0).getValueAsDouble());
+	delete rewardFormula;
+}