@ -1,6 +1,7 @@
# include "src/solver/TopologicalValueIterationNondeterministicLinearEquationSolver.h"
# include <utility>
# include <chrono>
# include "src/settings/Settings.h"
# include "src/utility/vector.h"
@ -42,386 +43,283 @@ namespace storm {
NondeterministicLinearEquationSolver < ValueType > * TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : clone ( ) const {
return new TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > ( * this ) ;
}
template < typename ValueType >
void TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : solveEquationSystem ( bool minimize , storm : : storage : : SparseMatrix < ValueType > const & A , std : : vector < ValueType > & x , std : : vector < ValueType > const & b , std : : vector < ValueType > * multiplyResult , std : : vector < ValueType > * newX ) const {
# ifdef GPU_USE_FLOAT
# define __FORCE_FLOAT_CALCULATION true
# else
# define __FORCE_FLOAT_CALCULATION false
# endif
if ( __FORCE_FLOAT_CALCULATION & & ( sizeof ( ValueType ) = = sizeof ( double ) ) ) {
TopologicalValueIterationNondeterministicLinearEquationSolver < float > tvindles ( precision , maximalNumberOfIterations , relative ) ;
template < >
void TopologicalValueIterationNondeterministicLinearEquationSolver < float > : : solveEquationSystem ( bool minimize , storm : : storage : : SparseMatrix < float > const & A , std : : vector < float > & x , std : : vector < float > const & b , std : : vector < float > * multiplyResult , std : : vector < float > * newX ) const {
// For testing only
std : : cout < < " <<< Using CUDA-FLOAT Kernels >>> " < < std : : endl ;
LOG4CPLUS_INFO ( logger , " <<< Using CUDA-FLOAT Kernels >>> " ) ;
storm : : storage : : SparseMatrix < float > new_A = A . toValueType < float > ( ) ;
std : : vector < float > new_x = storm : : utility : : vector : : toValueType < float > ( x ) ;
std : : vector < float > const new_b = storm : : utility : : vector : : toValueType < float > ( b ) ;
// Now, we need to determine the SCCs of the MDP and perform a topological sort.
std : : vector < uint_fast64_t > const & nondeterministicChoiceIndices = A . getRowGroupIndices ( ) ;
storm : : models : : NonDeterministicMatrixBasedPseudoModel < float > pseudoModel ( A , nondeterministicChoiceIndices ) ;
storm : : storage : : StronglyConnectedComponentDecomposition < float > sccDecomposition ( pseudoModel , false , false ) ;
if ( sccDecomposition . size ( ) = = 0 ) {
LOG4CPLUS_ERROR ( logger , " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ) ;
throw storm : : exceptions : : IllegalArgumentException ( ) < < " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ;
}
tvindles . solveEquationSystem ( minimize , new_A , new_x , new_b , nullptr , nullptr ) ;
storm : : storage : : SparseMatrix < float > stronglyConnectedComponentsDependencyGraph = pseudoModel . extractPartitionDependencyGraph ( sccDecomposition ) ;
std : : vector < uint_fast64_t > topologicalSort = storm : : utility : : graph : : getTopologicalSort ( stronglyConnectedComponentsDependencyGraph ) ;
// Calculate the optimal distribution of sccs
std : : vector < std : : pair < bool , storm : : storage : : StateBlock > > optimalSccs = this - > getOptimalGroupingFromTopologicalSccDecomposition ( sccDecomposition , topologicalSort , A ) ;
LOG4CPLUS_INFO ( logger , " Optimized SCC Decomposition, originally " < < topologicalSort . size ( ) < < " SCCs, optimized to " < < optimalSccs . size ( ) < < " SCCs. " ) ;
std : : vector < float > * currentX = nullptr ;
std : : vector < float > * swap = nullptr ;
size_t currentMaxLocalIterations = 0 ;
size_t localIterations = 0 ;
size_t globalIterations = 0 ;
bool converged = true ;
// Iterate over all SCCs of the MDP as specified by the topological sort. This guarantees that an SCC is only
// solved after all SCCs it depends on have been solved.
int counter = 0 ;
for ( auto sccIndexIt = optimalSccs . cbegin ( ) ; sccIndexIt ! = optimalSccs . cend ( ) & & converged ; + + sccIndexIt ) {
bool const useGpu = sccIndexIt - > first ;
storm : : storage : : StateBlock const & scc = sccIndexIt - > second ;
// Generate a sub matrix
storm : : storage : : BitVector subMatrixIndices ( A . getColumnCount ( ) , scc . cbegin ( ) , scc . cend ( ) ) ;
storm : : storage : : SparseMatrix < float > sccSubmatrix = A . getSubmatrix ( true , subMatrixIndices , subMatrixIndices ) ;
std : : vector < float > sccSubB ( sccSubmatrix . getRowCount ( ) ) ;
storm : : utility : : vector : : selectVectorValues < float > ( sccSubB , subMatrixIndices , nondeterministicChoiceIndices , b ) ;
std : : vector < float > sccSubX ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < float > sccSubXSwap ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < float > sccMultiplyResult ( sccSubmatrix . getRowCount ( ) ) ;
// Prepare the pointers for swapping in the calculation
currentX = & sccSubX ;
swap = & sccSubXSwap ;
storm : : utility : : vector : : selectVectorValues < float > ( sccSubX , subMatrixIndices , x ) ; // x is getCols() large, where as b and multiplyResult are getRows() (nondet. choices times states)
std : : vector < uint_fast64_t > sccSubNondeterministicChoiceIndices ( sccSubmatrix . getColumnCount ( ) + 1 ) ;
sccSubNondeterministicChoiceIndices . at ( 0 ) = 0 ;
// Pre-process all dependent states
// Remove outgoing transitions and create the ChoiceIndices
uint_fast64_t innerIndex = 0 ;
uint_fast64_t outerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
// Choice Indices
sccSubNondeterministicChoiceIndices . at ( outerIndex + 1 ) = sccSubNondeterministicChoiceIndices . at ( outerIndex ) + ( nondeterministicChoiceIndices [ state + 1 ] - nondeterministicChoiceIndices [ state ] ) ;
for ( auto rowGroupIt = nondeterministicChoiceIndices [ state ] ; rowGroupIt ! = nondeterministicChoiceIndices [ state + 1 ] ; + + rowGroupIt ) {
storm : : storage : : SparseMatrix < float > : : const_rows row = A . getRow ( rowGroupIt ) ;
for ( auto rowIt = row . begin ( ) ; rowIt ! = row . end ( ) ; + + rowIt ) {
if ( ! subMatrixIndices . get ( rowIt - > getColumn ( ) ) ) {
// This is an outgoing transition of a state in the SCC to a state not included in the SCC
// Subtracting Pr(tau) * x_other from b fixes that
sccSubB . at ( innerIndex ) = sccSubB . at ( innerIndex ) + ( rowIt - > getValue ( ) * x . at ( rowIt - > getColumn ( ) ) ) ;
}
}
+ + innerIndex ;
}
+ + outerIndex ;
for ( size_t i = 0 , size = new_x . size ( ) ; i < size ; + + i ) {
x . at ( i ) = new_x . at ( i ) ;
}
return ;
}
// For testing only
if ( sizeof ( ValueType ) = = sizeof ( double ) ) {
std : : cout < < " <<< Using CUDA-DOUBLE Kernels >>> " < < std : : endl ;
LOG4CPLUS_INFO ( logger , " <<< Using CUDA-DOUBLE Kernels >>> " ) ;
} else {
std : : cout < < " <<< Using CUDA-FLOAT Kernels >>> " < < std : : endl ;
LOG4CPLUS_INFO ( logger , " <<< Using CUDA-FLOAT Kernels >>> " ) ;
}
// For the current SCC, we need to perform value iteration until convergence.
if ( useGpu ) {
// Now, we need to determine the SCCs of the MDP and perform a topological sort.
std : : vector < uint_fast64_t > const & nondeterministicChoiceIndices = A . getRowGroupIndices ( ) ;
storm : : models : : NonDeterministicMatrixBasedPseudoModel < ValueType > const pseudoModel ( A , nondeterministicChoiceIndices ) ;
// Check if the decomposition is necessary
# ifdef STORM_HAVE_CUDAFORSTORM
if ( ! resetCudaDevice ( ) ) {
LOG4CPLUS_ERROR ( logger , " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;
}
//LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%).");
//LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes.");
//LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion());
bool result = false ;
localIterations = 0 ;
if ( minimize ) {
result = basicValueIteration_mvReduce_uint64_float_minimize ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
} else {
result = basicValueIteration_mvReduce_uint64_float_maximize ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations on GPU. " ) ;
if ( ! result ) {
converged = false ;
LOG4CPLUS_ERROR ( logger , " An error occurred in the CUDA Plugin. Can not continue. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " An error occurred in the CUDA Plugin. Can not continue. " ;
} else {
converged = true ;
}
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
}
# define __USE_CUDAFORSTORM_OPT true
size_t const gpuSizeOfCompleteSystem = basicValueIteration_mvReduce_uint64_double_calculateMemorySize ( static_cast < size_t > ( A . getRowCount ( ) ) , nondeterministicChoiceIndices . size ( ) , static_cast < size_t > ( A . getEntryCount ( ) ) ) ;
size_t const cudaFreeMemory = static_cast < size_t > ( getFreeCudaMemory ( ) * 0.95 ) ;
# else
LOG4CPLUS_ERROR ( logger , " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ;
# define __USE_CUDAFORSTORM_OPT false
size_t const gpuSizeOfCompleteSystem = 0 ;
size_t const cudaFreeMemory = 0 ;
# endif
} else {
localIterations = 0 ;
converged = false ;
while ( ! converged & & localIterations < this - > maximalNumberOfIterations ) {
// Compute x' = A*x + b.
sccSubmatrix . multiplyWithVector ( * currentX , sccMultiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace < float > ( sccMultiplyResult , sccSubB ) ;
//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
std : : vector < std : : pair < bool , storm : : storage : : StateBlock > > sccDecomposition ;
if ( __USE_CUDAFORSTORM_OPT & & ( gpuSizeOfCompleteSystem < cudaFreeMemory ) ) {
// Dummy output for SCC Times
std : : cout < < " Computing the SCC Decomposition took 0ms " < < std : : endl ;
/*
Versus :
A . multiplyWithVector ( * currentX , * multiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult , b ) ;
*/
// Reduce the vector x' by applying min/max for all non-deterministic choices.
if ( minimize ) {
storm : : utility : : vector : : reduceVectorMin < float > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
} else {
storm : : utility : : vector : : reduceVectorMax < float > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
}
// Determine whether the method converged.
// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
// running time. In fact, it is faster. This has to be investigated.
// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
converged = storm : : utility : : vector : : equalModuloPrecision < float > ( * currentX , * swap , this - > precision , this - > relative ) ;
// Update environment variables.
std : : swap ( currentX , swap ) ;
+ + localIterations ;
+ + globalIterations ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations. " ) ;
# ifdef STORM_HAVE_CUDAFORSTORM
if ( ! resetCudaDevice ( ) ) {
LOG4CPLUS_ERROR ( logger , " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;
}
// The Result of this SCC has to be taken back into the main result vector
innerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
x . at ( state ) = currentX - > at ( innerIndex ) ;
+ + innerIndex ;
std : : chrono : : high_resolution_clock : : time_point calcStartTime = std : : chrono : : high_resolution_clock : : now ( ) ;
bool result = false ;
size_t globalIterations = 0 ;
if ( minimize ) {
result = __basicValueIteration_mvReduce_uint64_minimize < ValueType > ( this - > maximalNumberOfIterations , this - > precision , this - > relative , A . rowIndications , A . columnsAndValues , x , b , nondeterministicChoiceIndices , globalIterations ) ;
} else {
result = __basicValueIteration_mvReduce_uint64_maximize < ValueType > ( this - > maximalNumberOfIterations , this - > precision , this - > relative , A . rowIndications , A . columnsAndValues , x , b , nondeterministicChoiceIndices , globalIterations ) ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < globalIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations on GPU. " ) ;
// Since the pointers for swapping in the calculation point to temps they should not be valid anymore
currentX = nullptr ;
swap = nullptr ;
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
bool converged = false ;
if ( ! result ) {
converged = false ;
LOG4CPLUS_ERROR ( logger , " An error occurred in the CUDA Plugin. Can not continue. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " An error occurred in the CUDA Plugin. Can not continue. " ;
} else {
converged = true ;
}
}
// Check if the solver converged and issue a warning otherwise.
if ( converged ) {
LOG4CPLUS_INFO ( logger , " Iterative solver converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
} else {
LOG4CPLUS_WARN ( logger , " Iterative solver did not converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
}
}
template < typename ValueType >
void TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : solveEquationSystem ( bool minimize , storm : : storage : : SparseMatrix < ValueType > const & A , std : : vector < ValueType > & x , std : : vector < ValueType > const & b , std : : vector < ValueType > * multiplyResult , std : : vector < ValueType > * newX ) const {
# ifndef GPU_USE_DOUBLE
TopologicalValueIterationNondeterministicLinearEquationSolver < float > tvindles ( precision , maximalNumberOfIterations , relative ) ;
storm : : storage : : SparseMatrix < float > new_A = A . toValueType < float > ( ) ;
std : : vector < float > new_x = storm : : utility : : vector : : toValueType < float > ( x ) ;
std : : vector < float > const new_b = storm : : utility : : vector : : toValueType < float > ( b ) ;
std : : chrono : : high_resolution_clock : : time_point calcEndTime = std : : chrono : : high_resolution_clock : : now ( ) ;
std : : cout < < " Obtaining the fixpoint solution took " < < std : : chrono : : duration_cast < std : : chrono : : milliseconds > ( calcEndTime - calcStartTime ) . count ( ) < < " ms. " < < std : : endl ;
tvindles . solveEquationSystem ( minimize , new_A , new_x , new_b , nullptr , nullptr ) ;
std : : cout < < " Used a total of " < < globalIterations < < " iterations with a maximum of " < < globalIterations < < " iterations in a single block. " < < std : : endl ;
for ( size_t i = 0 , size = new_x . size ( ) ; i < size ; + + i ) {
x . at ( i ) = new_x . at ( i ) ;
}
// Check if the solver converged and issue a warning otherwise.
if ( converged ) {
LOG4CPLUS_INFO ( logger , " Iterative solver converged after " < < globalIterations < < " iterations. " ) ;
} else {
LOG4CPLUS_WARN ( logger , " Iterative solver did not converged after " < < globalIterations < < " iterations. " ) ;
}
# else
// For testing only
std : : cout < < " <<< Using CUDA-DOUBLE Kernels >>> " < < std : : endl ;
LOG4CPLUS_INFO ( logger , " <<< Using CUDA-DOUBLE Kernels >>> " ) ;
// Now, we need to determine the SCCs of the MDP and perform a topological sort.
std : : vector < uint_fast64_t > const & nondeterministicChoiceIndices = A . getRowGroupIndices ( ) ;
storm : : models : : NonDeterministicMatrixBasedPseudoModel < ValueType > pseudoModel ( A , nondeterministicChoiceIndices ) ;
storm : : storage : : StronglyConnectedComponentDecomposition < ValueType > sccDecomposition ( pseudoModel , false , false ) ;
LOG4CPLUS_ERROR ( logger , " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ;
# endif
} else {
std : : chrono : : high_resolution_clock : : time_point sccStartTime = std : : chrono : : high_resolution_clock : : now ( ) ;
storm : : storage : : StronglyConnectedComponentDecomposition < ValueType > sccDecomposition ( pseudoModel , false , false ) ;
if ( sccDecomposition . size ( ) = = 0 ) {
LOG4CPLUS_ERROR ( logger , " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ) ;
throw storm : : exceptions : : IllegalArgumentException ( ) < < " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ;
}
if ( sccDecomposition . size ( ) = = 0 ) {
LOG4CPLUS_ERROR ( logger , " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ) ;
throw storm : : exceptions : : IllegalArgumentException ( ) < < " Can not solve given Equation System as the SCC Decomposition returned no SCCs. " ;
}
storm : : storage : : SparseMatrix < ValueType > stronglyConnectedComponentsDependencyGraph = pseudoModel . extractPartitionDependencyGraph ( sccDecomposition ) ;
std : : vector < uint_fast64_t > topologicalSort = storm : : utility : : graph : : getTopologicalSort ( stronglyConnectedComponentsDependencyGraph ) ;
// Calculate the optimal distribution of sccs
std : : vector < std : : pair < bool , storm : : storage : : StateBlock > > optimalSccs = this - > getOptimalGroupingFromTopologicalSccDecomposition ( sccDecomposition , topologicalSort , A ) ;
LOG4CPLUS_INFO ( logger , " Optimized SCC Decomposition, originally " < < topologicalSort . size ( ) < < " SCCs, optimized to " < < optimalSccs . size ( ) < < " SCCs. " ) ;
std : : vector < ValueType > * currentX = nullptr ;
std : : vector < ValueType > * swap = nullptr ;
size_t currentMaxLocalIterations = 0 ;
size_t localIterations = 0 ;
size_t globalIterations = 0 ;
bool converged = true ;
// Iterate over all SCCs of the MDP as specified by the topological sort. This guarantees that an SCC is only
// solved after all SCCs it depends on have been solved.
int counter = 0 ;
for ( auto sccIndexIt = optimalSccs . cbegin ( ) ; sccIndexIt ! = optimalSccs . cend ( ) & & converged ; + + sccIndexIt ) {
bool const useGpu = sccIndexIt - > first ;
storm : : storage : : StateBlock const & scc = sccIndexIt - > second ;
// Generate a sub matrix
storm : : storage : : BitVector subMatrixIndices ( A . getColumnCount ( ) , scc . cbegin ( ) , scc . cend ( ) ) ;
storm : : storage : : SparseMatrix < ValueType > sccSubmatrix = A . getSubmatrix ( true , subMatrixIndices , subMatrixIndices ) ;
std : : vector < ValueType > sccSubB ( sccSubmatrix . getRowCount ( ) ) ;
storm : : utility : : vector : : selectVectorValues < ValueType > ( sccSubB , subMatrixIndices , nondeterministicChoiceIndices , b ) ;
std : : vector < ValueType > sccSubX ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < ValueType > sccSubXSwap ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < ValueType > sccMultiplyResult ( sccSubmatrix . getRowCount ( ) ) ;
// Prepare the pointers for swapping in the calculation
currentX = & sccSubX ;
swap = & sccSubXSwap ;
storm : : utility : : vector : : selectVectorValues < ValueType > ( sccSubX , subMatrixIndices , x ) ; // x is getCols() large, where as b and multiplyResult are getRows() (nondet. choices times states)
std : : vector < uint_fast64_t > sccSubNondeterministicChoiceIndices ( sccSubmatrix . getColumnCount ( ) + 1 ) ;
sccSubNondeterministicChoiceIndices . at ( 0 ) = 0 ;
// Pre-process all dependent states
// Remove outgoing transitions and create the ChoiceIndices
uint_fast64_t innerIndex = 0 ;
uint_fast64_t outerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
// Choice Indices
sccSubNondeterministicChoiceIndices . at ( outerIndex + 1 ) = sccSubNondeterministicChoiceIndices . at ( outerIndex ) + ( nondeterministicChoiceIndices [ state + 1 ] - nondeterministicChoiceIndices [ state ] ) ;
for ( auto rowGroupIt = nondeterministicChoiceIndices [ state ] ; rowGroupIt ! = nondeterministicChoiceIndices [ state + 1 ] ; + + rowGroupIt ) {
typename storm : : storage : : SparseMatrix < ValueType > : : const_rows row = A . getRow ( rowGroupIt ) ;
for ( auto rowIt = row . begin ( ) ; rowIt ! = row . end ( ) ; + + rowIt ) {
if ( ! subMatrixIndices . get ( rowIt - > getColumn ( ) ) ) {
// This is an outgoing transition of a state in the SCC to a state not included in the SCC
// Subtracting Pr(tau) * x_other from b fixes that
sccSubB . at ( innerIndex ) = sccSubB . at ( innerIndex ) + ( rowIt - > getValue ( ) * x . at ( rowIt - > getColumn ( ) ) ) ;
storm : : storage : : SparseMatrix < ValueType > stronglyConnectedComponentsDependencyGraph = pseudoModel . extractPartitionDependencyGraph ( sccDecomposition ) ;
std : : vector < uint_fast64_t > topologicalSort = storm : : utility : : graph : : getTopologicalSort ( stronglyConnectedComponentsDependencyGraph ) ;
// Calculate the optimal distribution of sccs
std : : vector < std : : pair < bool , storm : : storage : : StateBlock > > optimalSccs = this - > getOptimalGroupingFromTopologicalSccDecomposition ( sccDecomposition , topologicalSort , A ) ;
LOG4CPLUS_INFO ( logger , " Optimized SCC Decomposition, originally " < < topologicalSort . size ( ) < < " SCCs, optimized to " < < optimalSccs . size ( ) < < " SCCs. " ) ;
std : : chrono : : high_resolution_clock : : time_point sccEndTime = std : : chrono : : high_resolution_clock : : now ( ) ;
std : : cout < < " Computing the SCC Decomposition took " < < std : : chrono : : duration_cast < std : : chrono : : milliseconds > ( sccEndTime - sccStartTime ) . count ( ) < < " ms. " < < std : : endl ;
std : : chrono : : high_resolution_clock : : time_point calcStartTime = std : : chrono : : high_resolution_clock : : now ( ) ;
std : : vector < ValueType > * currentX = nullptr ;
std : : vector < ValueType > * swap = nullptr ;
size_t currentMaxLocalIterations = 0 ;
size_t localIterations = 0 ;
size_t globalIterations = 0 ;
bool converged = true ;
// Iterate over all SCCs of the MDP as specified by the topological sort. This guarantees that an SCC is only
// solved after all SCCs it depends on have been solved.
int counter = 0 ;
for ( auto sccIndexIt = optimalSccs . cbegin ( ) ; sccIndexIt ! = optimalSccs . cend ( ) & & converged ; + + sccIndexIt ) {
bool const useGpu = sccIndexIt - > first ;
storm : : storage : : StateBlock const & scc = sccIndexIt - > second ;
// Generate a sub matrix
storm : : storage : : BitVector subMatrixIndices ( A . getColumnCount ( ) , scc . cbegin ( ) , scc . cend ( ) ) ;
storm : : storage : : SparseMatrix < ValueType > sccSubmatrix = A . getSubmatrix ( true , subMatrixIndices , subMatrixIndices ) ;
std : : vector < ValueType > sccSubB ( sccSubmatrix . getRowCount ( ) ) ;
storm : : utility : : vector : : selectVectorValues < ValueType > ( sccSubB , subMatrixIndices , nondeterministicChoiceIndices , b ) ;
std : : vector < ValueType > sccSubX ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < ValueType > sccSubXSwap ( sccSubmatrix . getColumnCount ( ) ) ;
std : : vector < ValueType > sccMultiplyResult ( sccSubmatrix . getRowCount ( ) ) ;
// Prepare the pointers for swapping in the calculation
currentX = & sccSubX ;
swap = & sccSubXSwap ;
storm : : utility : : vector : : selectVectorValues < ValueType > ( sccSubX , subMatrixIndices , x ) ; // x is getCols() large, where as b and multiplyResult are getRows() (nondet. choices times states)
std : : vector < uint_fast64_t > sccSubNondeterministicChoiceIndices ( sccSubmatrix . getColumnCount ( ) + 1 ) ;
sccSubNondeterministicChoiceIndices . at ( 0 ) = 0 ;
// Pre-process all dependent states
// Remove outgoing transitions and create the ChoiceIndices
uint_fast64_t innerIndex = 0 ;
uint_fast64_t outerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
// Choice Indices
sccSubNondeterministicChoiceIndices . at ( outerIndex + 1 ) = sccSubNondeterministicChoiceIndices . at ( outerIndex ) + ( nondeterministicChoiceIndices [ state + 1 ] - nondeterministicChoiceIndices [ state ] ) ;
for ( auto rowGroupIt = nondeterministicChoiceIndices [ state ] ; rowGroupIt ! = nondeterministicChoiceIndices [ state + 1 ] ; + + rowGroupIt ) {
typename storm : : storage : : SparseMatrix < ValueType > : : const_rows row = A . getRow ( rowGroupIt ) ;
for ( auto rowIt = row . begin ( ) ; rowIt ! = row . end ( ) ; + + rowIt ) {
if ( ! subMatrixIndices . get ( rowIt - > getColumn ( ) ) ) {
// This is an outgoing transition of a state in the SCC to a state not included in the SCC
// Subtracting Pr(tau) * x_other from b fixes that
sccSubB . at ( innerIndex ) = sccSubB . at ( innerIndex ) + ( rowIt - > getValue ( ) * x . at ( rowIt - > getColumn ( ) ) ) ;
}
}
+ + innerIndex ;
}
+ + innerIndex ;
+ + out erIndex ;
}
+ + outerIndex ;
}
// For the current SCC, we need to perform value iteration until convergence.
if ( useGpu ) {
// For the current SCC, we need to perform value iteration until convergence.
if ( useGpu ) {
# ifdef STORM_HAVE_CUDAFORSTORM
if ( ! resetCudaDevice ( ) ) {
LOG4CPLUS_ERROR ( logger , " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;
}
if ( ! resetCudaDevice ( ) ) {
LOG4CPLUS_ERROR ( logger , " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;
}
//LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%).");
//LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes.");
//LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion());
//LOG4CPLUS_INFO(logger, "Device has " << getTotalCudaMemory() << " Bytes of Memory with " << getFreeCudaMemory() << "Bytes free (" << (static_cast<double>(getFreeCudaMemory()) / static_cast<double>(getTotalCudaMemory())) * 100 << "%).");
//LOG4CPLUS_INFO(logger, "We will allocate " << (sizeof(uint_fast64_t)* sccSubmatrix.rowIndications.size() + sizeof(uint_fast64_t)* sccSubmatrix.columnsAndValues.size() * 2 + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubX.size() + sizeof(double)* sccSubB.size() + sizeof(double)* sccSubB.size() + sizeof(uint_fast64_t)* sccSubNondeterministicChoiceIndices.size()) << " Bytes.");
//LOG4CPLUS_INFO(logger, "The CUDA Runtime Version is " << getRuntimeCudaVersion());
bool result = false ;
localIterations = 0 ;
if ( minimize ) {
result = basicValueIteration_mvReduce_uint64_double_minimize ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
} else {
result = basicValueIteration_mvReduce_uint64_double_maximize ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations on GPU. " ) ;
if ( ! result ) {
converged = false ;
LOG4CPLUS_ERROR ( logger , " An error occurred in the CUDA Plugin. Can not continue. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " An error occurred in the CUDA Plugin. Can not continue. " ;
} else {
converged = true ;
}
bool result = false ;
localIterations = 0 ;
if ( minimize ) {
result = __basicValueIteration_mvReduce_uint64_minimize < ValueType > ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
} else {
result = __basicValueIteration_mvReduce_uint64_maximize < ValueType > ( this - > maximalNumberOfIterations , this - > precision , this - > relative , sccSubmatrix . rowIndications , sccSubmatrix . columnsAndValues , * currentX , sccSubB , sccSubNondeterministicChoiceIndices , localIterations ) ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations on GPU. " ) ;
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
}
if ( ! result ) {
converged = false ;
LOG4CPLUS_ERROR ( logger , " An error occurred in the CUDA Plugin. Can not continue. " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " An error occurred in the CUDA Plugin. Can not continue. " ;
} else {
converged = true ;
}
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
}
globalIterations + = localIterations ;
# else
LOG4CPLUS_ERROR ( logger , " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ;
LOG4CPLUS_ERROR ( logger , " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ) ;
throw storm : : exceptions : : InvalidStateException ( ) < < " The useGpu Flag of a SCC was set, but this version of StoRM does not support CUDA acceleration. Internal Error! " ;
# endif
} else {
localIterations = 0 ;
converged = false ;
while ( ! converged & & localIterations < this - > maximalNumberOfIterations ) {
// Compute x' = A*x + b.
sccSubmatrix . multiplyWithVector ( * currentX , sccMultiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult , sccSubB ) ;
} else {
std : : cout < < " WARNING: Using CPU based TopoSolver! (double) " < < std : : endl ;
localIterations = 0 ;
converged = false ;
while ( ! converged & & localIterations < this - > maximalNumberOfIterations ) {
// Compute x' = A*x + b.
sccSubmatrix . multiplyWithVector ( * currentX , sccMultiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult , sccSubB ) ;
//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
/*
Versus :
A . multiplyWithVector ( * currentX , * multiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult , b ) ;
*/
// Reduce the vector x' by applying min/max for all non-deterministic choices.
if ( minimize ) {
storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
} else {
storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
}
//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
// Determine whether the method converged.
// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
// running time. In fact, it is faster. This has to be investigated.
// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
converged = storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX , * swap , this - > precision , this - > relative ) ;
/*
Versus :
A . multiplyWithVector ( * currentX , * multiplyResult ) ;
storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult , b ) ;
*/
// Update environment variables.
std : : swap ( currentX , swap ) ;
// Reduce the vector x' by applying min/max for all non-deterministic choices.
if ( minimize ) {
storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
+ + localIterations ;
+ + globalIterations ;
}
else {
storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult , * swap , sccSubNondeterministicChoiceIndices ) ;
}
// Determine whether the method converged.
// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
// running time. In fact, it is faster. This has to be investigated.
// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
converged = storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX , * swap , this - > precision , this - > relative ) ;
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations. " ) ;
}
// Update environment variables.
std : : swap ( currentX , swap ) ;
+ + localIterations ;
+ + globalIterations ;
// The Result of this SCC has to be taken back into the main result vector
innerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
x . at ( state ) = currentX - > at ( innerIndex ) ;
+ + innerIndex ;
}
LOG4CPLUS_INFO ( logger , " Executed " < < localIterations < < " of max. " < < maximalNumberOfIterations < < " Iterations. " ) ;
}
// Since the pointers for swapping in the calculation point to temps they should not be valid anymore
currentX = nullptr ;
swap = nullptr ;
// The Result of this SCC has to be taken back into the main result vector
innerIndex = 0 ;
for ( uint_fast64_t state : scc ) {
x . at ( state ) = currentX - > at ( innerIndex ) ;
+ + innerIndex ;
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
}
}
// Since the pointers for swapping in the calculation point to temps they should not be valid anymore
currentX = nullptr ;
swap = nullptr ;
std : : cout < < " Used a total of " < < globalIterations < < " iterations with a maximum of " < < localIterations < < " iterations in a single block. " < < std : : endl ;
// As the "number of iterations" of the full method is the maximum of the local iterations, we need to keep
// track of the maximum.
if ( localIterations > currentMaxLocalIterations ) {
currentMaxLocalIterations = localIterations ;
// Check if the solver converged and issue a warning otherwise.
if ( converged ) {
LOG4CPLUS_INFO ( logger , " Iterative solver converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
} else {
LOG4CPLUS_WARN ( logger , " Iterative solver did not converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
}
}
// Check if the solver converged and issue a warning otherwise.
if ( converged ) {
LOG4CPLUS_INFO ( logger , " Iterative solver converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
}
else {
LOG4CPLUS_WARN ( logger , " Iterative solver did not converged after " < < currentMaxLocalIterations < < " iterations. " ) ;
std : : chrono : : high_resolution_clock : : time_point calcEndTime = std : : chrono : : high_resolution_clock : : now ( ) ;
std : : cout < < " Obtaining the fixpoint solution took " < < std : : chrono : : duration_cast < std : : chrono : : milliseconds > ( calcEndTime - calcStartTime ) . count ( ) < < " ms. " < < std : : endl ;
}
# endif
}
template < typename ValueType >