@ -42,12 +42,12 @@ namespace storm { 
			
		
	
		
			
				
					        }  
			
		
	
		
			
				
					         
			
		
	
		
			
				
					        template < typename  ValueType >  
			
		
	
		
			
				
							void  TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : solveEquationSystem ( bool  minimize ,  storm : : storage : : SparseMatrix < ValueType >  const &  A ,  std : : vector < ValueType > &  x ,  std : : vector < ValueType >  const &  b ,  std : : vector < uint_fast64_t >  const &  nondeterministicChoiceIndices ,  std : : vector < ValueType > *  multiplyResult ,  std : : vector < ValueType > *  newX )  const  {  
			
		
	
		
			
				
							void  TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : solveEquationSystem ( bool  minimize ,  storm : : storage : : SparseMatrix < ValueType >  const &  A ,  std : : vector < ValueType > &  x ,  std : : vector < ValueType >  const &  b ,  std : : vector < ValueType > *  multiplyResult ,  std : : vector < ValueType > *  newX )  const  {  
			
		
	
		
			
				
					             
			
		
	
		
			
				
								// Now, we need to determine the SCCs of the MDP and a topological sort.
  
			
		
	
		
			
				
								//std::vector<std::vector<uint_fast64_t>> stronglyConnectedComponents = storm::utility::graph::performSccDecomposition(this->getModel(), stronglyConnectedComponents, stronglyConnectedComponentsDependencyGraph);
  
			
		
	
		
			
				
								//storm::storage::SparseMatrix<T> stronglyConnectedComponentsDependencyGraph = this->getModel().extractSccDependencyGraph(stronglyConnectedComponents);
  
			
		
	
		
			
				
					
  
			
		
	
		
			
				
								std : : vector < uint_fast64_t >  const &  nondeterministicChoiceIndices  =  A . getRowGroupIndices ( ) ;   
			
		
	
		
			
				
								storm : : models : : NonDeterministicMatrixBasedPseudoModel < ValueType >  pseudoModel ( A ,  nondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
								//storm::storage::StronglyConnectedComponentDecomposition<ValueType> sccDecomposition(*static_cast<storm::models::AbstractPseudoModel<ValueType>*>(&pseudoModel), false, false);
  
			
		
	
		
			
				
								storm : : storage : : StronglyConnectedComponentDecomposition < ValueType >  sccDecomposition ( pseudoModel ,  false ,  false ) ;  
			
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
				@ -60,6 +60,9 @@ namespace storm { 
			
		
	
		
			
				
								storm : : storage : : SparseMatrix < ValueType >  stronglyConnectedComponentsDependencyGraph  =  pseudoModel . extractPartitionDependencyGraph ( sccDecomposition ) ;  
			
		
	
		
			
				
								std : : vector < uint_fast64_t >  topologicalSort  =  storm : : utility : : graph : : getTopologicalSort ( stronglyConnectedComponentsDependencyGraph ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
								// Calculate the optimal distribution of sccs
  
			
		
	
		
			
				
								std : : vector < std : : pair < bool ,  std : : vector < uint_fast64_t > > >  optimalSccs  =  this - > getOptimalGroupingFromTopologicalSccDecomposition ( sccDecomposition ,  topologicalSort ,  A ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
								// Set up the environment for the power method.
  
			
		
	
		
			
				
					//			bool multiplyResultMemoryProvided = true;
  
			
		
	
		
			
				
					//			if (multiplyResult == nullptr) {
  
			
		
	
	
		
			
				
					
						
						
						
							
								 
						
					 
				
				@ -82,12 +85,13 @@ namespace storm { 
			
		
	
		
			
				
								// solved after all SCCs it depends on have been solved.
  
			
		
	
		
			
				
								int  counter  =  0 ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
								for  ( auto  sccIndexIt  =  topologicalSort . begin ( ) ;  sccIndexIt  ! =  topologicalSort . end ( )  & &  converged ;  + + sccIndexIt )  {  
			
		
	
		
			
				
									storm : : storage : : StateBlock  const &  scc  =  sccDecomposition [ * sccIndexIt ] ;  
			
		
	
		
			
				
								for  ( auto  sccIndexIt  =  optimalSccs . cbegin ( ) ;  sccIndexIt  ! =  optimalSccs . cend ( )  & &  converged ;  + + sccIndexIt )  {  
			
		
	
		
			
				
									bool  const  useGpu  =  sccIndexIt - > first ;  
			
		
	
		
			
				
									std : : vector  < uint_fast64_t >  const &  scc  =  sccIndexIt - > second ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									// Generate a submatrix
  
			
		
	
		
			
				
									storm : : storage : : BitVector  subMatrixIndices ( A . getColumnCount ( ) ,  scc . cbegin ( ) ,  scc . cend ( ) ) ;  
			
		
	
		
			
				
									storm : : storage : : SparseMatrix < ValueType >  sccSubmatrix  =  A . getSubmatrix ( subMatrixIndices ,  nondeterministicChoice Indices) ;  
			
		
	
		
			
				
									storm : : storage : : SparseMatrix < ValueType >  sccSubmatrix  =  A . getSubmatrix ( true ,  subMatrixIndices ,  subMatrix Indices) ;  
			
		
	
		
			
				
									std : : vector < ValueType >  sccSubB ( sccSubmatrix . getRowCount ( ) ) ;  
			
		
	
		
			
				
									storm : : utility : : vector : : selectVectorValues < ValueType > ( sccSubB ,  subMatrixIndices ,  nondeterministicChoiceIndices ,  b ) ;  
			
		
	
		
			
				
									std : : vector < ValueType >  sccSubX ( sccSubmatrix . getColumnCount ( ) ) ;  
			
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
				@ -125,108 +129,115 @@ namespace storm { 
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									// For the current SCC, we need to perform value iteration until convergence.
  
			
		
	
		
			
				
									if  ( useGpu )  {  
			
		
	
		
			
				
					# ifdef STORM_HAVE_CUDAFORSTORM 
  
			
		
	
		
			
				
									if  ( ! resetCudaDevice ( ) )  {  
			
		
	
		
			
				
										LOG4CPLUS_ERROR ( logger ,  " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;  
			
		
	
		
			
				
										throw  storm : : exceptions : : InvalidStateException ( )  < <  " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									LOG4CPLUS_INFO ( logger ,  " Device has  "  < <  getTotalCudaMemory ( )  < <  "  Bytes of Memory with  "  < <  getFreeCudaMemory ( )  < <  " Bytes free ( "  < <  ( static_cast < double > ( getFreeCudaMemory ( ) )  /  static_cast < double > ( getTotalCudaMemory ( ) ) )  *  100  < <  " %). " ) ;  
			
		
	
		
			
				
									LOG4CPLUS_INFO ( logger ,  " We will allocate  "  < <  ( sizeof ( uint_fast64_t ) *  sccSubmatrix . rowIndications . size ( )  +  sizeof ( uint_fast64_t ) *  sccSubmatrix . columnsAndValues . size ( )  *  2  +  sizeof ( double ) *  sccSubX . size ( )  +  sizeof ( double ) *  sccSubX . size ( )  +  sizeof ( double ) *  sccSubB . size ( )  +  sizeof ( double ) *  sccSubB . size ( )  +  sizeof ( uint_fast64_t ) *  sccSubNondeterministicChoiceIndices . size ( ) )  < <  "  Bytes. " ) ;  
			
		
	
		
			
				
									LOG4CPLUS_INFO ( logger ,  " The CUDA Runtime Version is  "  < <  getRuntimeCudaVersion ( ) ) ;  
			
		
	
		
			
				
									 
			
		
	
		
			
				
									std : : vector < ValueType >  copyX ( * currentX ) ;  
			
		
	
		
			
				
									if  ( minimize )  {  
			
		
	
		
			
				
										basicValueIteration_mvReduce_uint64_double_minimize ( this - > maximalNumberOfIterations ,  this - > precision ,  this - > relative ,  sccSubmatrix . rowIndications ,  sccSubmatrix . columnsAndValues ,  copyX ,  sccSubB ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
									else  {  
			
		
	
		
			
				
										basicValueIteration_mvReduce_uint64_double_maximize ( this - > maximalNumberOfIterations ,  this - > precision ,  this - > relative ,  sccSubmatrix . rowIndications ,  sccSubmatrix . columnsAndValues ,  copyX ,  sccSubB ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									localIterations  =  0 ;  
			
		
	
		
			
				
									converged  =  false ;  
			
		
	
		
			
				
									while  ( ! converged  & &  localIterations  <  this - > maximalNumberOfIterations )  {  
			
		
	
		
			
				
										// Compute x' = A*x + b.
  
			
		
	
		
			
				
										sccSubmatrix . multiplyWithVector ( * currentX ,  sccMultiplyResult ) ;  
			
		
	
		
			
				
										storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult ,  sccSubB ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
  
			
		
	
		
			
				
										//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
  
			
		
	
		
			
				
										if  ( ! resetCudaDevice ( ) )  {  
			
		
	
		
			
				
											LOG4CPLUS_ERROR ( logger ,  " Could not reset CUDA Device, can not use CUDA Equation Solver. " ) ;  
			
		
	
		
			
				
											throw  storm : : exceptions : : InvalidStateException ( )  < <  " Could not reset CUDA Device, can not use CUDA Equation Solver. " ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										/*
  
			
		
	
		
			
				
										Versus :  
			
		
	
		
			
				
										A . multiplyWithVector ( * currentX ,  * multiplyResult ) ;  
			
		
	
		
			
				
										storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult ,  b ) ;  
			
		
	
		
			
				
										*/  
			
		
	
		
			
				
										LOG4CPLUS_INFO ( logger ,  " Device has  "  < <  getTotalCudaMemory ( )  < <  "  Bytes of Memory with  "  < <  getFreeCudaMemory ( )  < <  " Bytes free ( "  < <  ( static_cast < double > ( getFreeCudaMemory ( ) )  /  static_cast < double > ( getTotalCudaMemory ( ) ) )  *  100  < <  " %). " ) ;  
			
		
	
		
			
				
										LOG4CPLUS_INFO ( logger ,  " We will allocate  "  < <  ( sizeof ( uint_fast64_t ) *  sccSubmatrix . rowIndications . size ( )  +  sizeof ( uint_fast64_t ) *  sccSubmatrix . columnsAndValues . size ( )  *  2  +  sizeof ( double ) *  sccSubX . size ( )  +  sizeof ( double ) *  sccSubX . size ( )  +  sizeof ( double ) *  sccSubB . size ( )  +  sizeof ( double ) *  sccSubB . size ( )  +  sizeof ( uint_fast64_t ) *  sccSubNondeterministicChoiceIndices . size ( ) )  < <  "  Bytes. " ) ;  
			
		
	
		
			
				
										LOG4CPLUS_INFO ( logger ,  " The CUDA Runtime Version is  "  < <  getRuntimeCudaVersion ( ) ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Reduce the vector x' by applying min/max for all non-deterministic choices.
  
			
		
	
		
			
				
										std : : vector < ValueType >  copyX ( * currentX ) ;  
			
		
	
		
			
				
										if  ( minimize )  {  
			
		
	
		
			
				
											storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											basicValueIteration_mvReduce_uint64_double_minimize ( this - > maximalNumberOfIterations ,  this - > precision ,  this - > relative ,  sccSubmatrix . rowIndications ,  sccSubmatrix . columnsAndValues ,  copyX ,  sccSubB ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										else  {  
			
		
	
		
			
				
											storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											basicValueIteration_mvReduce_uint64_double_maximize ( this - > maximalNumberOfIterations ,  this - > precision ,  this - > relative ,  sccSubmatrix . rowIndications ,  sccSubmatrix . columnsAndValues ,  copyX ,  sccSubB ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										converged  =  true ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// DEBUG
  
			
		
	
		
			
				
										localIterations  =  0 ;  
			
		
	
		
			
				
										converged  =  false ;  
			
		
	
		
			
				
										while  ( ! converged  & &  localIterations  <  this - > maximalNumberOfIterations )  {  
			
		
	
		
			
				
											// Compute x' = A*x + b.
  
			
		
	
		
			
				
											sccSubmatrix . multiplyWithVector ( * currentX ,  sccMultiplyResult ) ;  
			
		
	
		
			
				
											storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult ,  sccSubB ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
  
			
		
	
		
			
				
											//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											/*
  
			
		
	
		
			
				
											Versus :  
			
		
	
		
			
				
											A . multiplyWithVector ( * currentX ,  * multiplyResult ) ;  
			
		
	
		
			
				
											storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult ,  b ) ;  
			
		
	
		
			
				
											*/  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											// Reduce the vector x' by applying min/max for all non-deterministic choices.
  
			
		
	
		
			
				
											if  ( minimize )  {  
			
		
	
		
			
				
												storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
											else  {  
			
		
	
		
			
				
												storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Determine whether the method converged.
  
			
		
	
		
			
				
										// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
  
			
		
	
		
			
				
										// running time. In fact, it is faster. This has to be investigated.
  
			
		
	
		
			
				
										// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
  
			
		
	
		
			
				
										converged  =  storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX ,  * swap ,  this - > precision ,  this - > relative ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Update environment variables.
  
			
		
	
		
			
				
										std : : swap ( currentX ,  swap ) ;  
			
		
	
		
			
				
											// Determine whether the method converged.
  
			
		
	
		
			
				
											// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
  
			
		
	
		
			
				
											// running time. In fact, it is faster. This has to be investigated.
  
			
		
	
		
			
				
											// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
  
			
		
	
		
			
				
											converged  =  storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX ,  * swap ,  this - > precision ,  this - > relative ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										+ + localIterations ;  
			
		
	
		
			
				
										+ + globalIterations ;  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
									LOG4CPLUS_INFO ( logger ,  " Executed  "  < <  localIterations  < <  "  of max.  "  < <  maximalNumberOfIterations  < <  "  Iterations. " ) ;  
			
		
	
		
			
				
											// Update environment variables.
  
			
		
	
		
			
				
											std : : swap ( currentX ,  swap ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									uint_fast64_t  diffCount  =  0 ;  
			
		
	
		
			
				
									for  ( size_t  i  =  0 ;  i  <  currentX - > size ( ) ;  + + i )  {  
			
		
	
		
			
				
										if  ( currentX - > at ( i )  ! =  copyX . at ( i ) )  {  
			
		
	
		
			
				
											LOG4CPLUS_WARN ( logger ,  " CUDA solution differs on index  "  < <  i  < <  "  diff.  "  < <  std : : abs ( currentX - > at ( i )  -  copyX . at ( i ) )  < <  " , CPU:  "  < <  currentX - > at ( i )  < <  " , CUDA:  "  < <  copyX . at ( i ) ) ;  
			
		
	
		
			
				
											std : : cout  < <  " CUDA solution differs on index  "  < <  i  < <  "  diff.  "  < <  std : : abs ( currentX - > at ( i )  -  copyX . at ( i ) )  < <  " , CPU:  "  < <  currentX - > at ( i )  < <  " , CUDA:  "  < <  copyX . at ( i )  < <  std : : endl ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					# else 
  
			
		
	
		
			
				
									localIterations  =  0 ;  
			
		
	
		
			
				
									converged  =  false ;  
			
		
	
		
			
				
									while  ( ! converged  & &  localIterations  <  this - > maximalNumberOfIterations )  {  
			
		
	
		
			
				
										// Compute x' = A*x + b.
  
			
		
	
		
			
				
										sccSubmatrix . multiplyWithVector ( * currentX ,  sccMultiplyResult ) ;  
			
		
	
		
			
				
										storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult ,  sccSubB ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
  
			
		
	
		
			
				
										//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										/*
  
			
		
	
		
			
				
										Versus :  
			
		
	
		
			
				
										A . multiplyWithVector ( * currentX ,  * multiplyResult ) ;  
			
		
	
		
			
				
										storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult ,  b ) ;  
			
		
	
		
			
				
										*/  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Reduce the vector x' by applying min/max for all non-deterministic choices.
  
			
		
	
		
			
				
										if  ( minimize )  {  
			
		
	
		
			
				
											storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											+ + localIterations ;  
			
		
	
		
			
				
											+ + globalIterations ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										else  {  
			
		
	
		
			
				
											storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
										LOG4CPLUS_INFO ( logger ,  " Executed  "  < <  localIterations  < <  "  of max.  "  < <  maximalNumberOfIterations  < <  "  Iterations. " ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										uint_fast64_t  diffCount  =  0 ;  
			
		
	
		
			
				
										for  ( size_t  i  =  0 ;  i  <  currentX - > size ( ) ;  + + i )  {  
			
		
	
		
			
				
											if  ( currentX - > at ( i )  ! =  copyX . at ( i ) )  {  
			
		
	
		
			
				
												LOG4CPLUS_WARN ( logger ,  " CUDA solution differs on index  "  < <  i  < <  "  diff.  "  < <  std : : abs ( currentX - > at ( i )  -  copyX . at ( i ) )  < <  " , CPU:  "  < <  currentX - > at ( i )  < <  " , CUDA:  "  < <  copyX . at ( i ) ) ;  
			
		
	
		
			
				
												std : : cout  < <  " CUDA solution differs on index  "  < <  i  < <  "  diff.  "  < <  std : : abs ( currentX - > at ( i )  -  copyX . at ( i ) )  < <  " , CPU:  "  < <  currentX - > at ( i )  < <  " , CUDA:  "  < <  copyX . at ( i )  < <  std : : endl ;  
			
		
	
		
			
				
												+ + diffCount ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										std : : cout  < <  " CUDA solution differed in  "  < <  diffCount  < <  "  of  "  < <  currentX - > size ( )  < <  "  values. "  < <  std : : endl ;  
			
		
	
		
			
				
					# endif 
  
			
		
	
		
			
				
									}  else  {  
			
		
	
		
			
				
										localIterations  =  0 ;  
			
		
	
		
			
				
										converged  =  false ;  
			
		
	
		
			
				
										while  ( ! converged  & &  localIterations  <  this - > maximalNumberOfIterations )  {  
			
		
	
		
			
				
											// Compute x' = A*x + b.
  
			
		
	
		
			
				
											sccSubmatrix . multiplyWithVector ( * currentX ,  sccMultiplyResult ) ;  
			
		
	
		
			
				
											storm : : utility : : vector : : addVectorsInPlace < ValueType > ( sccMultiplyResult ,  sccSubB ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											//A.multiplyWithVector(scc, nondeterministicChoiceIndices, *currentX, multiplyResult);
  
			
		
	
		
			
				
											//storm::utility::addVectors(scc, nondeterministicChoiceIndices, multiplyResult, b);
  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											/*
  
			
		
	
		
			
				
											Versus :  
			
		
	
		
			
				
											A . multiplyWithVector ( * currentX ,  * multiplyResult ) ;  
			
		
	
		
			
				
											storm : : utility : : vector : : addVectorsInPlace ( * multiplyResult ,  b ) ;  
			
		
	
		
			
				
											*/  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											// Reduce the vector x' by applying min/max for all non-deterministic choices.
  
			
		
	
		
			
				
											if  ( minimize )  {  
			
		
	
		
			
				
												storm : : utility : : vector : : reduceVectorMin < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
											else  {  
			
		
	
		
			
				
												storm : : utility : : vector : : reduceVectorMax < ValueType > ( sccMultiplyResult ,  * swap ,  sccSubNondeterministicChoiceIndices ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Determine whether the method converged.
  
			
		
	
		
			
				
										// TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
  
			
		
	
		
			
				
										// running time. In fact, it is faster. This has to be investigated.
  
			
		
	
		
			
				
										// converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
  
			
		
	
		
			
				
										converged  =  storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX ,  * swap ,  this - > precision ,  this - > relative ) ;  
			
		
	
		
			
				
											 // Determine whether the method converged.
  
			
		
	
		
			
				
											 // TODO: It seems that the equalModuloPrecision call that compares all values should have a higher
  
			
		
	
		
			
				
											 // running time. In fact, it is faster. This has to be investigated.
  
			
		
	
		
			
				
											 // converged = storm::utility::equalModuloPrecision(*currentX, *newX, scc, precision, relative);
  
			
		
	
		
			
				
											 converged  =  storm : : utility : : vector : : equalModuloPrecision < ValueType > ( * currentX ,  * swap ,  this - > precision ,  this - > relative ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										// Update environment variables.
  
			
		
	
		
			
				
										std : : swap ( currentX ,  swap ) ;  
			
		
	
		
			
				
											 // Update environment variables.
  
			
		
	
		
			
				
											 std : : swap ( currentX ,  swap ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										+ + localIterations ;  
			
		
	
		
			
				
										+ + globalIterations ;  
			
		
	
		
			
				
											+ + localIterations ;  
			
		
	
		
			
				
											+ + globalIterations ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										LOG4CPLUS_INFO ( logger ,  " Executed  "  < <  localIterations  < <  "  of max.  "  < <  maximalNumberOfIterations  < <  "  Iterations. " ) ;  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
									LOG4CPLUS_INFO ( logger ,  " Executed  "  < <  localIterations  < <  "  of max.  "  < <  maximalNumberOfIterations  < <  "  Iterations. " ) ;  
			
		
	
		
			
				
					# endif 
  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									// The Result of this SCC has to be taken back into the main result vector
  
			
		
	
		
			
				
									innerIndex  =  0 ;  
			
		
	
	
		
			
				
					
						
							
								 
						
						
							
								 
						
						
					 
				
				@ -263,6 +274,72 @@ namespace storm { 
			
		
	
		
			
				
								}  
			
		
	
		
			
				
					        }  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
							template < typename  ValueType >  
			
		
	
		
			
				
							std : : vector < std : : pair < bool ,  std : : vector < uint_fast64_t > > >   
			
		
	
		
			
				
								TopologicalValueIterationNondeterministicLinearEquationSolver < ValueType > : : getOptimalGroupingFromTopologicalSccDecomposition ( storm : : storage : : StronglyConnectedComponentDecomposition < ValueType >  const &  sccDecomposition ,  std : : vector < uint_fast64_t >  const &  topologicalSort ,  storm : : storage : : SparseMatrix < ValueType >  const &  matrix )  const  {  
			
		
	
		
			
				
									std : : vector < std : : pair < bool ,  std : : vector < uint_fast64_t > > >  result ;  
			
		
	
		
			
				
					# ifdef STORM_HAVE_CUDAFORSTORM 
  
			
		
	
		
			
				
									// 95% to have a bit of padding
  
			
		
	
		
			
				
									size_t  const  cudaFreeMemory  =  static_cast < size_t > ( getFreeCudaMemory ( )  *  0.95 ) ;  
			
		
	
		
			
				
									size_t  lastResultIndex  =  0 ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
									std : : vector < uint_fast64_t >  const &  rowGroupIndices  =  matrix . getRowGroupIndices ( ) ;  
			
		
	
		
			
				
									size_t  currentSize  =  0 ;  
			
		
	
		
			
				
									for  ( auto  sccIndexIt  =  topologicalSort . cbegin ( ) ;  sccIndexIt  ! =  topologicalSort . cend ( ) ;  + + sccIndexIt )  {  
			
		
	
		
			
				
										storm : : storage : : StateBlock  const &  scc  =  sccDecomposition [ * sccIndexIt ] ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										uint_fast64_t  rowCount  =  0 ;  
			
		
	
		
			
				
										uint_fast64_t  entryCount  =  0 ;  
			
		
	
		
			
				
										std : : vector < uint_fast64_t >  rowGroups ;  
			
		
	
		
			
				
										rowGroups . reserve ( scc . size ( ) ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										for  ( auto  sccIt  =  scc . cbegin ( ) ;  sccIt  ! =  scc . cend ( ) ;  + + sccIt )  {  
			
		
	
		
			
				
											rowCount  + =  matrix . getRowGroupSize ( * sccIt ) ;  
			
		
	
		
			
				
											entryCount  + =  matrix . getRowGroupEntryCount ( * sccIt ) ;  
			
		
	
		
			
				
											rowGroups . push_back ( * sccIt ) ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										size_t  sccSize  =  basicValueIteration_mvReduce_uint64_double_calculateMemorySize ( static_cast < size_t > ( rowCount ) ,  scc . size ( ) ,  static_cast < size_t > ( entryCount ) ) ;  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
										if  ( ( currentSize  +  sccSize )  < =  cudaFreeMemory )  {  
			
		
	
		
			
				
											// There is enough space left in the current group
  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
											if  ( currentSize  = =  0 )  {  
			
		
	
		
			
				
												result . push_back ( std : : make_pair ( true ,  rowGroups ) ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
											else  {  
			
		
	
		
			
				
												result [ lastResultIndex ] . second . insert ( result [ lastResultIndex ] . second . end ( ) ,  rowGroups . begin ( ) ,  rowGroups . end ( ) ) ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
											currentSize  + =  sccSize ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
										else  {  
			
		
	
		
			
				
											if  ( sccSize  < =  cudaFreeMemory )  {  
			
		
	
		
			
				
												+ + lastResultIndex ;  
			
		
	
		
			
				
												result . push_back ( std : : make_pair ( true ,  rowGroups ) ) ;  
			
		
	
		
			
				
												currentSize  =  sccSize ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
											else  {  
			
		
	
		
			
				
												// This group is too big to fit into the CUDA Memory by itself
  
			
		
	
		
			
				
												lastResultIndex  + =  2 ;  
			
		
	
		
			
				
												result . push_back ( std : : make_pair ( false ,  rowGroups ) ) ;  
			
		
	
		
			
				
												currentSize  =  0 ;  
			
		
	
		
			
				
											}  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					# else 
  
			
		
	
		
			
				
									for  ( auto  sccIndexIt  =  topologicalSort . cbegin ( ) ;  sccIndexIt  ! =  topologicalSort . cend ( ) ;  + + sccIndexIt )  {  
			
		
	
		
			
				
										storm : : storage : : StateBlock  const &  scc  =  sccDecomposition [ * sccIndexIt ] ;  
			
		
	
		
			
				
										std : : vector < uint_fast64_t >  rowGroups ;  
			
		
	
		
			
				
										rowGroups . reserve ( scc . size ( ) ) ;  
			
		
	
		
			
				
										for  ( auto  sccIt  =  scc . cbegin ( ) ;  sccIt  ! =  scc . cend ( ) ;  + + sccIt )  {  
			
		
	
		
			
				
											rowGroups . push_back ( * sccIt ) ;  
			
		
	
		
			
				
											result . push_back ( std : : make_pair ( false ,  rowGroups ) ) ;  
			
		
	
		
			
				
										}  
			
		
	
		
			
				
									}  
			
		
	
		
			
				
					# endif 
  
			
		
	
		
			
				
								return  result ;  
			
		
	
		
			
				
							}  
			
		
	
		
			
				
					
 
			
		
	
		
			
				
					        // Explicitly instantiate the solver.
  
			
		
	
		
			
				
							template  class  TopologicalValueIterationNondeterministicLinearEquationSolver < double > ;  
			
		
	
		
			
				
					    }  // namespace solver