@ -1,11 +1,8 @@
# include "src/modelchecker/reachability/SparseMdpLearningModelChecker.h"
# include "src/storage/SparseMatrix.h"
# include "src/storage/sparse/StateStorage.h"
# include "src/storage/MaximalEndComponentDecomposition.h"
# include "src/generator/PrismNextStateGenerator.h"
# include "src/logic/FragmentSpecification.h"
# include "src/utility/prism.h"
@ -18,6 +15,7 @@
# include "src/settings/modules/GeneralSettings.h"
# include "src/utility/macros.h"
# include "src/exceptions/InvalidPropertyException.h"
# include "src/exceptions/NotSupportedException.h"
namespace storm {
@ -36,239 +34,297 @@ namespace storm {
}
template < typename ValueType >
void SparseMdpLearningModelChecker < ValueType > : : updateProbabilities ( StateType const & sourceStateId , uint32_t action , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > & lowerBoundsPerAction , std : : vector < ValueType > & upperBoundsPerAction , std : : vector < ValueType > & lowerBoundsPerState , std : : vector < ValueType > & upperBoundsPerState , StateType const & unexploredMarker ) const {
// Find out which row of the matrix we have to consider for the given action.
StateType sourceRow = action ;
std : : unique_ptr < CheckResult > SparseMdpLearningModelChecker < ValueType > : : computeReachabilityProbabilities ( CheckTask < storm : : logic : : EventuallyFormula > const & checkTask ) {
storm : : logic : : EventuallyFormula const & eventuallyFormula = checkTask . getFormula ( ) ;
storm : : logic : : Formula const & subformula = eventuallyFormula . getSubformula ( ) ;
STORM_LOG_THROW ( program . isDeterministicModel ( ) | | checkTask . isOptimizationDirectionSet ( ) , storm : : exceptions : : InvalidPropertyException , " For nondeterministic systems, an optimization direction (min/max) must be given in the property. " ) ;
STORM_LOG_THROW ( subformula . isAtomicExpressionFormula ( ) | | subformula . isAtomicLabelFormula ( ) , storm : : exceptions : : NotSupportedException , " Learning engine can only deal with formulas of the form 'F \" label \" ' or 'F expression'. " ) ;
// Compute the new lower/upper values of the action.
ValueType newLowerValue = storm : : utility : : zero < ValueType > ( ) ;
ValueType newUpperValue = storm : : utility : : zero < ValueType > ( ) ;
// boost::optional<ValueType> loopProbability;
for ( auto const & element : transitionMatrix [ sourceRow ] ) {
// If the element is a self-loop, we treat the probability by a proper rescaling after the loop.
// if (element.getColumn() == sourceStateId) {
// STORM_LOG_TRACE("Found self-loop with probability " << element.getValue() << ".");
// loopProbability = element.getValue();
// continue;
// }
std : : cout < < " lower += " < < element . getValue ( ) < < " * state[ " < < element . getColumn ( ) < < " ] = " < < ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : zero < ValueType > ( ) : lowerBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) < < std : : endl ;
if ( stateToRowGroupMapping [ element . getColumn ( ) ] ! = unexploredMarker ) {
std : : cout < < " upper bounds per state @ " < < stateToRowGroupMapping [ element . getColumn ( ) ] < < " is " < < upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] < < std : : endl ;
}
std : : cout < < " upper += " < < element . getValue ( ) < < " * state[ " < < element . getColumn ( ) < < " ] = " < < ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) < < std : : endl ;
newLowerValue + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : zero < ValueType > ( ) : lowerBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
newUpperValue + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
std : : cout < < " after iter " < < newLowerValue < < " and " < < newUpperValue < < std : : endl ;
}
// If there was a self-loop with probability p, we scale the probabilities by 1/(1-p).
// if (loopProbability) {
// STORM_LOG_TRACE("Scaling values " << newLowerValue << " and " << newUpperValue << " with " << (storm::utility::one<ValueType>()/(storm::utility::one<ValueType>() - loopProbability.get())) << ".");
// newLowerValue = newLowerValue / (storm::utility::one<ValueType>() - loopProbability.get());
// newUpperValue = newUpperValue / (storm::utility::one<ValueType>() - loopProbability.get());
// }
StateGeneration stateGeneration ( storm : : generator : : PrismNextStateGenerator < ValueType , StateType > ( program , variableInformation , false ) , getTargetStateExpression ( subformula ) ) ;
// And set them as the current value.
std : : cout < < newLowerValue < < " vs " < < newUpperValue < < std : : endl ;
STORM_LOG_ASSERT ( newLowerValue < = newUpperValue , " Lower bound must always be smaller or equal than upper bound. " ) ;
STORM_LOG_TRACE ( " Updating lower value of action " < < action < < " of state " < < sourceStateId < < " to " < < newLowerValue < < " (was " < < lowerBoundsPerAction [ action ] < < " ). " ) ;
lowerBoundsPerAction [ action ] = newLowerValue ;
STORM_LOG_TRACE ( " Updating upper value of action " < < action < < " of state " < < sourceStateId < < " to " < < newUpperValue < < " (was " < < upperBoundsPerAction [ action ] < < " ). " ) ;
upperBoundsPerAction [ action ] = newUpperValue ;
ExplorationInformation explorationInformation ( variableInformation . getTotalBitOffset ( true ) ) ;
explorationInformation . optimizationDirection = checkTask . isOptimizationDirectionSet ( ) ? checkTask . getOptimizationDirection ( ) : storm : : OptimizationDirection : : Maximize ;
// Check if we need to update the values for the states.
if ( lowerBoundsPerState [ stateToRowGroupMapping [ sourceStateId ] ] < newLowerValue ) {
STORM_LOG_TRACE ( " Got new lower bound for state " < < sourceStateId < < " : " < < newLowerValue < < " (was " < < lowerBoundsPerState [ stateToRowGroupMapping [ sourceStateId ] ] < < " ). " ) ;
lowerBoundsPerState [ stateToRowGroupMapping [ sourceStateId ] ] = newLowerValue ;
}
// The first row group starts at action 0.
explorationInformation . newRowGroup ( 0 ) ;
uint32_t sourceRowGroup = stateToRowGroupMapping [ sourceStateId ] ;
if ( newUpperValue < upperBoundsPerState [ sourceRowGroup ] ) {
if ( rowGroupIndices [ sourceRowGroup + 1 ] - rowGroupIndices [ sourceRowGroup ] > 1 ) {
ValueType max = storm : : utility : : zero < ValueType > ( ) ;
// Create a callback for the next-state generator to enable it to request the index of states.
std : : function < StateType ( storm : : generator : : CompressedState const & ) > stateToIdCallback = createStateToIdCallback ( explorationInformation ) ;
for ( uint32_t currentAction = rowGroupIndices [ sourceRowGroup ] ; currentAction < rowGroupIndices [ sourceRowGroup + 1 ] ; + + currentAction ) {
std : : cout < < " cur: " < < currentAction < < std : : endl ;
if ( currentAction = = action ) {
continue ;
// Compute and return result.
std : : tuple < StateType , ValueType , ValueType > boundsForInitialState = performLearningProcedure ( stateGeneration , explorationInformation ) ;
return std : : make_unique < ExplicitQuantitativeCheckResult < ValueType > > ( std : : get < 0 > ( boundsForInitialState ) , std : : get < 1 > ( boundsForInitialState ) ) ;
}
ValueType currentValue = storm : : utility : : zero < ValueType > ( ) ;
for ( auto const & element : transitionMatrix [ currentAction ] ) {
currentValue + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
template < typename ValueType >
storm : : expressions : : Expression SparseMdpLearningModelChecker < ValueType > : : getTargetStateExpression ( storm : : logic : : Formula const & subformula ) const {
storm : : expressions : : Expression result ;
if ( subformula . isAtomicExpressionFormula ( ) ) {
result = subformula . asAtomicExpressionFormula ( ) . getExpression ( ) ;
} else {
result = program . getLabelExpression ( subformula . asAtomicLabelFormula ( ) . getLabel ( ) ) ;
}
max = std : : max ( max , currentValue ) ;
std : : cout < < " max is " < < max < < std : : endl ;
return result ;
}
newUpperValue = std : : max ( newUpperValue , max ) ;
}
template < typename ValueType >
std : : function < typename SparseMdpLearningModelChecker < ValueType > : : StateType ( storm : : generator : : CompressedState const & ) > SparseMdpLearningModelChecker < ValueType > : : createStateToIdCallback ( ExplorationInformation & explorationInformation ) const {
return [ & explorationInformation ] ( storm : : generator : : CompressedState const & state ) - > StateType {
StateType newIndex = explorationInformation . stateStorage . numberOfStates ;
if ( newUpperValue < upperBoundsPerState [ sourceRowGroup ] ) {
STORM_LOG_TRACE ( " Got new upper bound for state " < < sourceStateId < < " : " < < newUpperValue < < " (was " < < upperBoundsPerState [ sourceRowGroup ] < < " ). " ) ;
std : : cout < < " writing at index " < < sourceRowGroup < < std : : endl ;
upperBoundsPerState [ sourceRowGroup ] = newUpperValue ;
}
// Check, if the state was already registered.
std : : pair < uint32_t , std : : size_t > actualIndexBucketPair = explorationInformation . stateStorage . stateToId . findOrAddAndGetBucket ( state , newIndex ) ;
if ( actualIndexBucketPair . first = = newIndex ) {
explorationInformation . addUnexploredState ( state ) ;
}
return actualIndexBucketPair . first ;
} ;
}
template < typename ValueType >
void SparseMdpLearningModelChecker < ValueType > : : updateProbabilitiesUsingStack ( std : : vector < std : : pair < StateType , uint32_t > > & stateActionStack , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > & lowerBoundsPerAction , std : : vector < ValueType > & upperBoundsPerAction , std : : vector < ValueType > & lowerBoundsPerState , std : : vector < ValueType > & upperBoundsPerState , StateType const & unexploredMarker ) const {
std : : tuple < typename SparseMdpLearningModelChecker < ValueType > : : StateType , ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : performLearningProcedure ( StateGeneration & stateGeneration , ExplorationInformation & explorationInformation ) const {
std : : cout < < " stack is: " < < std : : endl ;
for ( auto const & el : stateActionStack ) {
std : : cout < < el . first < < " -[ " < < el . second < < " ]-> " ;
}
std : : cout < < std : : endl ;
// Generate the initial state so we know where to start the simulation.
explorationInformation . setInitialStates ( stateGeneration . getInitialStates ( ) ) ;
STORM_LOG_THROW ( explorationInformation . getNumberOfInitialStates ( ) = = 1 , storm : : exceptions : : NotSupportedException , " Currently only models with one initial state are supported by the learning engine. " ) ;
StateType initialStateIndex = explorationInformation . getFirstInitialState ( ) ;
stateActionStack . pop_back ( ) ;
while ( ! stateActionStack . empty ( ) ) {
updateProbabilities ( stateActionStack . back ( ) . first , stateActionStack . back ( ) . second , transitionMatrix , rowGroupIndices , stateToRowGroupMapping , lowerBoundsPerAction , upperBoundsPerAction , lowerBoundsPerState , upperBoundsPerState , unexploredMarker ) ;
stateActionStack . pop_back ( ) ;
}
}
// Create a structure that holds the bounds for the states and actions.
BoundValues bounds ;
template < typename ValueType >
std : : pair < ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : computeValuesOfChoice ( uint32_t action , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > const & lowerBoundsPerState , std : : vector < ValueType > const & upperBoundsPerState , StateType const & unexploredMarker ) {
std : : pair < ValueType , ValueType > result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( auto const & element : transitionMatrix [ action ] ) {
result . first + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : zero < ValueType > ( ) : lowerBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
result . second + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
// Create a stack that is used to track the path we sampled.
StateActionStack stack ;
// Now perform the actual sampling.
Statistics stats ;
bool convergenceCriterionMet = false ;
while ( ! convergenceCriterionMet ) {
bool result = samplePathFromState ( stateGeneration , explorationInformation , stack , bounds , stats ) ;
// If a terminal state was found, we update the probabilities along the path contained in the stack.
if ( result ) {
// Update the bounds along the path to the terminal state.
STORM_LOG_TRACE ( " Found terminal state, updating probabilities along path. " ) ;
updateProbabilityBoundsAlongSampledPath ( stack , explorationInformation , bounds ) ;
} else {
// If not terminal state was found, the search aborted, possibly because of an EC-detection. In this
// case, we cannot update the probabilities.
STORM_LOG_TRACE ( " Did not find terminal state. " ) ;
}
return result ;
STORM_LOG_DEBUG ( " Discovered states: " < < explorationInformation . getNumberOfDiscoveredStates ( ) < < " ( " < < stats . numberOfExploredStates < < " explored, " < < explorationInformation . getNumberOfUnexploredStates ( ) < < " unexplored). " ) ;
STORM_LOG_DEBUG ( " Value of initial state is in [ " < < bounds . getLowerBoundForState ( initialStateIndex , explorationInformation ) < < " , " < < bounds . getUpperBoundForState ( initialStateIndex , explorationInformation ) < < " ]. " ) ;
ValueType difference = bounds . getDifferenceOfStateBounds ( initialStateIndex , explorationInformation ) ;
STORM_LOG_DEBUG ( " Difference after iteration " < < stats . iterations < < " is " < < difference < < " . " ) ;
convergenceCriterionMet = difference < 1e-6 ;
+ + stats . iterations ;
}
template < typename ValueType >
std : : pair < ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : computeValuesOfState ( StateType currentStateId , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > const & lowerBounds , std : : vector < ValueType > const & upperBounds , std : : vector < ValueType > const & lowerBoundsPerState , std : : vector < ValueType > const & upperBoundsPerState , StateType const & unexploredMarker ) {
StateType sourceRowGroup = stateToRowGroupMapping [ currentStateId ] ;
std : : pair < ValueType , ValueType > result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( uint32_t choice = rowGroupIndices [ sourceRowGroup ] ; choice < rowGroupIndices [ sourceRowGroup + 1 ] ; + + choice ) {
std : : pair < ValueType , ValueType > choiceValues = computeValuesOfChoice ( choice , transitionMatrix , stateToRowGroupMapping , lowerBoundsPerState , upperBoundsPerState , unexploredMarker ) ;
result . first = std : : max ( choiceValues . first , result . first ) ;
result . second = std : : max ( choiceValues . second , result . second ) ;
if ( storm : : settings : : generalSettings ( ) . isShowStatisticsSet ( ) ) {
std : : cout < < std : : endl < < " Learning summary ------------------------- " < < std : : endl ;
std : : cout < < " Discovered states: " < < explorationInformation . getNumberOfDiscoveredStates ( ) < < " ( " < < stats . numberOfExploredStates < < " explored, " < < explorationInformation . getNumberOfUnexploredStates ( ) < < " unexplored, " < < stats . numberOfTargetStates < < " target states) " < < std : : endl ;
std : : cout < < " Sampling iterations: " < < stats . iterations < < std : : endl ;
std : : cout < < " Maximal path length: " < < stats . maxPathLength < < std : : endl ;
}
return result ;
return std : : make_tuple ( initialStateIndex , bounds . getLowerBoundForState ( initialStateIndex , explorationInformation ) , bounds . getUpperBoundForState ( initialStateIndex , explorationInformation ) ) ;
}
template < typename ValueType >
uint32_t SparseMdpLearningModelChecker < ValueType > : : sampleFromMaxActions ( StateType currentStateId , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > const & upperBoundsPerState , std : : unordered_map < StateType , ChoiceSetPointer > const & stateToLeavingChoicesOfEndComponent , StateType const & unexploredMarker ) {
bool SparseMdpLearningModelChecker < ValueType > : : samplePathFromState ( StateGeneration & stateGeneration , ExplorationInformation & explorationInformation , StateActionStack & stack , BoundValues & bounds , Statistics & stats ) const {
StateType rowGroup = stateToRowGroupMapping [ currentStateId ] ;
// Start the search from the initial state.
stack . push_back ( std : : make_pair ( explorationInformation . getFirstInitialState ( ) , 0 ) ) ;
// First, determine all maximizing actions.
std : : vector < uint32_t > allMaxActions ;
bool foundTerminalState = false ;
while ( ! foundTerminalState ) {
StateType const & currentStateId = stack . back ( ) . first ;
STORM_LOG_TRACE ( " State on top of stack is: " < < currentStateId < < " . " ) ;
for ( StateType state = 0 ; state < stateToRowGroupMapping . size ( ) ; + + state ) {
if ( stateToRowGroupMapping [ state ] ! = unexploredMarker ) {
std : : cout < < " state " < < state < < " (grp " < < stateToRowGroupMapping [ state ] < < " ) has bounds [x, " < < upperBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " ] " < < std : : endl ;
// If the state is not yet explored, we need to retrieve its behaviors.
auto unexploredIt = explorationInformation . unexploredStates . find ( currentStateId ) ;
if ( unexploredIt ! = explorationInformation . unexploredStates . end ( ) ) {
STORM_LOG_TRACE ( " State was not yet explored. " ) ;
// Explore the previously unexplored state.
storm : : generator : : CompressedState const & compressedState = unexploredIt - > second ;
foundTerminalState = exploreState ( stateGeneration , currentStateId , compressedState , explorationInformation , bounds , stats ) ;
explorationInformation . unexploredStates . erase ( unexploredIt ) ;
} else {
std : : cout < < " state " < < state < < " is unexplored " < < std : : endl ;
// If the state was already explored, we check whether it is a terminal state or not.
if ( explorationInformation . isTerminal ( currentStateId ) ) {
STORM_LOG_TRACE ( " Found already explored terminal state: " < < currentStateId < < " . " ) ;
foundTerminalState = true ;
}
}
// Determine the maximal value of any action.
ValueType max = 0 ;
auto choicesInEcIt = stateToLeavingChoicesOfEndComponent . find ( currentStateId ) ;
if ( choicesInEcIt ! = stateToLeavingChoicesOfEndComponent . end ( ) ) {
STORM_LOG_TRACE ( " Sampling from actions leaving the previously detected EC. " ) ;
for ( auto const & row : * choicesInEcIt - > second ) {
ValueType current = 0 ;
for ( auto const & element : transitionMatrix [ row ] ) {
current + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
}
// If the state was not a terminal state, we continue the path search and sample the next state .
if ( ! foundTerminalState ) {
// At this point, we can be sure that the state was expanded and that we can sample according to the
// probabilities in the matrix.
uint32_t chosenAction = sampleFromMaxActions ( currentStateId , explorationInformation , bounds ) ;
stack . back ( ) . second = chosenAction ;
STORM_LOG_TRACE ( " Sampled action " < < chosenAction < < " in state " < < currentStateId < < " . " ) ;
StateType successor = sampleSuccessorFromAction ( chosenAction , explorationInformation ) ;
STORM_LOG_TRACE ( " Sampled successor " < < successor < < " according to action " < < chosenAction < < " of state " < < currentStateId < < " . " ) ;
max = std : : max ( max , current ) ;
// Put the successor state and a dummy action on top of the stack.
stack . emplace_back ( successor , 0 ) ;
stats . maxPathLength = std : : max ( stats . maxPathLength , stack . size ( ) ) ;
// If the current path length exceeds the threshold and the model is a nondeterministic one, we
// perform an EC detection.
if ( stack . size ( ) > stats . pathLengthUntilEndComponentDetection & & ! program . isDeterministicModel ( ) ) {
detectEndComponents ( stack , explorationInformation , bounds ) ;
// Abort the current search.
STORM_LOG_TRACE ( " Aborting the search after EC detection. " ) ;
stack . clear ( ) ;
break ;
}
} else {
STORM_LOG_TRACE ( " Sampling from actions leaving the state. " ) ;
for ( uint32_t row = rowGroupIndices [ rowGroup ] ; row < rowGroupIndices [ rowGroup + 1 ] ; + + row ) {
ValueType current = 0 ;
for ( auto const & element : transitionMatrix [ row ] ) {
current + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
}
max = std : : max ( max , current ) ;
}
return foundTerminalState ;
}
STORM_LOG_TRACE ( " Looking for action with value " < < max < < " . " ) ;
template < typename ValueType >
bool SparseMdpLearningModelChecker < ValueType > : : exploreState ( StateGeneration & stateGeneration , StateType const & currentStateId , storm : : generator : : CompressedState const & currentState , ExplorationInformation & explorationInformation , BoundValues & bounds , Statistics & stats ) const {
bool isTerminalState = false ;
bool isTargetState = false ;
for ( StateType state = 0 ; state < stateToRowGroupMapping . size ( ) ; + + state ) {
if ( stateToRowGroupMapping [ state ] ! = unexploredMarker ) {
std : : cout < < " state " < < state < < " (grp " < < stateToRowGroupMapping [ state ] < < " ) has bounds [x, " < < upperBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " ] " < < std : : endl ;
// Before generating the behavior of the state, we need to determine whether it's a target state that
// does not need to be expanded.
stateGeneration . generator . load ( currentState ) ;
if ( stateGeneration . isTargetState ( ) ) {
+ + stats . numberOfTargetStates ;
isTargetState = true ;
isTerminalState = true ;
} else {
std : : cout < < " state " < < state < < " is unexplored " < < std : : endl ;
STORM_LOG_TRACE ( " Exploring state. " ) ;
// If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
storm : : generator : : StateBehavior < ValueType , StateType > behavior = stateGeneration . expand ( ) ;
STORM_LOG_TRACE ( " State has " < < behavior . getNumberOfChoices ( ) < < " choices. " ) ;
// Clumsily check whether we have found a state that forms a trivial BMEC.
if ( behavior . getNumberOfChoices ( ) = = 0 ) {
isTerminalState = true ;
} else if ( behavior . getNumberOfChoices ( ) = = 1 ) {
auto const & onlyChoice = * behavior . begin ( ) ;
if ( onlyChoice . size ( ) = = 1 ) {
auto const & onlyEntry = * onlyChoice . begin ( ) ;
if ( onlyEntry . first = = currentStateId ) {
isTerminalState = true ;
}
}
}
if ( choicesInEcIt ! = stateToLeavingChoicesOfEndComponent . end ( ) ) {
for ( auto const & row : * choicesInEcIt - > second ) {
ValueType current = 0 ;
for ( auto const & element : transitionMatrix [ row ] ) {
current + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
// If the state was neither a trivial (non-accepting) terminal state nor a target state, we
// need to store its behavior.
if ( ! isTerminalState ) {
// Next, we insert the behavior into our matrix structure.
StateType startRow = explorationInformation . matrix . size ( ) ;
explorationInformation . addRowsToMatrix ( behavior . getNumberOfChoices ( ) ) ;
// Terminate the row group.
explorationInformation . rowGroupIndices . push_back ( explorationInformation . matrix . size ( ) ) ;
ActionType currentAction = 0 ;
for ( auto const & choice : behavior ) {
for ( auto const & entry : choice ) {
std : : cout < < " adding " < < currentStateId < < " -> " < < entry . first < < " with prob " < < entry . second < < std : : endl ;
explorationInformation . matrix [ startRow + currentAction ] . emplace_back ( entry . first , entry . second ) ;
}
STORM_LOG_TRACE ( " Computed (upper) bound " < < current < < " for row " < < row < < " . " ) ;
// If the action is one of the maximizing ones, insert it into our list.
if ( comparator . isEqual ( current , max ) ) {
allMaxActions . push_back ( row ) ;
bounds . initializeActionBoundsForNextAction ( computeBoundsOfAction ( startRow + currentAction , explorationInformation , bounds ) ) ;
STORM_LOG_TRACE ( " Initializing bounds of action " < < ( startRow + currentAction ) < < " to " < < bounds . getLowerBoundForAction ( startRow + currentAction ) < < " and " < < bounds . getUpperBoundForAction ( startRow + currentAction ) < < " . " ) ;
+ + currentAction ;
}
bounds . initializeStateBoundsForNextState ( computeBoundsOfState ( currentStateId , explorationInformation , bounds ) ) ;
STORM_LOG_TRACE ( " Initializing bounds of state " < < currentStateId < < " to " < < bounds . getLowerBoundForState ( currentStateId ) < < " and " < < bounds . getUpperBoundForState ( currentStateId ) < < " . " ) ;
}
}
if ( isTerminalState ) {
STORM_LOG_TRACE ( " State does not need to be explored, because it is " < < ( isTargetState ? " a target state " : " a rejecting terminal state " ) < < " . " ) ;
explorationInformation . addTerminalState ( currentStateId ) ;
if ( isTargetState ) {
bounds . initializeStateBoundsForNextState ( std : : make_pair ( storm : : utility : : one < ValueType > ( ) , storm : : utility : : one < ValueType > ( ) ) ) ;
bounds . initializeStateBoundsForNextAction ( std : : make_pair ( storm : : utility : : one < ValueType > ( ) , storm : : utility : : one < ValueType > ( ) ) ) ;
} else {
for ( uint32_t row = rowGroupIndices [ rowGroup ] ; row < rowGroupIndices [ rowGroup + 1 ] ; + + row ) {
ValueType current = 0 ;
for ( auto const & element : transitionMatrix [ row ] ) {
if ( stateToRowGroupMapping [ element . getColumn ( ) ] ! = unexploredMarker ) {
std : : cout < < " upper bounds per state @ " < < stateToRowGroupMapping [ element . getColumn ( ) ] < < " is " < < upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] < < std : : endl ;
bounds . initializeStateBoundsForNextState ( std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ) ;
bounds . initializeStateBoundsForNextAction ( std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ) ;
}
// Increase the size of the matrix, but leave the row empty.
explorationInformation . addRowsToMatrix ( 1 ) ;
// Terminate the row group.
explorationInformation . newRowGroup ( ) ;
}
std : : cout < < " += " < < element . getValue ( ) < < " * " < < " state[ " < < element . getColumn ( ) < < " ] = " < < ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) < < std : : endl ;
current + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
// Finally, map the unexplored state to the row group.
explorationInformation . assignStateToNextRowGroup ( currentStateId ) ;
STORM_LOG_TRACE ( " Assigning row group " < < explorationInformation . getRowGroup ( currentStateId ) < < " to state " < < currentStateId < < " . " ) ;
return isTerminalState ;
}
STORM_LOG_TRACE ( " Computed (upper) bound " < < current < < " for row " < < row < < " . " ) ;
// If the action is one of the maximizing ones, insert it into our list.
if ( comparator . isEqual ( current , max ) ) {
allMaxActions . push_back ( row ) ;
template < typename ValueType >
uint32_t SparseMdpLearningModelChecker < ValueType > : : sampleMaxAction ( StateType const & currentStateId , ExplorationInformation const & explorationInformation , BoundValues & bounds ) const {
StateType rowGroup = explorationInformation . getRowGroup ( currentStateId ) ;
// First, determine all maximizing actions.
std : : vector < ActionType > allMaxActions ;
// Determine the values of all available actions.
std : : vector < std : : pair < ActionType , ValueType > > actionValues ;
auto choicesInEcIt = explorationInformation . stateToLeavingChoicesOfEndComponent . find ( currentStateId ) ;
if ( choicesInEcIt ! = explorationInformation . stateToLeavingChoicesOfEndComponent . end ( ) ) {
STORM_LOG_TRACE ( " Sampling from actions leaving the previously detected EC. " ) ;
for ( auto const & row : * choicesInEcIt - > second ) {
actionValues . push_back ( std : : make_pair ( row , computeUpperBoundOfAction ( row , explorationInformation , bounds ) ) ) ;
}
} else {
STORM_LOG_TRACE ( " Sampling from actions leaving the state. " ) ;
for ( uint32_t row = explorationInformation . getStartRowOfGroup ( rowGroup ) ; row < explorationInformation . getStartRowOfGroup ( rowGroup + 1 ) ; + + row ) {
actionValues . push_back ( std : : make_pair ( row , computeUpperBoundOfAction ( row , explorationInformation , bounds ) ) ) ;
}
}
STORM_LOG_ASSERT ( ! allMaxActions . empty ( ) , " Must have at least one maximizing action. " ) ;
std : : sort ( actionValues . begin ( ) , actionValues . end ( ) , [ ] ( std : : pair < ActionType , ValueType > const & a , std : : pair < ActionType , ValueType > const & b ) { return b . second > a . second ; } ) ;
auto end = std : : equal_range ( actionValues . begin ( ) , actionValues . end ( ) , [ this ] ( std : : pair < ActionType , ValueType > const & a , std : : pair < ActionType , ValueType > const & b ) { return comparator . isEqual ( a . second , b . second ) ; } ) ;
// Now sample from all maximizing actions.
std : : uniform_int_distribution < uint32_t > distribution ( 0 , allMaxActions . size ( ) - 1 ) ;
return allMaxActions [ distribution ( randomGenerator ) ] ;
std : : uniform_int_distribution < uint32_t > distribution ( 0 , std : : distance ( actionValues . begin ( ) , end ) ) ;
return actionValue s [ distribution ( randomGenerator ) ] . first ;
}
template < typename ValueType >
typename SparseMdpLearningModelChecker < ValueType > : : StateType SparseMdpLearningModelChecker < ValueType > : : sampleSuccessorFromAction ( StateType chosenAction , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > const & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping ) {
uint32_t row = chosenAction ;
typename SparseMdpLearningModelChecker < ValueType > : : StateType SparseMdpLearningModelChecker < ValueType > : : sampleSuccessorFromAction ( ActionType const & chosenAction , ExplorationInformation const & explorationInformation ) const {
// TODO: precompute this?
std : : vector < ValueType > probabilities ( transitionMatrix [ row ] . size ( ) ) ;
std : : transform ( transitionMatrix [ row ] . begin ( ) , transitionMatrix [ row ] . end ( ) , probabilities . begin ( ) , [ ] ( storm : : storage : : MatrixEntry < StateType , ValueType > const & entry ) { return entry . getValue ( ) ; } ) ;
std : : vector < ValueType > probabilities ( explorationInformation . getRowOfMatrix ( chosenAction ) . size ( ) ) ;
std : : transform ( explorationInformation . getRowOfMatrix ( chosenAction ) . begin ( ) , explorationInformation . getRowOfMatrix ( chosenAction ) . end ( ) , probabilities . begin ( ) , [ ] ( storm : : storage : : MatrixEntry < StateType , ValueType > const & entry ) { return entry . getValue ( ) ; } ) ;
// Now sample according to the probabilities.
std : : discrete_distribution < StateType > distribution ( probabilities . begin ( ) , probabilities . end ( ) ) ;
StateType offset = distribution ( randomGenerator ) ;
return transitionMatrix [ row ] [ offset ] . getColumn ( ) ;
return explorationInformation . getRowOfMatrix ( chosenAction ) [ offset ] . getColumn ( ) ;
}
template < typename ValueType >
storm : : expressions : : Expression SparseMdpLearningModelChecker < ValueType > : : getTargetStateExpression ( storm : : logic : : Formula const & subformula ) {
storm : : expressions : : Expression result ;
if ( subformula . isAtomicExpressionFormula ( ) ) {
result = subformula . asAtomicExpressionFormula ( ) . getExpression ( ) ;
} else {
result = program . getLabelExpression ( subformula . asAtomicLabelFormula ( ) . getLabel ( ) ) ;
}
return result ;
}
template < typename ValueType >
void SparseMdpLearningModelChecker < ValueType > : : detectEndComponents ( std : : vector < std : : pair < StateType , uint32_t > > const & stateActionStack , boost : : container : : flat_set < StateType > & terminalStates , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > & transitionMatrix , std : : vector < StateType > const & rowGroupIndices , std : : vector < StateType > const & stateToRowGroupMapping , std : : vector < ValueType > & lowerBoundsPerAction , std : : vector < ValueType > & upperBoundsPerAction , std : : vector < ValueType > & lowerBoundsPerState , std : : vector < ValueType > & upperBoundsPerState , std : : unordered_map < StateType , ChoiceSetPointer > & stateToLeavingChoicesOfEndComponent , StateType const & unexploredMarker ) const {
void SparseMdpLearningModelChecker < ValueType > : : detectEndComponents ( StateActionStack const & stack , ExplorationInformation & explorationInformation , BoundValues & bounds ) const {
STORM_LOG_TRACE ( " Starting EC detection. " ) ;
// Outline:
@ -281,13 +337,14 @@ namespace storm {
// Determine the set of states that was expanded.
std : : vector < StateType > relevantStates ;
for ( StateType state = 0 ; state < stateToRowGroupMapping . size ( ) ; + + state ) {
if ( stateToRowGroupMapping [ state ] ! = unexploredMarker ) {
for ( StateType state = 0 ; state < explorationInformation . stateStorage . numberOfStates ; + + state ) {
if ( ! explorationInformation . isUnexplored ( state ) ) {
relevantStates . push_back ( state ) ;
}
}
// Sort according to the actual row groups so we can insert the elements in order later.
std : : sort ( relevantStates . begin ( ) , relevantStates . end ( ) , [ & stateToRowGroupMapping ] ( StateType const & a , StateType const & b ) { return stateToRowGroupMapping [ a ] < stateToRowGroupMapping [ b ] ; } ) ;
std : : sort ( relevantStates . begin ( ) , relevantStates . end ( ) , [ & explorationInformation ] ( StateType const & a , StateType const & b ) { return explorationInformation . getRowGroup ( a ) < explorationInformation . getRowGroup ( b ) ; } ) ;
StateType unexploredState = relevantStates . size ( ) ;
// Create a mapping for faster look-up during the translation of flexible matrix to the real sparse matrix.
@ -301,10 +358,10 @@ namespace storm {
StateType currentRow = 0 ;
for ( auto const & state : relevantStates ) {
builder . newRowGroup ( currentRow ) ;
StateType rowGroup = stateToRowGroupMapping [ state ] ;
for ( auto row = rowGroupIndices [ rowGroup ] ; row < rowGroupIndices [ rowGroup + 1 ] ; + + row ) {
StateType rowGroup = explorationInformation . getRowGroup ( state ) ;
for ( auto row = explorationInformation . getStartRowOfGroup ( rowGroup ) ; row < explorationInformation . getStartRowOfGroup ( rowGroup + 1 ) ; + + row ) {
ValueType unexpandedProbability = storm : : utility : : zero < ValueType > ( ) ;
for ( auto const & entry : transitionMatrix [ row ] ) {
for ( auto const & entry : explorationInformation . getRowOfMatrix ( row ) ) {
auto it = relevantStateToNewRowGroupMapping . find ( entry . getColumn ( ) ) ;
if ( it ! = relevantStateToNewRowGroupMapping . end ( ) ) {
// If the entry is a relevant state, we copy it over (and compensate for the offset change).
@ -340,41 +397,41 @@ namespace storm {
bool containsTargetState = false ;
// Now we record all choices leaving the EC.
Choice SetPointer leavingChoices = std : : make_shared < Choice Set> ( ) ;
Action SetPointer leavingChoices = std : : make_shared < Action Set> ( ) ;
for ( auto const & stateAndChoices : mec ) {
// Compute the state of the original model that corresponds to the current state.
std : : cout < < " local state: " < < stateAndChoices . first < < std : : endl ;
StateType originalState = relevantStates [ stateAndChoices . first ] ;
std : : cout < < " original state: " < < originalState < < std : : endl ;
uint32_t originalRowGroup = stateToRowGroupMapping [ originalState ] ;
uint32_t originalRowGroup = explorationInformation . getRowGroup ( originalState ) ;
std : : cout < < " original row group: " < < originalRowGroup < < std : : endl ;
// TODO: This check for a target state is a bit hackish and only works for max probabilities.
if ( ! containsTargetState & & lowerBoundsPerState [ originalRowGroup ] = = storm : : utility : : one < ValueType > ( ) ) {
// TODO: This checks for a target state is a bit hackish and only works for max probabilities.
if ( ! containsTargetState & & comparator . isOne ( bounds . getLowerBoundForRowGroup ( originalRowGroup , explorationInformation ) ) ) {
containsTargetState = true ;
}
auto includedChoicesIt = stateAndChoices . second . begin ( ) ;
auto includedChoicesIte = stateAndChoices . second . end ( ) ;
for ( auto choice = rowGroupIndices [ originalRowGroup ] ; choice < rowGroupIndices [ originalRowGroup + 1 ] ; + + choice ) {
for ( auto action = explorationInformation . getStartRowOfGroup ( originalRowGroup ) ; action < explorationInformation . getStartRowOfGroup ( originalRowGroup + 1 ) ; + + action ) {
if ( includedChoicesIt ! = includedChoicesIte ) {
STORM_LOG_TRACE ( " Next (local) choice contained in MEC is " < < ( * includedChoicesIt - relevantStatesMatrix . getRowGroupIndices ( ) [ stateAndChoices . first ] ) ) ;
STORM_LOG_TRACE ( " Current (local) choice iterated is " < < ( choice - rowGroupIndices [ originalRowGroup ] ) ) ;
if ( choice - rowGroupIndices [ originalRowGroup ] ! = * includedChoicesIt - relevantStatesMatrix . getRowGroupIndices ( ) [ stateAndChoices . first ] ) {
STORM_LOG_TRACE ( " Current (local) choice iterated is " < < ( action - explorationInformation . getStartRowOfGroup ( originalRowGroup ) ) ) ;
if ( action - explorationInformation . getStartRowOfGroup ( originalRowGroup ) ! = * includedChoicesIt - relevantStatesMatrix . getRowGroupIndices ( ) [ stateAndChoices . first ] ) {
STORM_LOG_TRACE ( " Choice leaves the EC. " ) ;
leavingChoices - > insert ( choice ) ;
leavingChoices - > insert ( action ) ;
} else {
STORM_LOG_TRACE ( " Choice stays in the EC. " ) ;
+ + includedChoicesIt ;
}
} else {
STORM_LOG_TRACE ( " Choice leaves the EC, because there is no more choice staying in the EC. " ) ;
leavingChoices - > insert ( choice ) ;
leavingChoices - > insert ( action ) ;
}
}
stateToLeavingChoicesOfEndComponent [ originalState ] = leavingChoices ;
explorationInformation . stateToLeavingChoicesOfEndComponent [ originalState ] = leavingChoices ;
}
// If one of the states of the EC is a target state, all states in the EC have probability 1.
@ -384,9 +441,9 @@ namespace storm {
// Compute the state of the original model that corresponds to the current state.
StateType originalState = relevantStates [ stateAndChoices . first ] ;
STORM_LOG_TRACE ( " Setting lower bound of state in row group " < < stateToRowGroupMapping [ originalState ] < < " to 1. " ) ;
lowerBoundsPerState [ stateToRowGroupMapping [ originalState ] ] = storm : : utility : : one < ValueType > ( ) ;
terminalStates . insert ( originalState ) ;
STORM_LOG_TRACE ( " Setting lower bound of state in row group " < < explorationInformation . getRowGroup ( originalState ) < < " to 1. " ) ;
bounds . setLowerBoundForState ( originalState , explorationInformation , storm : : utility : : one < ValueType > ( ) ) ;
explorationInformation . addTerminalState ( originalState ) ;
}
} else if ( leavingChoices - > empty ( ) ) {
STORM_LOG_TRACE ( " MEC's leaving choices are empty. " ) ;
@ -395,317 +452,138 @@ namespace storm {
// Compute the state of the original model that corresponds to the current state.
StateType originalState = relevantStates [ stateAndChoices . first ] ;
STORM_LOG_TRACE ( " Setting upper bound of state in row group " < < stateToRowGroupMapping [ originalState ] < < " to 0. " ) ;
upperBoundsPerState [ stateToRowGroupMapping [ originalState ] ] = storm : : utility : : zero < ValueType > ( ) ;
terminalStates . insert ( originalState ) ;
STORM_LOG_TRACE ( " Setting upper bound of state in row group " < < explorationInformation . getRowGroup ( originalState ) < < " to 0. " ) ;
bounds . setUpperBoundForState ( originalState , explorationInformation , storm : : utility : : zero < ValueType > ( ) ) ;
explorationInformation . addTerminalState ( originalState ) ;
}
}
}
}
template < typename ValueType >
bool SparseMdpLearningModelChecker < ValueType > : : exploreState ( storm : : generator : : PrismNextStateGenerator < ValueType , StateType > & generator , std : : function < StateType ( storm : : generator : : CompressedState const & ) > const & stateToIdCallback , StateType const & currentStateId , storm : : generator : : CompressedState const & compressedState , StateType const & unexploredMarker , boost : : container : : flat_set < StateType > & terminalStates , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > & matrix , std : : vector < StateType > & rowGroupIndices , std : : vector < StateType > & stateToRowGroupMapping , storm : : expressions : : Expression const & targetStateExpression , std : : vector < ValueType > & lowerBoundsPerAction , std : : vector < ValueType > & upperBoundsPerAction , std : : vector < ValueType > & lowerBoundsPerState , std : : vector < ValueType > & upperBoundsPerState , Statistics & stats ) {
bool isTerminalState = false ;
bool isTargetState = false ;
// Map the unexplored state to a row group.
stateToRowGroupMapping [ currentStateId ] = rowGroupIndices . size ( ) - 1 ;
STORM_LOG_TRACE ( " Assigning row group " < < stateToRowGroupMapping [ currentStateId ] < < " to state " < < currentStateId < < " . " ) ;
lowerBoundsPerState . push_back ( storm : : utility : : zero < ValueType > ( ) ) ;
upperBoundsPerState . push_back ( storm : : utility : : one < ValueType > ( ) ) ;
// Before generating the behavior of the state, we need to determine whether it's a target state that
// does not need to be expanded.
generator . load ( compressedState ) ;
if ( generator . satisfies ( targetStateExpression ) ) {
+ + stats . numberOfTargetStates ;
isTargetState = true ;
isTerminalState = true ;
} else {
STORM_LOG_TRACE ( " Exploring state. " ) ;
// If it needs to be expanded, we use the generator to retrieve the behavior of the new state.
storm : : generator : : StateBehavior < ValueType , StateType > behavior = generator . expand ( stateToIdCallback ) ;
STORM_LOG_TRACE ( " State has " < < behavior . getNumberOfChoices ( ) < < " choices. " ) ;
// Clumsily check whether we have found a state that forms a trivial BMEC.
if ( behavior . getNumberOfChoices ( ) = = 0 ) {
isTerminalState = true ;
} else if ( behavior . getNumberOfChoices ( ) = = 1 ) {
auto const & onlyChoice = * behavior . begin ( ) ;
if ( onlyChoice . size ( ) = = 1 ) {
auto const & onlyEntry = * onlyChoice . begin ( ) ;
if ( onlyEntry . first = = currentStateId ) {
isTerminalState = true ;
}
ValueType SparseMdpLearningModelChecker < ValueType > : : computeLowerBoundOfAction ( ActionType const & action , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
ValueType result = storm : : utility : : zero < ValueType > ( ) ;
for ( auto const & element : explorationInformation . getRowOfMatrix ( action ) ) {
result + = element . getValue ( ) * bounds . getLowerBoundForState ( element . getColumn ( ) , explorationInformation ) ;
}
return result ;
}
// If the state was neither a trivial (non-accepting) terminal state nor a target state, we
// need to store its behavior.
if ( ! isTerminalState ) {
// Next, we insert the behavior into our matrix structure.
StateType startRow = matrix . size ( ) ;
matrix . resize ( startRow + behavior . getNumberOfChoices ( ) ) ;
// Terminate the row group.
rowGroupIndices . push_back ( matrix . size ( ) ) ;
uint32_t currentAction = 0 ;
for ( auto const & choice : behavior ) {
for ( auto const & entry : choice ) {
std : : cout < < " adding " < < currentStateId < < " -> " < < entry . first < < " with prob " < < entry . second < < std : : endl ;
matrix [ startRow + currentAction ] . emplace_back ( entry . first , entry . second ) ;
template < typename ValueType >
ValueType SparseMdpLearningModelChecker < ValueType > : : computeUpperBoundOfAction ( ActionType const & action , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
ValueType result = storm : : utility : : zero < ValueType > ( ) ;
for ( auto const & element : explorationInformation . getRowOfMatrix ( action ) ) {
result + = element . getValue ( ) * bounds . getUpperBoundForState ( element . getColumn ( ) , explorationInformation ) ;
}
lowerBoundsPerAction . push_back ( storm : : utility : : zero < ValueType > ( ) ) ;
upperBoundsPerAction . push_back ( storm : : utility : : one < ValueType > ( ) ) ;
std : : pair < ValueType , ValueType > values = computeValuesOfChoice ( startRow + currentAction , matrix , stateToRowGroupMapping , lowerBoundsPerState , upperBoundsPerState , unexploredMarker ) ;
lowerBoundsPerAction . back ( ) = values . first ;
upperBoundsPerAction . back ( ) = values . second ;
STORM_LOG_TRACE ( " Initializing bounds of action " < < ( startRow + currentAction ) < < " to " < < lowerBoundsPerAction . back ( ) < < " and " < < upperBoundsPerAction . back ( ) < < " . " ) ;
+ + currentAction ;
return result ;
}
std : : pair < ValueType , ValueType > values = computeValuesOfState ( currentStateId , matrix , rowGroupIndices , stateToRowGroupMapping , lowerBoundsPerAction , upperBoundsPerAction , lowerBoundsPerState , upperBoundsPerState , unexploredMarker ) ;
lowerBoundsPerState . back ( ) = values . first ;
upperBoundsPerState . back ( ) = values . second ;
STORM_LOG_TRACE ( " Initializing bounds of state " < < currentStateId < < " to " < < lowerBoundsPerState . back ( ) < < " and " < < upperBoundsPerState . back ( ) < < " . " ) ;
template < typename ValueType >
ValueType SparseMdpLearningModelChecker < ValueType > : : computeLowerBoundOfState ( StateType const & state , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
StateType group = explorationInformation . getRowGroup ( state ) ;
ValueType result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( ActionType action = explorationInformation . getStartRowOfGroup ( group ) ; action < explorationInformation . getStartRowOfGroup ( group + 1 ) ; + + action ) {
ValueType actionValue = computeLowerBoundOfAction ( action , explorationInformation , bounds ) ;
result = std : : max ( actionValue , result ) ;
}
return result ;
}
if ( isTerminalState ) {
STORM_LOG_TRACE ( " State does not need to be explored, because it is " < < ( isTargetState ? " a target state " : " a rejecting terminal state " ) < < " . " ) ;
terminalStates . insert ( currentStateId ) ;
if ( isTargetState ) {
lowerBoundsPerState . back ( ) = storm : : utility : : one < ValueType > ( ) ;
lowerBoundsPerAction . push_back ( storm : : utility : : one < ValueType > ( ) ) ;
upperBoundsPerAction . push_back ( storm : : utility : : one < ValueType > ( ) ) ;
} else {
upperBoundsPerState . back ( ) = storm : : utility : : zero < ValueType > ( ) ;
lowerBoundsPerAction . push_back ( storm : : utility : : zero < ValueType > ( ) ) ;
upperBoundsPerAction . push_back ( storm : : utility : : zero < ValueType > ( ) ) ;
template < typename ValueType >
ValueType SparseMdpLearningModelChecker < ValueType > : : computeUpperBoundOfState ( StateType const & state , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
StateType group = explorationInformation . getRowGroup ( state ) ;
ValueType result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( ActionType action = explorationInformation . getStartRowOfGroup ( group ) ; action < explorationInformation . getStartRowOfGroup ( group + 1 ) ; + + action ) {
ValueType actionValue = computeUpperBoundOfAction ( action , explorationInformation , bounds ) ;
result = std : : max ( actionValue , result ) ;
}
// Increase the size of the matrix, but leave the row empty.
matrix . resize ( matrix . size ( ) + 1 ) ;
// Terminate the row group.
rowGroupIndices . push_back ( matrix . size ( ) ) ;
return result ;
}
return isTerminalState ;
template < typename ValueType >
std : : pair < ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : computeBoundsOfAction ( ActionType const & action , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
// TODO: take into account self-loops?
std : : pair < ValueType , ValueType > result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( auto const & element : explorationInformation . getRowOfMatrix ( action ) ) {
result . first + = element . getValue ( ) * bounds . getLowerBoundForState ( element . getColumn ( ) , explorationInformation ) ;
result . second + = element . getValue ( ) * bounds . getUpperBoundForState ( element . getColumn ( ) , explorationInformation ) ;
}
return result ;
}
template < typename ValueType >
bool SparseMdpLearningModelChecker < ValueType > : : samplePathFromState ( storm : : generator : : PrismNextStateGenerator < ValueType , StateType > & generator , std : : function < StateType ( storm : : generator : : CompressedState const & ) > const & stateToIdCallback , StateType initialStateIndex , std : : vector < std : : pair < StateType , uint32_t > > & stateActionStack , std : : unordered_map < StateType , storm : : generator : : CompressedState > & unexploredStates , StateType const & unexploredMarker , boost : : container : : flat_set < StateType > & terminalStates , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > & matrix , std : : vector < StateType > & rowGroupIndices , std : : vector < StateType > & stateToRowGroupMapping , std : : unordered_map < StateType , ChoiceSetPointer > & stateToLeavingChoicesOfEndComponent , storm : : expressions : : Expression const & targetStateExpression , std : : vector < ValueType > & lowerBoundsPerAction , std : : vector < ValueType > & upperBoundsPerAction , std : : vector < ValueType > & lowerBoundsPerState , std : : vector < ValueType > & upperBoundsPerState , Statistics & stats ) {
// Start the search from the initial state.
stateActionStack . push_back ( std : : make_pair ( initialStateIndex , 0 ) ) ;
bool foundTerminalState = false ;
while ( ! foundTerminalState ) {
StateType const & currentStateId = stateActionStack . back ( ) . first ;
STORM_LOG_TRACE ( " State on top of stack is: " < < currentStateId < < " . " ) ;
// If the state is not yet explored, we need to retrieve its behaviors.
auto unexploredIt = unexploredStates . find ( currentStateId ) ;
if ( unexploredIt ! = unexploredStates . end ( ) ) {
STORM_LOG_TRACE ( " State was not yet explored. " ) ;
// Explore the previously unexplored state.
foundTerminalState = exploreState ( generator , stateToIdCallback , currentStateId , unexploredIt - > second ) ;
unexploredStates . erase ( unexploredIt ) ;
} else {
// If the state was already explored, we check whether it is a terminal state or not.
if ( terminalStates . find ( currentStateId ) ! = terminalStates . end ( ) ) {
STORM_LOG_TRACE ( " Found already explored terminal state: " < < currentStateId < < " . " ) ;
foundTerminalState = true ;
std : : pair < ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : computeBoundsOfState ( StateType const & currentStateId , ExplorationInformation const & explorationInformation , BoundValues const & bounds ) const {
StateType group = explorationInformation . getRowGroup ( currentStateId ) ;
std : : pair < ValueType , ValueType > result = std : : make_pair ( storm : : utility : : zero < ValueType > ( ) , storm : : utility : : zero < ValueType > ( ) ) ;
for ( ActionType action = explorationInformation . getStartRowOfGroup ( group ) ; action < explorationInformation . getStartRowOfGroup ( group + 1 ) ; + + action ) {
std : : pair < ValueType , ValueType > actionValues = computeBoundsOfAction ( action , explorationInformation , bounds ) ;
result . first = std : : max ( actionValues . first , result . first ) ;
result . second = std : : max ( actionValues . second , result . second ) ;
}
return result ;
}
// If the state was not a terminal state, we continue the path search and sample the next state.
if ( ! foundTerminalState ) {
std : : cout < < " (2) stack is:" < < std : : endl ;
for ( auto const & el : stateActionS tack ) {
template < typename ValueType >
void SparseMdpLearningModelChecker < ValueType > : : updateProbabilityBoundsAlongSampledPath ( StateActionStack & stack , ExplorationInformation const & explorationInformation , BoundValues & bounds ) const {
std : : cout < < " stack is: " < < std : : endl ;
for ( auto const & el : stack ) {
std : : cout < < el . first < < " -[ " < < el . second < < " ]-> " ;
}
std : : cout < < std : : endl ;
for ( StateType state = 0 ; state < stateToRowGroupMapping . size ( ) ; + + state ) {
if ( stateToRowGroupMapping [ state ] ! = unexploredMarker ) {
std : : cout < < " state " < < state < < " (grp " < < stateToRowGroupMapping [ state ] < < " ) has bounds [ " < < lowerBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " , " < < upperBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " ], actions: " ;
for ( auto choice = rowGroupIndices [ stateToRowGroupMapping [ state ] ] ; choice < rowGroupIndices [ stateToRowGroupMapping [ state ] + 1 ] ; + + choice ) {
std : : cout < < choice < < " = [ " < < lowerBoundsPerAction [ choice ] < < " , " < < upperBoundsPerAction [ choice ] < < " ], " ;
}
std : : cout < < std : : endl ;
} else {
std : : cout < < " state " < < state < < " is unexplored " < < std : : endl ;
stack . pop_back ( ) ;
while ( ! stack . empty ( ) ) {
updateProbabilityOfAction ( stack . back ( ) . first , stack . back ( ) . second , explorationInformation , bounds ) ;
stack . pop_back ( ) ;
}
}
// At this point, we can be sure that the state was expanded and that we can sample according to the
// probabilities in the matrix.
uint32_t chosenAction = sampleFromMaxActions ( currentStateId , matrix , rowGroupIndices , stateToRowGroupMapping , upperBoundsPerState , stateToLeavingChoicesOfEndComponent , unexploredMarker ) ;
stateActionStack . back ( ) . second = chosenAction ;
STORM_LOG_TRACE ( " Sampled action " < < chosenAction < < " in state " < < currentStateId < < " . " ) ;
StateType successor = sampleSuccessorFromAction ( chosenAction , matrix , rowGroupIndices , stateToRowGroupMapping ) ;
STORM_LOG_TRACE ( " Sampled successor " < < successor < < " according to action " < < chosenAction < < " of state " < < currentStateId < < " . " ) ;
// Put the successor state and a dummy action on top of the stack.
stateActionStack . emplace_back ( successor , 0 ) ;
stats . maxPathLength = std : : max ( stats . maxPathLength , stateActionStack . size ( ) ) ;
// If the current path length exceeds the threshold and the model is a nondeterministic one, we
// perform an EC detection.
if ( stateActionStack . size ( ) > stats . pathLengthUntilEndComponentDetection & & ! program . isDeterministicModel ( ) ) {
detectEndComponents ( stateActionStack , terminalStates , matrix , rowGroupIndices , stateToRowGroupMapping , lowerBoundsPerAction , upperBoundsPerAction , lowerBoundsPerState , upperBoundsPerState , stateToLeavingChoicesOfEndComponent , unexploredMarker ) ;
// Abort the current search.
STORM_LOG_TRACE ( " Aborting the search after EC detection. " ) ;
stateActionStack . clear ( ) ;
break ;
}
}
}
return foundTerminalState ;
}
template < typename ValueType >
std : : tuple < typename SparseMdpLearningModelChecker < ValueType > : : StateType , ValueType , ValueType > SparseMdpLearningModelChecker < ValueType > : : performLearningProcedure ( storm : : expressions : : Expression const & targetStateExpression , storm : : storage : : sparse : : StateStorage < StateType > & stateStorage , storm : : generator : : PrismNextStateGenerator < ValueType , StateType > & generator , std : : function < StateType ( storm : : generator : : CompressedState const & ) > const & stateToIdCallback , std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > & matrix , std : : vector < StateType > & rowGroupIndices , std : : vector < StateType > & stateToRowGroupMapping , std : : unordered_map < StateType , storm : : generator : : CompressedState > & unexploredStates , StateType const & unexploredMarker ) {
void SparseMdpLearningModelChecker < ValueType > : : updateProbabilityOfAction ( StateType const & state , ActionType const & action , ExplorationInformation const & explorationInformation , BoundValues & bounds ) const {
// Compute the new lower/upper values of the action.
std : : pair < ValueType , ValueType > newBoundsForAction = computeBoundsOfAction ( action , explorationInformation , bounds ) ;
// Generate the initial state so we know where to start the simulation.
stateStorage . initialStateIndices = generator . getInitialStates ( stateToIdCallback ) ;
STORM_LOG_THROW ( stateStorage . initialStateIndices . size ( ) = = 1 , storm : : exceptions : : NotSupportedException , " Currently only models with one initial state are supported by the learning engine. " ) ;
StateType initialStateIndex = stateStorage . initialStateIndices . front ( ) ;
// And set them as the current value.
bounds . setBoundForAction ( action , newBoundsForAction ) ;
// A set storing all states in which to terminate the search .
boost : : container : : flat_set < StateType > terminalStates ;
// Check if we need to update the values for the states.
bounds . setNewLowerBoundOfStateIfGreaterThanOld ( state , explorationInformation , newBoundsForAction . first ) ;
// Vectors to store the lower/upper bounds for each action (in each state).
std : : vector < ValueType > lowerBoundsPerAction ;
std : : vector < ValueType > upperBoundsPerAction ;
std : : vector < ValueType > lowerBoundsPerState ;
std : : vector < ValueType > upperBoundsPerState ;
StateType rowGroup = explorationInformation . getRowGroup ( state ) ;
if ( newBoundsForAction < bounds . getUpperBoundOfRowGroup ( rowGroup ) ) {
// Since we might run into end-components, we track a mapping from states in ECs to all leaving choices of
// that EC.
std : : unordered_map < StateType , ChoiceSetPointer > stateToLeavingChoicesOfEndComponent ;
}
// Now perform the actual sampling.
std : : vector < std : : pair < StateType , uint32_t > > stateActionStack ;
ValueType upperBound = computeUpperBoundOverAllOtherActions ( state , action , explorationInformation , bounds ) ;
Statistics stats ;
bool convergenceCriterionMet = false ;
while ( ! convergenceCriterionMet ) {
bool result = samplePathFromState ( generator , stateToIdCallback , initialStateIndex , stateActionStack , unexploredStates , unexploredMarker , terminalStates , matrix , rowGroupIndices , stateToRowGroupMapping , stateToLeavingChoicesOfEndComponent , targetStateExpression , lowerBoundsPerAction , upperBoundsPerAction , lowerBoundsPerState , upperBoundsPerState , stats ) ;
uint32_t sourceRowGroup = stateToRowGroupMapping [ sourceStateId ] ;
if ( newUpperValue < upperBoundsPerState [ sourceRowGroup ] ) {
if ( rowGroupIndices [ sourceRowGroup + 1 ] - rowGroupIndices [ sourceRowGroup ] > 1 ) {
ValueType max = storm : : utility : : zero < ValueType > ( ) ;
// If a terminal state was found, we update the probabilities along the path contained in the stack.
if ( result ) {
// Update the bounds along the path to the terminal state.
STORM_LOG_TRACE ( " Found terminal state, updating probabilities along path. " ) ;
updateProbabilitiesUsingStack ( stateActionStack , matrix , rowGroupIndices , stateToRowGroupMapping , lowerBoundsPerAction , upperBoundsPerAction , lowerBoundsPerState , upperBoundsPerState , unexploredMarker ) ;
} else {
// If not terminal state was found, the search aborted, possibly because of an EC-detection. In this
// case, we cannot update the probabilities.
STORM_LOG_TRACE ( " Did not find terminal state. " ) ;
for ( uint32_t currentAction = rowGroupIndices [ sourceRowGroup ] ; currentAction < rowGroupIndices [ sourceRowGroup + 1 ] ; + + currentAction ) {
std : : cout < < " cur: " < < currentAction < < std : : endl ;
if ( currentAction = = action ) {
continue ;
}
// Sanity check of results.
for ( StateType state = 0 ; state < stateToRowGroupMapping . size ( ) ; + + state ) {
if ( stateToRowGroupMapping [ state ] ! = unexploredMarker ) {
STORM_LOG_ASSERT ( lowerBoundsPerState [ stateToRowGroupMapping [ state ] ] < = upperBoundsPerState [ stateToRowGroupMapping [ state ] ] , " The bounds for state " < < state < < " are not in a sane relation: " < < lowerBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " > " < < upperBoundsPerState [ stateToRowGroupMapping [ state ] ] < < " . " ) ;
}
ValueType currentValue = storm : : utility : : zero < ValueType > ( ) ;
for ( auto const & element : transitionMatrix [ currentAction ] ) {
currentValue + = element . getValue ( ) * ( stateToRowGroupMapping [ element . getColumn ( ) ] = = unexploredMarker ? storm : : utility : : one < ValueType > ( ) : upperBoundsPerState [ stateToRowGroupMapping [ element . getColumn ( ) ] ] ) ;
}
// TODO: remove debug output when superfluous
// for (StateType state = 0; state < stateToRowGroupMapping.size(); ++state) {
// if (stateToRowGroupMapping[state] != unexploredMarker) {
// std::cout << "state " << state << " (grp " << stateToRowGroupMapping[state] << ") has bounds [" << lowerBoundsPerState[stateToRowGroupMapping[state]] << ", " << upperBoundsPerState[stateToRowGroupMapping[state]] << "], actions: ";
// for (auto choice = rowGroupIndices[stateToRowGroupMapping[state]]; choice < rowGroupIndices[stateToRowGroupMapping[state] + 1]; ++choice) {
// std::cout << choice << " = [" << lowerBoundsPerAction[choice] << ", " << upperBoundsPerAction[choice] << "], ";
// }
// std::cout << std::endl;
// } else {
// std::cout << "state " << state << " is unexplored" << std::endl;
// }
// }
STORM_LOG_DEBUG ( " Discovered states: " < < stateStorage . numberOfStates < < " ( " < < unexploredStates . size ( ) < < " unexplored). " ) ;
STORM_LOG_DEBUG ( " Value of initial state is in [ " < < lowerBoundsPerState [ initialStateIndex ] < < " , " < < upperBoundsPerState [ initialStateIndex ] < < " ]. " ) ;
ValueType difference = upperBoundsPerState [ initialStateIndex ] - lowerBoundsPerState [ initialStateIndex ] ;
STORM_LOG_DEBUG ( " Difference after iteration " < < stats . iterations < < " is " < < difference < < " . " ) ;
convergenceCriterionMet = difference < 1e-6 ;
+ + stats . iterations ;
max = std : : max ( max , currentValue ) ;
std : : cout < < " max is " < < max < < std : : endl ;
}
if ( storm : : settings : : generalSettings ( ) . isShowStatisticsSet ( ) ) {
std : : cout < < std : : endl < < " Learning summary ------------------------- " < < std : : endl ;
std : : cout < < " Discovered states: " < < stateStorage . numberOfStates < < " ( " < < unexploredStates . size ( ) < < " unexplored, " < < stats . numberOfTargetStates < < " target states) " < < std : : endl ;
std : : cout < < " Sampling iterations: " < < stats . iterations < < std : : endl ;
std : : cout < < " Maximal path length: " < < stats . maxPathLength < < std : : endl ;
newUpperValue = std : : max ( newUpperValue , max ) ;
}
return std : : make_tuple ( initialStateIndex , lowerBoundsPerState [ initialStateIndex ] , upperBoundsPerState [ initialStateIndex ] ) ;
if ( newUpperValue < upperBoundsPerState [ sourceRowGroup ] ) {
STORM_LOG_TRACE ( " Got new upper bound for state " < < sourceStateId < < " : " < < newUpperValue < < " (was " < < upperBoundsPerState [ sourceRowGroup ] < < " ). " ) ;
std : : cout < < " writing at index " < < sourceRowGroup < < std : : endl ;
upperBoundsPerState [ sourceRowGroup ] = newUpperValue ;
}
template < typename ValueType >
std : : unique_ptr < CheckResult > SparseMdpLearningModelChecker < ValueType > : : computeReachabilityProbabilities ( CheckTask < storm : : logic : : EventuallyFormula > const & checkTask ) {
storm : : logic : : EventuallyFormula const & eventuallyFormula = checkTask . getFormula ( ) ;
storm : : logic : : Formula const & subformula = eventuallyFormula . getSubformula ( ) ;
STORM_LOG_THROW ( subformula . isAtomicExpressionFormula ( ) | | subformula . isAtomicLabelFormula ( ) , storm : : exceptions : : NotSupportedException , " Learning engine can only deal with formulas of the form 'F \" label \" ' or 'F expression'. " ) ;
// Derive the expression for the target states, so we know when to abort the learning run.
storm : : expressions : : Expression targetStateExpression = getTargetStateExpression ( subformula ) ;
// A container for the encountered states.
storm : : storage : : sparse : : StateStorage < StateType > stateStorage ( variableInformation . getTotalBitOffset ( true ) ) ;
// A generator used to explore the model.
storm : : generator : : PrismNextStateGenerator < ValueType , StateType > generator ( program , variableInformation , false ) ;
// A container that stores the transitions found so far.
std : : vector < std : : vector < storm : : storage : : MatrixEntry < StateType , ValueType > > > matrix ;
// A vector storing where the row group of each state starts.
std : : vector < StateType > rowGroupIndices ;
rowGroupIndices . push_back ( 0 ) ;
// A vector storing the mapping from state ids to row groups.
std : : vector < StateType > stateToRowGroupMapping ;
StateType unexploredMarker = std : : numeric_limits < StateType > : : max ( ) ;
// A mapping of unexplored IDs to their actual compressed states.
std : : unordered_map < StateType , storm : : generator : : CompressedState > unexploredStates ;
// Create a callback for the next-state generator to enable it to request the index of states.
std : : function < StateType ( storm : : generator : : CompressedState const & ) > stateToIdCallback = [ & stateStorage , & stateToRowGroupMapping , & unexploredStates , & unexploredMarker ] ( storm : : generator : : CompressedState const & state ) - > StateType {
StateType newIndex = stateStorage . numberOfStates ;
// Check, if the state was already registered.
std : : pair < uint32_t , std : : size_t > actualIndexBucketPair = stateStorage . stateToId . findOrAddAndGetBucket ( state , newIndex ) ;
if ( actualIndexBucketPair . first = = newIndex ) {
+ + stateStorage . numberOfStates ;
stateToRowGroupMapping . push_back ( unexploredMarker ) ;
unexploredStates [ newIndex ] = state ;
}
return actualIndexBucketPair . first ;
} ;
// Compute and return result.
std : : tuple < StateType , ValueType , ValueType > boundsForInitialState = performLearningProcedure ( targetStateExpression , stateStorage , generator , stateToIdCallback , matrix , rowGroupIndices , stateToRowGroupMapping , unexploredStates , unexploredMarker ) ;
return std : : make_unique < ExplicitQuantitativeCheckResult < ValueType > > ( std : : get < 0 > ( boundsForInitialState ) , std : : get < 1 > ( boundsForInitialState ) ) ;
}
template class SparseMdpLearningModelChecker < double > ;