
Adjusted Refinement Procedure for rewards

tempestpy_adaptions
Alexander Bork 5 years ago
parent commit 0507da4ffa

  1. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (69 lines changed)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (2 lines changed)
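In short, the refinement procedure now also handles expected-reward objectives: refineReachabilityProbability becomes refineReachability with an additional computeRewards flag, the property checked on the underlying MDP switches between the P and R operators, and the over- and under-approximation MDPs carry a "std" reward model. A minimal sketch of the property construction, using the variable names from the diff below (the resulting strings are illustrative examples):

    std::string initPropString = computeRewards ? "R" : "P";  // reward vs. probability operator
    initPropString += min ? "min" : "max";                    // optimization direction
    initPropString += "=? [F \"__goal__\"]";                  // reach the "__goal__" label
    // computeRewards = false, min = true   ->  Pmin=? [F "__goal__"]
    // computeRewards = true,  min = false  ->  Rmax=? [F "__goal__"]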

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (69 lines changed)
@@ -54,7 +54,7 @@ namespace storm {
                 STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
             }
             if (options.doRefinement) {
-                return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
             } else {
                 return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
             }
@@ -62,8 +62,7 @@ namespace storm {
                 // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                 STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
                 if (options.doRefinement) {
-                    STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Rewards with refinement not implemented yet");
-                    //return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                    return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
                 } else {
                     // FIXME: pick the non-unique reward model here
                     STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
@@ -77,7 +76,7 @@ namespace storm {
         template<typename ValueType, typename RewardModelType>
         std::unique_ptr<POMDPCheckResult<ValueType>>
-        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachabilityProbability(std::set<uint32_t> const &targetObservations, bool min) {
+        ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards) {
             std::srand(time(NULL));
             // Compute easy upper and lower bounds
             storm::utility::Stopwatch underlyingWatch(true);
@@ -94,11 +93,15 @@ namespace storm {
             storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
             auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
                     std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
-            std::string initPropString = min ? "Pmin" : "Pmax";
+            std::string initPropString = computeRewards ? "R" : "P";
+            initPropString += min ? "min" : "max";
             initPropString += "=? [F \"__goal__\"]";
             std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
             std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
             STORM_PRINT("Underlying MDP" << std::endl)
+            if (computeRewards) {
+                underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
+            }
             underlyingMdp.printModelInformationToStream(std::cout);
             std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
                     storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
@@ -119,6 +122,9 @@ namespace storm {
                 }
             }
             auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
+            if (computeRewards) {
+                underApproxModel->restrictRewardModels({"std"});
+            }
             STORM_PRINT("Random Positional Scheduler" << std::endl)
             underApproxModel->printModelInformationToStream(std::cout);
             std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(
@@ -140,10 +146,12 @@ namespace storm {
             uint64_t underApproxModelSize = 200;
             uint64_t refinementCounter = 1;
             STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
-            std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap,
+            std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
+                                                                                              initialOverApproxMap,
                                                                                               initialUnderApproxMap, underApproxModelSize);
             ValueType lastMinScore = storm::utility::infinity<ValueType>();
-            while (refinementCounter < 1000 && res->overApproxValue - res->underApproxValue > options.refinementPrecision) {
+            while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
+                                                (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
                 // TODO the actual refinement
                 // choose which observation(s) to refine
                 std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
@@ -215,10 +223,11 @@ namespace storm {
                 }
                 STORM_PRINT(
                         "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl)
-                res = computeRefinementStep(targetObservations, min, observationResolutionVector, false,
+                res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
                                             res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize);
                 //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot");
-                STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue),
+                STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
+                                     cc.isEqual(res->underApproxValue, res->overApproxValue),
                                      "The value for the under-approximation is larger than the value for the over-approximation.");
                 ++refinementCounter;
             }
@@ -373,8 +382,8 @@ namespace storm {
                 uint64_t currId = beliefsToBeExpanded.front();
                 beliefsToBeExpanded.pop_front();
                 bool isTarget = beliefIsTarget[currId];
-                if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
+                if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
                     // TODO: with rewards whe would have to assign the corresponding reward to this transition
                     mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
                     continue;
@@ -458,19 +467,10 @@ namespace storm {
                             }
                         }
                     }
-                    if (computeRewards) {
-                        actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                            beliefList[currId]);
-                    }
                     if (!transitionInActionBelief.empty()) {
                         transitionsInBelief.push_back(transitionInActionBelief);
                     }
                 }
-                if (computeRewards) {
-                    beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
-                }
                 if (transitionsInBelief.empty()) {
                     std::map<uint64_t, ValueType> transitionInActionBelief;
                     transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
@@ -654,11 +654,6 @@ namespace storm {
                             }
                         }
                     }
-                    /* TODO
-                    if (computeRewards) {
-                        actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                            refinementComponents->beliefList[currId]);
-                    }*/
                     if (!transitionInActionBelief.empty()) {
                         transitionsStateActionPair[stateActionPair] = transitionInActionBelief;
                     }
@@ -754,19 +749,10 @@ namespace storm {
                            }
                        }
                    }
-                   /*
-                   if (computeRewards) {
-                       actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                           beliefList[currId]);
-                   }*/
                    if (!transitionInActionBelief.empty()) {
                        transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief;
                    }
                }
-               /*
-               if (computeRewards) {
-                   beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
-               }*/
            }
        }
@@ -817,6 +803,21 @@ namespace storm {
            }
            storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(smb.build(), mdpLabeling);
            storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
+           if (computeRewards) {
+               storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+               for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
+                   auto currentBelief = refinementComponents->beliefList[iter.first];
+                   auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                   for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                       // Add the reward
+                       mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                           getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                currentBelief));
+                   }
+               }
+               overApproxMdp.addRewardModel("std", mdpRewardModel);
+               overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+           }
            overApproxMdp.printModelInformationToStream(std::cout);
            auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
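Note that the loop guard and the consistency check in the refinement loop are now direction-aware: when maximizing, the over-approximation lies above the under-approximation; when minimizing, the roles are swapped. A minimal sketch of the equivalent gap computation, assuming the res->overApproxValue, res->underApproxValue and options.refinementPrecision fields used in the hunks above:

    // Sketch only (not part of the commit): remaining gap between the two bounds,
    // with the optimization direction factored out of the loop guard.
    ValueType gap = min ? res->underApproxValue - res->overApproxValue
                        : res->overApproxValue - res->underApproxValue;
    bool keepRefining = refinementCounter < 1000 && gap > options.refinementPrecision;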

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h (2 lines changed)
@@ -75,7 +75,7 @@ namespace storm {
             * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
             */
            std::unique_ptr<POMDPCheckResult<ValueType>>
-           refineReachabilityProbability(std::set<uint32_t> const &targetObservations, bool min);
+           refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);
            /**
             * Compute the reachability probability of given target observations on a POMDP for the given resolution only.
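For orientation, the two call sites of the renamed method (both visible in the .cpp hunks above) differ only in the new flag:

    // probability objective
    refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
    // expected-reward objective (previously rejected with a NotSupportedException)
    refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);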