diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index 594d213b8..9b1c24a70 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -54,7 +54,7 @@ namespace storm {
                     STORM_LOG_THROW(reachableFromSinkStates.empty(), storm::exceptions::NotSupportedException, "There are sink states that can reach non-sink states. This is currently not supported");
                 }
                 if (options.doRefinement) {
-                    return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                    return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), false);
                 } else {
                     return computeReachabilityProbabilityOTF(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
                 }
@@ -62,8 +62,7 @@ namespace storm {
                 // FIXME: Instead of giving up, introduce a new observation for target states and make sink states absorbing.
                 STORM_LOG_THROW(formulaInfo.getTargetStates().observationClosed, storm::exceptions::NotSupportedException, "There are non-target states with the same observation as a target state. This is currently not supported");
                 if (options.doRefinement) {
-                    STORM_LOG_THROW(false, storm::exceptions::NotSupportedException, "Rewards with refinement not implemented yet");
-                    //return refineReachabilityProbability(formulaInfo.getTargetStates().observations, formulaInfo.minimize());
+                    return refineReachability(formulaInfo.getTargetStates().observations, formulaInfo.minimize(), true);
                 } else {
                     // FIXME: pick the non-unique reward model here
                     STORM_LOG_THROW(pomdp.hasUniqueRewardModel(), storm::exceptions::NotSupportedException, "Non-unique reward models not implemented yet.");
@@ -77,7 +76,7 @@ namespace storm {

            template<typename ValueType, typename RewardModelType>
            std::unique_ptr<POMDPCheckResult<ValueType>>
-            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachabilityProbability(std::set<uint32_t> const &targetObservations, bool min) {
+            ApproximatePOMDPModelchecker<ValueType, RewardModelType>::refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards) {
                std::srand(time(NULL));
                // Compute easy upper and lower bounds
                storm::utility::Stopwatch underlyingWatch(true);
@@ -94,11 +93,15 @@ namespace storm {
                storm::models::sparse::Mdp<ValueType, RewardModelType> underlyingMdp(pomdp.getTransitionMatrix(), underlyingMdpLabeling, pomdp.getRewardModels());
                auto underlyingModel = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(
                        std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(underlyingMdp));
-                std::string initPropString = min ? "Pmin" : "Pmax";
+                std::string initPropString = computeRewards ? "R" : "P";
+                initPropString += min ? "min" : "max";
                initPropString += "=? [F \"__goal__\"]";
                std::vector<storm::jani::Property> propVector = storm::api::parseProperties(initPropString);
                std::shared_ptr<storm::logic::Formula const> underlyingProperty = storm::api::extractFormulasFromProperties(propVector).front();
                STORM_PRINT("Underlying MDP" << std::endl)
+                if (computeRewards) {
+                    underlyingMdp.addRewardModel("std", pomdp.getUniqueRewardModel());
+                }
                underlyingMdp.printModelInformationToStream(std::cout);
                std::unique_ptr<storm::modelchecker::CheckResult> underlyingRes(
                        storm::api::verifyWithSparseEngine<ValueType>(underlyingModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
@@ -119,6 +122,9 @@ namespace storm {
                    }
                }
                auto underApproxModel = underlyingMdp.applyScheduler(pomdpScheduler, false);
+                if (computeRewards) {
+                    underApproxModel->restrictRewardModels({"std"});
+                }
                STORM_PRINT("Random Positional Scheduler" << std::endl)
                underApproxModel->printModelInformationToStream(std::cout);
                std::unique_ptr<storm::modelchecker::CheckResult> underapproxRes(storm::api::verifyWithSparseEngine<ValueType>(underApproxModel, storm::api::createTask<ValueType>(underlyingProperty, false)));
@@ -140,10 +146,12 @@ namespace storm {
                uint64_t underApproxModelSize = 200;
                uint64_t refinementCounter = 1;
                STORM_PRINT("==============================" << std::endl << "Initial Computation" << std::endl << "------------------------------" << std::endl)
-                std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, false, initialOverApproxMap,
+                std::shared_ptr<RefinementComponents<ValueType>> res = computeFirstRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
+                                                                                                  initialOverApproxMap,
                                                                                                  initialUnderApproxMap, underApproxModelSize);
                ValueType lastMinScore = storm::utility::infinity<ValueType>();
-                while (refinementCounter < 1000 && res->overApproxValue - res->underApproxValue > options.refinementPrecision) {
+                while (refinementCounter < 1000 && ((!min && res->overApproxValue - res->underApproxValue > options.refinementPrecision) ||
+                                                    (min && res->underApproxValue - res->overApproxValue > options.refinementPrecision))) {
                    // TODO the actual refinement
                    // choose which observation(s) to refine
                    std::vector<ValueType> obsAccumulator(pomdp.getNrObservations(), storm::utility::zero<ValueType>());
@@ -215,10 +223,11 @@ namespace storm {
                    }
                    STORM_PRINT(
                            "==============================" << std::endl << "Refinement Step " << refinementCounter << std::endl << "------------------------------" << std::endl)
-                    res = computeRefinementStep(targetObservations, min, observationResolutionVector, false,
+                    res = computeRefinementStep(targetObservations, min, observationResolutionVector, computeRewards,
                                                res, changedObservations, initialOverApproxMap, initialUnderApproxMap, underApproxModelSize);
                    //storm::api::exportSparseModelAsDot(res->overApproxModelPtr, "oa_model_" + std::to_string(refinementCounter +1) + ".dot");
-                    STORM_LOG_ERROR_COND(cc.isLess(res->underApproxValue, res->overApproxValue) || cc.isEqual(res->underApproxValue, res->overApproxValue),
+                    STORM_LOG_ERROR_COND((!min && cc.isLess(res->underApproxValue, res->overApproxValue)) || (min && cc.isLess(res->overApproxValue, res->underApproxValue)) ||
+                                         cc.isEqual(res->underApproxValue, res->overApproxValue),
                                         "The value for the under-approximation is larger than the value for the over-approximation.");
                    ++refinementCounter;
                }
@@ -373,8 +382,8 @@ namespace storm {
                    uint64_t currId = beliefsToBeExpanded.front();
                    beliefsToBeExpanded.pop_front();
                    bool isTarget = beliefIsTarget[currId];
-
-                    if (boundMapsSet && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
+
+                    if (boundMapsSet && !computeRewards && cc.isLess(weightedSumOverMap[currId] - weightedSumUnderMap[currId], options.explorationThreshold)) {
                        // TODO: with rewards whe would have to assign the corresponding reward to this transition
                        mdpTransitions.push_back({{{1, weightedSumOverMap[currId]}, {0, storm::utility::one<ValueType>() - weightedSumOverMap[currId]}}});
                        continue;
@@ -458,19 +467,10 @@ namespace storm {
                                }
                            }
                        }
-                        if (computeRewards) {
-                            actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                beliefList[currId]);
-                        }
                        if (!transitionInActionBelief.empty()) {
                            transitionsInBelief.push_back(transitionInActionBelief);
                        }
                    }
-                    if (computeRewards) {
-                        beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
-                    }
-
-
                    if (transitionsInBelief.empty()) {
                        std::map<uint64_t, ValueType> transitionInActionBelief;
                        transitionInActionBelief[beliefStateMap.left.at(currId)] = storm::utility::one<ValueType>();
@@ -654,11 +654,6 @@ namespace storm {
                                }
                            }
                        }
-                        /* TODO
-                        if (computeRewards) {
-                            actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                refinementComponents->beliefList[currId]);
-                        }*/
                        if (!transitionInActionBelief.empty()) {
                            transitionsStateActionPair[stateActionPair] = transitionInActionBelief;
                        }
@@ -754,19 +749,10 @@ namespace storm {
                                }
                            }
                        }
-                        /*
-                        if (computeRewards) {
-                            actionRewardsInState[action] = getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
-                                                                                beliefList[currId]);
-                        }*/
                        if (!transitionInActionBelief.empty()) {
                            transitionsStateActionPair[std::make_pair(refinementComponents->overApproxBeliefStateMap.left.at(currId), action)] = transitionInActionBelief;
                        }
                    }
-                    /*
-                    if (computeRewards) {
-                        beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
-                    }*/
                }
            }

@@ -817,6 +803,21 @@ namespace storm {
                }
                storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(smb.build(), mdpLabeling);
                storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
+                if (computeRewards) {
+                    storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+                    for (auto const &iter : refinementComponents->overApproxBeliefStateMap.left) {
+                        auto currentBelief = refinementComponents->beliefList[iter.first];
+                        auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                        for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                            // Add the reward
+                            mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                                getRewardAfterAction(pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                                     currentBelief));
+                        }
+                    }
+                    overApproxMdp.addRewardModel("std", mdpRewardModel);
+                    overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+                }
                overApproxMdp.printModelInformationToStream(std::cout);
                auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);

diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 5805449f4..26877d062 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -75,7 +75,7 @@ namespace storm {
             * @return A struct containing the final overapproximation (overApproxValue) and underapproximation (underApproxValue) values
             */
            std::unique_ptr<POMDPCheckResult<ValueType>>
-            refineReachabilityProbability(std::set<uint32_t> const &targetObservations, bool min);
+            refineReachability(std::set<uint32_t> const &targetObservations, bool min, bool computeRewards);

            /**
             * Compute the reachability probability of given target observations on a POMDP for the given resolution only.
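
Note on the new "std" reward model: the patch assigns every (belief state, action) choice of the over-approximation MDP the value returned by the existing getRewardAfterAction helper, i.e. the immediate reward of the corresponding POMDP choice weighted by the belief. The self-contained sketch below only illustrates that weighted sum; the name rewardAfterAction and its map-based parameters are hypothetical and do not match Storm's actual signature, which works on a choice index and a belief object.

#include <cstdint>
#include <map>

// Expected immediate reward of a fixed action under a belief:
// sum over the belief's support of P(state) * reward(state, action).
template<typename ValueType>
ValueType rewardAfterAction(std::map<uint64_t, ValueType> const &beliefProbabilities, // state -> belief probability
                            std::map<uint64_t, ValueType> const &choiceRewards) {     // state -> reward of the fixed action
    ValueType result = ValueType(0);
    for (auto const &entry : beliefProbabilities) {
        auto rewardEntry = choiceRewards.find(entry.first);
        if (rewardEntry != choiceRewards.end()) {
            result += entry.second * rewardEntry->second; // probability mass times immediate reward
        }
    }
    return result;
}

In the patch itself this value is attached to the matching choice via setStateActionReward, and the model is then restricted to the single "std" reward model before checking.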