@@ -78,8 +78,7 @@ namespace storm {
             storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
             ++nextId;
             beliefList.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
 
             // These are the components to build the MDP from the grid
             std::map<uint64_t, uint64_t> beliefStateMap;
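Both places that seed the belief list work on `storm::pomdp::Belief<ValueType>` records. As a reading aid, a sketch of the fields this patch relies on (illustrative only; the authoritative definition is storm-pomdp's `Belief` header):

```cpp
#include <cstdint>
#include <vector>

// Sketch of the belief record as used in this patch: a running id (index into beliefList
// and key of beliefStateMap), the observation shared by all states in the belief's support,
// and the distribution itself. The probabilities member is an assumption for illustration.
template<typename ValueType>
struct Belief {
    uint64_t id;
    uint32_t observation;
    std::vector<ValueType> probabilities;
};
```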
@@ -344,8 +343,6 @@ namespace storm {
             auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
             auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
             std::vector<std::string> parameterNames;
             storm::api::exportSparseModelAsDrn(modelPtr, "test", parameterNames);
@@ -398,8 +395,16 @@ namespace storm {
             storm::pomdp::Belief<ValueType> initialBelief = getInitialBelief(pomdp, nextId);
             ++nextId;
             beliefList.push_back(initialBelief);
-            beliefIsTarget.push_back(
-                    targetObservations.find(initialBelief.observation) != targetObservations.end());
+            beliefIsTarget.push_back(targetObservations.find(initialBelief.observation) != targetObservations.end());
+
+            // These are the components to build the MDP from the grid
+            std::map<uint64_t, uint64_t> beliefStateMap;
+            std::vector<std::vector<std::map<uint64_t, ValueType>>> mdpTransitions;
+            std::vector<uint64_t> targetStates;
+
+            uint64_t mdpStateId = 0;
+            beliefStateMap[initialBelief.id] = mdpStateId;
+            ++mdpStateId;
 
             // for the initial belief, add the triangulated initial states
             std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> initTemp = computeSubSimplexAndLambdas(
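The containers added in this hunk carry the over-approximation MDP that is now built alongside the grid: `beliefStateMap` maps belief ids to MDP state ids, `mdpTransitions` holds one sparse successor distribution per MDP state and action, and `targetStates` records which MDP states get the "target" label later. A small, self-contained illustration of that layout (plain `double` instead of `ValueType`, values invented for the example):

```cpp
#include <cstdint>
#include <iostream>
#include <map>
#include <vector>

int main() {
    // state -> action -> (successor MDP state -> probability)
    std::vector<std::vector<std::map<uint64_t, double>>> mdpTransitions;

    // One MDP state with a single action that moves to states 1 and 2 with probability 0.5 each.
    std::map<uint64_t, double> successorDistribution{{1, 0.5}, {2, 0.5}};
    mdpTransitions.push_back({successorDistribution});

    for (uint64_t state = 0; state < mdpTransitions.size(); ++state) {
        for (uint64_t action = 0; action < mdpTransitions[state].size(); ++action) {
            double mass = 0.0;
            for (auto const &entry : mdpTransitions[state][action]) {
                mass += entry.second;  // entry.first is the successor state id
            }
            std::cout << "state " << state << ", action " << action << ": total mass " << mass << "\n";
        }
    }
    return 0;
}
```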
@@ -410,6 +415,8 @@ namespace storm {
             lambdaCache[0] = initLambdas;
             bool initInserted = false;
+            std::vector<std::map<uint64_t, ValueType>> initTransitionsInBelief;
+            std::map<uint64_t, ValueType> initTransitionInActionBelief;
 
             for (size_t j = 0; j < initLambdas.size(); ++j) {
                 if (!cc.isEqual(initLambdas[j], storm::utility::zero<ValueType>())) {
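`computeSubSimplexAndLambdas` triangulates the initial belief over the grid: as the surrounding code suggests, it returns the vertices `initSubSimplex[j]` of the enclosing sub-simplex together with convex weights `initLambdas[j]`, so that (notation mine, not from the patch)

```latex
b_0 \;=\; \sum_{j} \lambda_j \, v_j, \qquad \lambda_j \ge 0, \qquad \sum_{j} \lambda_j = 1 .
```

The loop below skips vertices with zero weight and, with this change, also uses the remaining weights as transition probabilities from the initial MDP state to the grid beliefs.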
@@ -429,9 +436,20 @@ namespace storm {
                                 targetObservations.find(initialBelief.observation) != targetObservations.end());
                         beliefsToBeExpanded.push_back(nextId);
+                        beliefStateMap[nextId] = mdpStateId;
+                        initTransitionInActionBelief[mdpStateId] = initLambdas[j];
                         ++nextId;
+                        ++mdpStateId;
                     }
                 }
             }
 
+            // If the initial belief is not on the grid, we add the transitions from our initial MDP state to the triangulated beliefs
+            if (!initTransitionInActionBelief.empty()) {
+                initTransitionsInBelief.push_back(initTransitionInActionBelief);
+                mdpTransitions.push_back(initTransitionsInBelief);
+            }
@@ -445,17 +463,26 @@ namespace storm {
                 result.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
                 result_backup.emplace(std::make_pair(currId, storm::utility::zero<ValueType>()));
-                if (!isTarget) {
+                if (isTarget) {
+                    // MDP stuff
+                    std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
+                    targetStates.push_back(beliefStateMap[currId]);
+                    std::map<uint64_t, ValueType> transitionInActionBelief;
+                    transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
+                    transitionsInBelief.push_back(transitionInActionBelief);
+                    mdpTransitions.push_back(transitionsInBelief);
+                } else {
                     uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
                     uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
                     std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
                     std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
                     std::vector<ValueType> actionRewardsInState(numChoices);
+                    std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                     for (uint64_t action = 0; action < numChoices; ++action) {
                         std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
                         std::map<uint32_t, uint64_t> actionObservationBelieves;
+                        std::map<uint64_t, ValueType> transitionInActionBelief;
                         for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                             uint32_t observation = iter->first;
                             // THIS CALL IS SLOW
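`computeObservationProbabilitiesAfterAction` yields, for each observation `z`, the probability of observing `z` after playing the chosen action in the current belief. With storm's deterministic, state-based observations this is (notation mine)

```latex
\Pr(z \mid b, a) \;=\; \sum_{s} b(s) \sum_{s'} P(s' \mid s, a)\, \mathbf{1}[O(s') = z] .
```

Every observation with positive probability produces one successor belief (collected in `actionObservationBelieves`) and, with this patch, one entry of the per-action successor distribution `transitionInActionBelief` filled in the next hunk.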
@@ -490,7 +517,13 @@ namespace storm {
                                     beliefIsTarget.push_back(
                                             targetObservations.find(observation) != targetObservations.end());
                                     beliefsToBeExpanded.push_back(nextId);
+                                    beliefStateMap[nextId] = mdpStateId;
+                                    transitionInActionBelief[mdpStateId] = iter->second * lambdas[j];
                                     ++nextId;
+                                    ++mdpStateId;
+                                } else {
+                                    transitionInActionBelief[beliefStateMap[getBeliefIdInVector(beliefGrid, observation, subSimplex[j])]] = iter->second * lambdas[j];
                                 }
                             }
                         }
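The `// THIS CALL IS SLOW` marker refers to `getBeliefIdInVector`, which has to check whether a triangulated successor belief already has an id before a new one is allocated. A self-contained sketch of what such a lookup amounts to (simplified to exact `double` comparison; the real code presumably compares with the numeric comparator `cc`):

```cpp
#include <cstdint>
#include <vector>

struct SimpleBelief {
    uint64_t id;
    uint32_t observation;
    std::vector<double> probabilities;
};

// Linear scan over all known beliefs -- O(|beliefList| * |states|) per call,
// which is why every lookup inside the expansion loop is costly.
uint64_t getBeliefId(std::vector<SimpleBelief> const &beliefList, uint32_t observation,
                     std::vector<double> const &probabilities) {
    for (auto const &candidate : beliefList) {
        if (candidate.observation == observation && candidate.probabilities == probabilities) {
            return candidate.id;
        }
    }
    return static_cast<uint64_t>(-1);  // sentinel: belief not known yet
}
```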
@@ -499,12 +532,20 @@ namespace storm {
                         nextBelievesInAction[action] = actionObservationBelieves;
                         actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
                                                                             beliefList[currId]);
+                        if (!transitionInActionBelief.empty()) {
+                            transitionsInBelief.push_back(transitionInActionBelief);
+                        }
                     }
-                    observationProbabilities.emplace(
-                            std::make_pair(currId, observationProbabilitiesInAction));
+                    observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction));
                     nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction));
                     beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
+
+                    if (transitionsInBelief.empty()) {
+                        std::map<uint64_t, ValueType> transitionInActionBelief;
+                        transitionInActionBelief[beliefStateMap[currId]] = storm::utility::one<ValueType>();
+                        transitionsInBelief.push_back(transitionInActionBelief);
+                    }
+                    mdpTransitions.push_back(transitionsInBelief);
                 }
             }
             expansionTimer.stop();
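`getRewardAfterAction(pomdp, choiceIndex, belief)` evaluates the reward of one action in a belief; for a state-action reward structure this is the expectation of the reward over the belief. A self-contained sketch of that computation (the map-based signature is an illustration, not storm's API):

```cpp
#include <cstdint>
#include <map>

// Expected immediate reward of an action in a belief:
// sum over the belief support of b(s) * r(s, a).
double expectedActionReward(std::map<uint64_t, double> const &belief,
                            std::map<uint64_t, double> const &stateActionReward) {
    double result = 0.0;
    for (auto const &entry : belief) {
        auto rewardIt = stateActionReward.find(entry.first);
        if (rewardIt != stateActionReward.end()) {
            result += entry.second * rewardIt->second;  // b(s) * r(s, a)
        }
    }
    return result;
}
```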
@@ -512,6 +553,31 @@ namespace storm {
             STORM_PRINT("#Believes in List: " << beliefList.size() << std::endl)
             STORM_PRINT("Belief space expansion took " << expansionTimer << std::endl)
 
+            storm::models::sparse::StateLabeling mdpLabeling(mdpTransitions.size());
+            mdpLabeling.addLabel("init");
+            mdpLabeling.addLabel("target");
+            mdpLabeling.addLabelToState("init", 0);
+            for (auto targetState : targetStates) {
+                mdpLabeling.addLabelToState("target", targetState);
+            }
+
+            storm::storage::sparse::ModelComponents<ValueType, RewardModelType> modelComponents(buildTransitionMatrix(mdpTransitions), mdpLabeling);
+            storm::models::sparse::Mdp<ValueType, RewardModelType> overApproxMdp(modelComponents);
+            storm::models::sparse::StandardRewardModel<ValueType> mdpRewardModel(boost::none, std::vector<ValueType>(modelComponents.transitionMatrix.getRowCount()));
+            for (auto const &iter : beliefStateMap) {
+                auto currentBelief = beliefList[iter.first];
+                auto representativeState = pomdp.getStatesWithObservation(currentBelief.observation).front();
+                for (uint64_t action = 0; action < overApproxMdp.getNumberOfChoices(iter.second); ++action) {
+                    // Add the reward
+                    mdpRewardModel.setStateActionReward(overApproxMdp.getChoiceIndex(storm::storage::StateActionPair(iter.second, action)),
+                                                        getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
+                                                                             currentBelief));
+                }
+            }
+            overApproxMdp.addRewardModel("std", mdpRewardModel);
+            overApproxMdp.restrictRewardModels(std::set<std::string>({"std"}));
+            overApproxMdp.printModelInformationToStream(std::cout);
+
             storm::utility::Stopwatch overApproxTimer(true);
             // Value Iteration
             while (!finished && iteration < maxIterations) {
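The hunk above assembles the over-approximation MDP from the collected pieces: `buildTransitionMatrix(mdpTransitions)` has to flatten the nested state → action → (successor → probability) structure into a row-grouped sparse matrix, with one row group per MDP state and one row per action; `mdpLabeling` marks state 0 as initial and the recorded `targetStates` as targets; the reward model re-evaluates each belief's expected action rewards on the representative state's choices. The real `buildTransitionMatrix` presumably uses storm's sparse-matrix builder; the standard-library sketch below only illustrates the flattening, not the storm API:

```cpp
#include <cstdint>
#include <map>
#include <tuple>
#include <utility>
#include <vector>

// Flatten state -> action -> (successor -> probability) into row-group offsets
// plus (row, column, value) entries, i.e. the shape a row-grouped sparse MDP matrix needs.
std::pair<std::vector<uint64_t>, std::vector<std::tuple<uint64_t, uint64_t, double>>>
flattenTransitions(std::vector<std::vector<std::map<uint64_t, double>>> const &mdpTransitions) {
    std::vector<uint64_t> rowGroupStarts;                         // first row of every MDP state
    std::vector<std::tuple<uint64_t, uint64_t, double>> entries;  // (row, successor, probability)
    uint64_t row = 0;
    for (auto const &actionsOfState : mdpTransitions) {
        rowGroupStarts.push_back(row);
        for (auto const &successorDistribution : actionsOfState) {
            for (auto const &entry : successorDistribution) {
                entries.emplace_back(row, entry.first, entry.second);
            }
            ++row;  // one matrix row per action
        }
    }
    return {rowGroupStarts, entries};
}
```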
@@ -600,13 +666,12 @@ namespace storm {
             auto overApprox = storm::utility::zero<ValueType>();
             for (size_t j = 0; j < initLambdas.size(); ++j) {
                 if (initLambdas[j] != storm::utility::zero<ValueType>()) {
-                    overApprox += initLambdas[j] *
-                                  result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation,
-                                                                    initSubSimplex[j])];
+                    overApprox += initLambdas[j] * result_backup[getBeliefIdInVector(beliefGrid, initialBelief.observation, initSubSimplex[j])];
                 }
             }
             overApproxTimer.stop();
 
+            ValueType underApprox = storm::utility::zero<ValueType>();
+            /*
             storm::utility::Stopwatch underApproxTimer(true);
             ValueType underApprox = computeUnderapproximationWithMDP(pomdp, beliefList, beliefIsTarget, targetObservations, observationProbabilities, nextBelieves,
                                                                      result, chosenActions, gridResolution, initialBelief.id, min, true);
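The recombination loop at the top of this hunk turns the grid values back into a bound for the initial belief: with the triangulation weights from before,

```latex
\overline{V}(b_0) \;=\; \sum_{j} \lambda_j \, \overline{V}(v_j) ,
```

where `\overline{V}(v_j)` is `result_backup[...]` for the grid belief `initSubSimplex[j]` and `\lambda_j` is `initLambdas[j]`. The under-approximation computation is disabled in this revision (the call is commented out) and `underApprox` is fixed to zero.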
@@ -615,13 +680,26 @@ namespace storm {
             STORM_PRINT("Time Overapproximation: " << overApproxTimer
                                                    << std::endl
                                                    << "Time Underapproximation: " << underApproxTimer
-                                                   << std::endl);
+                                                   << std::endl);*/
             STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
-            STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
-            return std::make_unique<POMDPCheckResult<ValueType>>(
-                    POMDPCheckResult<ValueType>{overApprox, underApprox});
+            //STORM_PRINT("Under-Approximation Result: " << underApprox << std::endl);
+
+            auto model = std::make_shared<storm::models::sparse::Mdp<ValueType, RewardModelType>>(overApproxMdp);
+            auto modelPtr = std::static_pointer_cast<storm::models::sparse::Model<ValueType, RewardModelType>>(model);
+            std::vector<std::string> parameterNames;
+            storm::api::exportSparseModelAsDrn(modelPtr, "rewardTest", parameterNames);
+
+            std::string propertyString = min ? "Rmin=? [F \"target\"]" : "Rmax=? [F \"target\"]";
+            std::vector<storm::jani::Property> propertyVector = storm::api::parseProperties(propertyString);
+            std::shared_ptr<storm::logic::Formula const> property = storm::api::extractFormulasFromProperties(propertyVector).front();
+
+            std::unique_ptr<storm::modelchecker::CheckResult> res(storm::api::verifyWithSparseEngine<ValueType>(model, storm::api::createTask<ValueType>(property, true)));
+            STORM_LOG_ASSERT(res, "Result not exist.");
+            res->filter(storm::modelchecker::ExplicitQualitativeCheckResult(model->getInitialStates()));
+            STORM_PRINT("OverApprox MDP: " << (res->asExplicitQuantitativeCheckResult<ValueType>().getValueMap().begin()->second) << std::endl);
+
+            return std::make_unique<POMDPCheckResult<ValueType>>(POMDPCheckResult<ValueType>{overApprox, underApprox});
         }
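The block inserted before the return cross-checks the hand-rolled value iteration: the freshly built over-approximation MDP is exported in DRN format ("rewardTest") and verified with storm's sparse engine against the matching Rmin/Rmax reachability-reward property, and the resulting value is printed next to the grid-based bound. The returned `POMDPCheckResult` still carries the pair of over- and under-approximation values, with the under-approximation currently fixed to zero.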