diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
index ab14f7349..bdf9fab1a 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp
@@ -113,7 +113,7 @@ namespace storm {
                         } else {
                             //otherwise, we approximate a value TODO this is critical, we have to think about it
                             auto overApproxValue = storm::utility::zero<ValueType>();
-                            auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation]);
+                            auto temp = computeSubSimplexAndLambdas(currentBelief.probabilities, observationResolutionVector[currentBelief.observation], pomdp.getNumberOfStates());
                             auto subSimplex = temp.first;
                             auto lambdas = temp.second;
                             for (size_t j = 0; j < lambdas.size(); ++j) {
@@ -190,16 +190,12 @@ namespace storm {
                 std::vector<bool> beliefIsTarget;
                 std::vector<storm::pomdp::Belief<ValueType>> beliefGrid;
                 //Use caching to avoid multiple computation of the subsimplices and lambdas
-                std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
+                std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
                 std::map<uint64_t, std::vector<ValueType>> lambdaCache;
 
                 bsmap_type beliefStateMap;
 
                 std::deque<uint64_t> beliefsToBeExpanded;
-                // Belief ID -> Observation -> Probability
-                std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> observationProbabilities;
-                // current ID -> action -> next ID
-                std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> nextBelieves;
                 // current ID -> action -> reward
                 std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
                 uint64_t nextId = 0;
@@ -227,10 +223,9 @@ namespace storm {
                 std::map<uint64_t, ValueType> weightedSumUnderMap;
 
                 // for the initial belief, add the triangulated initial states
-                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities,
-                                                                                                                              observationResolutionVector[initialBelief.observation]);
-                std::vector<std::vector<ValueType>> initSubSimplex = initTemp.first;
-                std::vector<ValueType> initLambdas = initTemp.second;
+                auto initTemp = computeSubSimplexAndLambdas(initialBelief.probabilities, observationResolutionVector[initialBelief.observation], pomdp.getNumberOfStates());
+                auto initSubSimplex = initTemp.first;
+                auto initLambdas = initTemp.second;
                 if (cacheSubsimplices) {
                     subSimplexCache[0] = initSubSimplex;
                     lambdaCache[0] = initLambdas;
@@ -297,7 +292,6 @@ namespace storm {
                         initTransitionsInBelief.push_back(initTransitionInActionBelief);
                         mdpTransitions.push_back(initTransitionsInBelief);
                     }
-                //beliefsToBeExpanded.push_back(initialBelief.id); I'm curious what happens if we do this instead of first triangulating. Should do nothing special if belief is on grid, otherwise it gets interesting
 
                 // Expand the beliefs to generate the grid on-the-fly
 
@@ -305,7 +299,7 @@ namespace storm {
                     STORM_PRINT("Exploration threshold: " << explorationThreshold << std::endl)
                 }
                 while (!beliefsToBeExpanded.empty()) {
-                    // TODO add direct generation of transition matrix
+                    // TODO direct generation of transition matrix?
                     uint64_t currId = beliefsToBeExpanded.front();
                     beliefsToBeExpanded.pop_front();
                     bool isTarget = beliefIsTarget[currId];
@@ -323,14 +317,11 @@ namespace storm {
                     } else {
                         uint64_t representativeState = pomdp.getStatesWithObservation(beliefList[currId].observation).front();
                         uint64_t numChoices = pomdp.getNumberOfChoices(representativeState);
-                        std::vector<std::map<uint32_t, ValueType>> observationProbabilitiesInAction(numChoices);
-                        std::vector<std::map<uint32_t, uint64_t>> nextBelievesInAction(numChoices);
                         std::vector<ValueType> actionRewardsInState(numChoices);
                         std::vector<std::map<uint64_t, ValueType>> transitionsInBelief;
 
                         for (uint64_t action = 0; action < numChoices; ++action) {
                             std::map<uint32_t, ValueType> actionObservationProbabilities = computeObservationProbabilitiesAfterAction(pomdp, beliefList[currId], action);
-                            std::map<uint32_t, uint64_t> actionObservationBelieves;
                             std::map<uint64_t, ValueType> transitionInActionBelief;
                             for (auto iter = actionObservationProbabilities.begin(); iter != actionObservationProbabilities.end(); ++iter) {
                                 uint32_t observation = iter->first;
@@ -339,16 +330,15 @@ namespace storm {
                                 uint64_t idNextBelief = getBeliefAfterActionAndObservation(pomdp, beliefList, beliefIsTarget, targetObservations, beliefList[currId], action,
                                                                                            observation, nextId);
                                 nextId = beliefList.size();
-                                actionObservationBelieves[observation] = idNextBelief;
                                 //Triangulate here and put the possibly resulting belief in the grid
-                                std::vector<std::vector<ValueType>> subSimplex;
+                                std::vector<std::map<uint64_t, ValueType>> subSimplex;
                                 std::vector<ValueType> lambdas;
                                 if (cacheSubsimplices && subSimplexCache.count(idNextBelief) > 0) {
                                     subSimplex = subSimplexCache[idNextBelief];
                                     lambdas = lambdaCache[idNextBelief];
                                 } else {
-                                    std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
-                                            beliefList[idNextBelief].probabilities, observationResolutionVector[beliefList[idNextBelief].observation]);
+                                    auto temp = computeSubSimplexAndLambdas(beliefList[idNextBelief].probabilities,
+                                                                            observationResolutionVector[beliefList[idNextBelief].observation], pomdp.getNumberOfStates());
                                     subSimplex = temp.first;
                                     lambdas = temp.second;
                                     if(cacheSubsimplices){
@@ -397,8 +387,6 @@ namespace storm {
                                     }
                                 }
                             }
-                            observationProbabilitiesInAction[action] = actionObservationProbabilities;
-                            nextBelievesInAction[action] = actionObservationBelieves;
                             if (computeRewards) {
                                 actionRewardsInState[action] = getRewardAfterAction(pomdp, pomdp.getChoiceIndex(storm::storage::StateActionPair(representativeState, action)),
                                                                                     beliefList[currId]);
@@ -407,8 +395,6 @@ namespace storm {
                                 transitionsInBelief.push_back(transitionInActionBelief);
                             }
                         }
-                        observationProbabilities.emplace(std::make_pair(currId, observationProbabilitiesInAction));
-                        nextBelieves.emplace(std::make_pair(currId, nextBelievesInAction));
                         if (computeRewards) {
                             beliefActionRewards.emplace(std::make_pair(currId, actionRewardsInState));
                         }
@@ -499,7 +485,7 @@ namespace storm {
                                                                                       std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                                                       std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                                                       std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
-                                                                                      std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                                                                      std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> &subSimplexCache,
                                                                                       std::map<uint64_t, std::vector<ValueType>> &lambdaCache,
                                                                                       std::map<uint64_t, ValueType> &result,
                                                                                       std::map<uint64_t, std::vector<uint64_t>> &chosenActions,
@@ -531,14 +517,13 @@ namespace storm {
                             storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
                             // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
                             // cache the values to not always re-calculate
-                            std::vector<std::vector<ValueType>> subSimplex;
+                            std::vector<std::map<uint64_t, ValueType>> subSimplex;
                             std::vector<ValueType> lambdas;
                             if (cacheSubsimplices && subSimplexCache.count(nextBelief.id) > 0) {
                                 subSimplex = subSimplexCache[nextBelief.id];
                                 lambdas = lambdaCache[nextBelief.id];
                             } else {
-                                std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(nextBelief.probabilities,
-                                                                                                                                          gridResolution);
+                                auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates());
                                 subSimplex = temp.first;
                                 lambdas = temp.second;
                                 if(cacheSubsimplices) {
@@ -588,13 +573,13 @@ namespace storm {
             STORM_PRINT("Overapproximation took " << iteration << " iterations" << std::endl);
 
             std::vector<ValueType> initialLambda;
-            std::vector<std::vector<ValueType>> initialSubsimplex;
+            std::vector<std::map<uint64_t, ValueType>> initialSubsimplex;
             if(cacheSubsimplices){
                 initialLambda = lambdaCache[0];
                 initialSubsimplex = subSimplexCache[0];
             } else {
-                auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution);
-                initialSubsimplex= temp.first;
+                auto temp = computeSubSimplexAndLambdas(beliefList[0].probabilities, gridResolution, pomdp.getNumberOfStates());
+                initialSubsimplex = temp.first;
                 initialLambda = temp.second;
             }
 
@@ -659,10 +644,10 @@ namespace storm {
             // current ID -> action -> reward
             std::map<uint64_t, std::vector<ValueType>> beliefActionRewards;
             //Use caching to avoid multiple computation of the subsimplices and lambdas
-            std::map<uint64_t, std::vector<std::vector<ValueType>>> subSimplexCache;
+            std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> subSimplexCache;
             std::map<uint64_t, std::vector<ValueType>> lambdaCache;
 
-            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution);
+            auto temp = computeSubSimplexAndLambdas(initialBelief.probabilities, gridResolution, pomdp.getNumberOfStates());
             if(cacheSubsimplices) {
                 subSimplexCache[0] = temp.first;
                 lambdaCache[0] = temp.second;
             }
@@ -784,7 +769,7 @@ namespace storm {
                 uint64_t numChoices = pomdp.getNumberOfChoices(pomdp.getStatesWithObservation(beliefList[currentBeliefId].observation).front());
                 // for targets, we only consider one action with one transition
                 if (beliefIsTarget[currentBeliefId]) {
-                    // add a self-loop to target states 
+                    // add a self-loop to target states
                     targetStates.push_back(beliefStateMap.left.at(currentBeliefId));
                     transitions.push_back({{{beliefStateMap.left.at(currentBeliefId), storm::utility::one<ValueType>()}}});
                 } else if (counter > maxModelSize) {
@@ -946,8 +931,8 @@ namespace storm {
                             storm::pomdp::Belief<ValueType> nextBelief = beliefList[nextBelieves[currentBelief.id][action][observation]];
 
                             // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
-                            auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution);
-                            std::vector<std::vector<ValueType>> subSimplex = temp.first;
+                            auto temp = computeSubSimplexAndLambdas(nextBelief.probabilities, gridResolution, pomdp.getNumberOfStates());
+                            std::vector<std::map<uint64_t, ValueType>> subSimplex = temp.first;
                             std::vector<ValueType> lambdas = temp.second;
 
                             auto sum = storm::utility::zero<ValueType>();
@@ -993,13 +978,17 @@ namespace storm {
         template<typename ValueType, typename RewardModelType>
         uint64_t ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefIdInVector(
                 std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                std::vector<ValueType> &probabilities) {
+                std::map<uint64_t, ValueType> &probabilities) {
             // TODO This one is quite slow
             for (auto const &belief : grid) {
                 if (belief.observation == observation) {
                     bool same = true;
-                    for (size_t i = 0; i < belief.probabilities.size(); ++i) {
-                        if (!cc.isEqual(belief.probabilities[i], probabilities[i])) {
+                    for (auto const &probEntry : belief.probabilities) {
+                        if (probabilities.find(probEntry.first) == probabilities.end()) {
+                            same = false;
+                            break;
+                        }
+                        if (!cc.isEqual(probEntry.second, probabilities[probEntry.first])) {
                             same = false;
                             break;
                         }
@@ -1009,7 +998,6 @@ namespace storm {
                     }
                 }
             }
-
             return -1;
         }
 
@@ -1020,12 +1008,13 @@ namespace storm {
                               "POMDP contains more than one initial state");
             STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
                              "POMDP does not contain an initial state");
-            std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
+            std::map<uint64_t, ValueType> distribution;
             uint32_t observation = 0;
             for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
                 if (pomdp.getInitialStates()[state] == 1) {
                     distribution[state] = storm::utility::one<ValueType>();
                     observation = pomdp.getObservation(state);
+                    break;
                 }
             }
             return storm::pomdp::Belief<ValueType>{id, observation, distribution};
@@ -1048,8 +1037,7 @@ namespace storm {
             // TODO this can probably be condensed
             if (statesWithObservation.size() == 1) {
                 // If there is only one state with the observation, we can directly add the corresponding belief
-                std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
-                                                    storm::utility::zero<ValueType>());
+                std::map<uint64_t, ValueType> distribution;
                 distribution[statesWithObservation.front()] = storm::utility::one<ValueType>();
                 storm::pomdp::Belief<ValueType> belief = {newId, observation, distribution};
                 STORM_LOG_TRACE(
@@ -1068,17 +1056,19 @@ namespace storm {
 
                 uint64_t index = 0;
                 while (!done) {
-                    std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
-                                                        storm::utility::zero<ValueType>());
+                    std::map<uint64_t, ValueType> distribution;
                     for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) {
-                        distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) /
-                                                                 storm::utility::convertNumber<ValueType>(
-                                                                         gridResolution);
+                        if (helper[i] - helper[i + 1] > ValueType(0)) {
+                            distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) /
+                                                                     storm::utility::convertNumber<ValueType>(
+                                                                             gridResolution);
+                        }
+                    }
+                    if (helper[statesWithObservation.size() - 1] > ValueType(0)) {
+                        distribution[statesWithObservation.back()] =
+                                helper[statesWithObservation.size() - 1] /
+                                storm::utility::convertNumber<ValueType>(gridResolution);
                     }
-                    distribution[statesWithObservation.back()] =
-                            helper[statesWithObservation.size() - 1] /
-                            storm::utility::convertNumber<ValueType>(gridResolution);
-
                     storm::pomdp::Belief<ValueType> belief = {newId, observation, distribution};
                     STORM_LOG_TRACE("Add Belief " << std::to_string(newId) << " [(" << std::to_string(observation) << ")," << distribution << "]");
                     beliefList.push_back(belief);
@@ -1111,20 +1101,24 @@ namespace storm {
         }
 
         template<typename ValueType, typename RewardModelType>
-        std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
+        std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
         ApproximatePOMDPModelchecker<ValueType, RewardModelType>::computeSubSimplexAndLambdas(
-                std::vector<ValueType> &probabilities, uint64_t resolution) {
+                std::map<uint64_t, ValueType> &probabilities, uint64_t resolution, uint64_t nrStates) {
+
+            //TODO this can also be simplified using the sparse vector interpretation
+
             // This is the Freudenthal Triangulation as described in Lovejoy (a whole lotta math)
             // Variable names are based on the paper
-            uint64_t probSize = probabilities.size();
-            std::vector<ValueType> x(probSize);
-            std::vector<ValueType> v(probSize);
-            std::vector<ValueType> d(probSize);
+            std::vector<ValueType> x(nrStates);
+            std::vector<ValueType> v(nrStates);
+            std::vector<ValueType> d(nrStates);
             auto convResolution = storm::utility::convertNumber<ValueType>(resolution);
 
-            for (size_t i = 0; i < probSize; ++i) {
-                for (size_t j = i; j < probSize; ++j) {
-                    x[i] += convResolution * probabilities[j];
+            for (size_t i = 0; i < nrStates; ++i) {
+                for (auto const &probEntry : probabilities) {
+                    if (probEntry.first >= i) {
+                        x[i] += convResolution * probEntry.second;
+                    }
                 }
                 v[i] = storm::utility::floor(x[i]);
                 d[i] = x[i] - v[i];
@@ -1132,14 +1126,14 @@ namespace storm {
 
             auto p = storm::utility::vector::getSortedIndices(d);
 
-            std::vector<std::vector<ValueType>> qs(probSize, std::vector<ValueType>(probSize));
-            for (size_t i = 0; i < probSize; ++i) {
+            std::vector<std::vector<ValueType>> qs(nrStates, std::vector<ValueType>(nrStates));
+            for (size_t i = 0; i < nrStates; ++i) {
                 if (i == 0) {
-                    for (size_t j = 0; j < probSize; ++j) {
+                    for (size_t j = 0; j < nrStates; ++j) {
                         qs[i][j] = v[j];
                     }
                 } else {
-                    for (size_t j = 0; j < probSize; ++j) {
+                    for (size_t j = 0; j < nrStates; ++j) {
                         if (j == p[i - 1]) {
                             qs[i][j] = qs[i - 1][j] + storm::utility::one<ValueType>();
                         } else {
@@ -1148,18 +1142,22 @@ namespace storm {
                             qs[i][j] = qs[i - 1][j];
                         }
                     }
                 }
             }
-            std::vector<std::vector<ValueType>> subSimplex(probSize, std::vector<ValueType>(probSize));
-            for (size_t j = 0; j < probSize; ++j) {
-                for (size_t i = 0; i < probSize - 1; ++i) {
-                    subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
+            std::vector<std::map<uint64_t, ValueType>> subSimplex(nrStates);
+            for (size_t j = 0; j < nrStates; ++j) {
+                for (size_t i = 0; i < nrStates - 1; ++i) {
+                    if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][i] - qs[j][i + 1])) {
+                        subSimplex[j][i] = (qs[j][i] - qs[j][i + 1]) / convResolution;
+                    }
                 }
-                subSimplex[j][probSize - 1] = qs[j][probSize - 1] / convResolution;
+                if (cc.isLess(storm::utility::zero<ValueType>(), qs[j][nrStates - 1])) {
+                    subSimplex[j][nrStates - 1] = qs[j][nrStates - 1] / convResolution;
+                }
             }
 
-            std::vector<ValueType> lambdas(probSize, storm::utility::zero<ValueType>());
+            std::vector<ValueType> lambdas(nrStates, storm::utility::zero<ValueType>());
             auto sum = storm::utility::zero<ValueType>();
-            for (size_t i = 1; i < probSize; ++i) {
+            for (size_t i = 1; i < nrStates; ++i) {
                 lambdas[i] = d[p[i - 1]] - d[p[i]];
                 sum += d[p[i - 1]] - d[p[i]];
             }
@@ -1177,17 +1175,16 @@ namespace storm {
                                                                                                            uint64_t actionIndex) {
             std::map<uint32_t, ValueType> res;
             // the id is not important here as we immediately discard the belief (very hacky, I don't like it either)
-            std::vector<ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities;
-            for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
-                uint32_t observation = pomdp.getObservation(state);
-                if (postProbabilities[state] != storm::utility::zero<ValueType>()) {
-                    if (res.count(observation) == 0) {
-                        res[observation] = postProbabilities[state];
-                    } else {
-                        res[observation] += postProbabilities[state];
-                    }
+            std::map<uint64_t, ValueType> postProbabilities = getBeliefAfterAction(pomdp, belief, actionIndex, 0).probabilities;
+            for (auto const &probEntry : postProbabilities) {
+                uint32_t observation = pomdp.getObservation(probEntry.first);
+                if (res.count(observation) == 0) {
+                    res[observation] = probEntry.second;
+                } else {
+                    res[observation] += probEntry.second;
                 }
             }
+
             return res;
         }
 
@@ -1195,12 +1192,13 @@ namespace storm {
         storm::pomdp::Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getBeliefAfterAction(storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp,
                                                                                                                        storm::pomdp::Belief<ValueType> &belief,
                                                                                                                        uint64_t actionIndex, uint64_t id) {
-            std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
+            std::map<uint64_t, ValueType> distributionAfter;
             uint32_t observation = 0;
-            for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
-                if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
+            for (auto const &probEntry : belief.probabilities) {
+                uint64_t state = probEntry.first;
+                auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
+                for (auto const &entry : row) {
+                    if (entry.getValue() > 0) {
                         observation = pomdp.getObservation(entry.getColumn());
                         distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
                     }
@@ -1215,14 +1213,13 @@ namespace storm {
                                                                                                                           std::vector<bool> &beliefIsTarget, std::set<uint32_t> const &targetObservations,
                                                                                                                           storm::pomdp::Belief<ValueType> &belief, uint64_t actionIndex, uint32_t observation, uint64_t id) {
             storm::utility::Stopwatch distrWatch(true);
-            std::vector<ValueType> distributionAfter(pomdp.getNumberOfStates()); //, storm::utility::zero<ValueType>());
-            for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
-                if (belief.probabilities[state] != storm::utility::zero<ValueType>()) {
-                    auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
-                    for (auto const &entry : row) {
-                        if (pomdp.getObservation(entry.getColumn()) == observation) {
-                            distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
-                        }
+            std::map<uint64_t, ValueType> distributionAfter;
+            for (auto const &probEntry : belief.probabilities) {
+                uint64_t state = probEntry.first;
+                auto row = pomdp.getTransitionMatrix().getRow(pomdp.getChoiceIndex(storm::storage::StateActionPair(state, actionIndex)));
+                for (auto const &entry : row) {
+                    if (pomdp.getObservation(entry.getColumn()) == observation) {
+                        distributionAfter[entry.getColumn()] += belief.probabilities[state] * entry.getValue();
                     }
                 }
             }
@@ -1230,12 +1227,12 @@ namespace storm {
 
             // We have to normalize the distribution
             storm::utility::Stopwatch normalizationWatch(true);
             auto sum = storm::utility::zero<ValueType>();
-            for (ValueType const &entry : distributionAfter) {
-                sum += entry;
+            for (auto const &entry : distributionAfter) {
+                sum += entry.second;
             }
-            for (size_t i = 0; i < pomdp.getNumberOfStates(); ++i) {
-                distributionAfter[i] /= sum;
+            for (auto const &entry : distributionAfter) {
+                distributionAfter[entry.first] /= sum;
             }
             normalizationWatch.stop();
             if (getBeliefIdInVector(beliefList, observation, distributionAfter) != uint64_t(-1)) {
@@ -1259,7 +1256,7 @@ namespace storm {
                                                                                                   uint64_t action, storm::pomdp::Belief<ValueType> &belief) {
             auto result = storm::utility::zero<ValueType>();
-            for (size_t i = 0; i < belief.probabilities.size(); ++i) {
-                result += belief.probabilities[i] * pomdp.getUniqueRewardModel().getTotalStateActionReward(i, action, pomdp.getTransitionMatrix());
+            for (auto const &probEntry : belief.probabilities) {
+                result += probEntry.second * pomdp.getUniqueRewardModel().getTotalStateActionReward(probEntry.first, action, pomdp.getTransitionMatrix());
             }
             return result;
         }
diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
index 5e8a95025..5108d3135 100644
--- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
+++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.h
@@ -200,8 +200,8 @@ namespace storm {
              * @param gridResolution
             * @return
             */
-            std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>>
-            computeSubSimplexAndLambdas(std::vector<ValueType> &probabilities, uint64_t gridResolution);
+            std::pair<std::vector<std::map<uint64_t, ValueType>>, std::vector<ValueType>>
+            computeSubSimplexAndLambdas(std::map<uint64_t, ValueType> &probabilities, uint64_t gridResolution, uint64_t nrStates);
 
 
            /**
@@ -269,7 +269,7 @@ namespace storm {
             * @return
             */
            uint64_t getBeliefIdInVector(std::vector<storm::pomdp::Belief<ValueType>> const &grid, uint32_t observation,
-                                        std::vector<ValueType> &probabilities);
+                                        std::map<uint64_t, ValueType> &probabilities);
 
            /**
             * @param transitions data structure that contains the transition information of the form: origin-state -> action -> (successor-state -> probability)
@@ -285,7 +285,7 @@ namespace storm {
                                                  std::map<uint64_t, std::vector<std::map<uint32_t, ValueType>>> &observationProbabilities,
                                                  std::map<uint64_t, std::vector<std::map<uint32_t, uint64_t>>> &nextBelieves,
                                                  std::map<uint64_t, std::vector<ValueType>> &beliefActionRewards,
-                                                 std::map<uint64_t, std::vector<std::vector<ValueType>>> &subSimplexCache,
+                                                 std::map<uint64_t, std::vector<std::map<uint64_t, ValueType>>> &subSimplexCache,
                                                  std::map<uint64_t, std::vector<ValueType>> &lambdaCache,
                                                  std::map<uint64_t, ValueType> &result,
                                                  std::map<uint64_t, std::vector<uint64_t>> &chosenActions, uint64_t gridResolution, bool min, bool computeRewards);
diff --git a/src/storm-pomdp/storage/Belief.h b/src/storm-pomdp/storage/Belief.h
index bf4df4c9e..ed7591f1b 100644
--- a/src/storm-pomdp/storage/Belief.h
+++ b/src/storm-pomdp/storage/Belief.h
@@ -6,7 +6,7 @@ namespace storm {
             uint64_t id;
             uint32_t observation;
             //TODO make this sparse?
-            std::vector<ValueType> probabilities;
+            std::map<uint64_t, ValueType> probabilities;
         };
     }
 }
\ No newline at end of file
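For readers following the data-structure change, a minimal, self-contained sketch of the sparse belief update that this patch expresses with std::map<uint64_t, ValueType> instead of a dense std::vector<ValueType>. It mirrors the pattern used in getBeliefAfterActionAndObservation above: iterate only over the non-zero belief entries, keep successors that match the required observation, then normalize. The toy transition table, observation map, and names below are invented for illustration and are not Storm's API.

// Illustrative sketch of the sparse belief update (toy model, not Storm code).
#include <cstdint>
#include <iostream>
#include <map>
#include <utility>
#include <vector>

using Belief = std::map<uint64_t, double>;             // state -> probability, non-zero entries only
using Row = std::vector<std::pair<uint64_t, double>>;  // successor distribution of one (state, action)

// Toy POMDP: one transition row per (state, action) and one observation per state.
std::map<std::pair<uint64_t, uint64_t>, Row> transitions = {
        {{0, 0}, {{1, 0.5}, {2, 0.5}}},
        {{1, 0}, {{1, 1.0}}},
        {{2, 0}, {{2, 1.0}}},
};
std::map<uint64_t, uint32_t> observationOf = {{0, 0}, {1, 1}, {2, 1}};

// Bayes update: apply the action, keep successors with the given observation, normalize.
Belief beliefAfterActionAndObservation(Belief const &belief, uint64_t action, uint32_t observation) {
    Belief after;
    for (auto const &probEntry : belief) {              // touches only the belief's support
        for (auto const &successor : transitions[{probEntry.first, action}]) {
            if (observationOf[successor.first] == observation) {
                after[successor.first] += probEntry.second * successor.second;
            }
        }
    }
    double sum = 0.0;
    for (auto const &entry : after) {
        sum += entry.second;
    }
    for (auto &entry : after) {                         // assumes the observation has positive probability
        entry.second /= sum;
    }
    return after;
}

int main() {
    Belief initial{{0, 1.0}};
    for (auto const &entry : beliefAfterActionAndObservation(initial, 0, 1)) {
        std::cout << "state " << entry.first << " -> " << entry.second << '\n';
    }
    return 0;
}

The point of the map representation is that the outer loop runs over the belief's support rather than over all states of the POMDP, which is exactly what the loops over belief.probabilities introduced by the patch rely on.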