
Working version of over-approximation

tempestpy_adaptions
Alexander Bork committed 5 years ago
parent commit 74cfecd011

2 changed files:
  1. src/storm-pomdp-cli/storm-pomdp.cpp (1 change)
  2. src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp (73 changes)

src/storm-pomdp-cli/storm-pomdp.cpp

@@ -114,6 +114,7 @@ int main(const int argc, const char** argv) {
// For ease of testing
storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::RationalNumber> checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker<storm::RationalNumber>();
+checker.computeReachabilityProbability(*pomdp, std::set<uint32_t>({7}), false, 10);
std::shared_ptr<storm::logic::Formula const> formula;
if (!symbolicInput.properties.empty()) {
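For orientation: the hard-coded test call above appears to ask for the probability of reaching any state whose observation id is 7, maximizing (the false flag), on a belief grid of resolution 10. A sketch of the same call with the arguments named for readability (the variable names are illustrative, not part of the commit):

// Illustrative naming of the hard-coded test arguments above.
std::set<uint32_t> targetObservations({7});  // states with observation 7 count as target
bool minimize = false;                       // false: maximize the reachability probability
uint64_t gridResolution = 10;                // resolution of the Lovejoy belief grid
checker.computeReachabilityProbability(*pomdp, targetObservations, minimize, gridResolution);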

src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp

@@ -68,27 +68,31 @@ namespace storm {
}
}
STORM_LOG_DEBUG("End of Section 1");
// Value Iteration
auto cc = storm::utility::ConstantsComparator<ValueType>();
while (!finished && iteration < maxIterations) {
STORM_LOG_DEBUG("Iteration " << std::to_string(iteration));
STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl);
bool improvement = false;
for (size_t i = 0; i < beliefGrid.size(); ++i) {
bool isTarget = beliefIsKnown[i];
if (!isTarget) {
Belief<ValueType> currentBelief = beliefGrid[i];
STORM_LOG_DEBUG(
"Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|"
<< currentBelief.probabilities << "||");
// we can take any state with the observation as they have the same number of choices
uint64_t numChoices = pomdp.getNumberOfChoices(
pomdp.getStatesWithObservation(currentBelief.observation).front());
STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices));
// Initialize the values for the value iteration
ValueType bestValue = min ? storm::utility::infinity<ValueType>()
: -storm::utility::infinity<ValueType>();
+ValueType chosenValue = min ? storm::utility::infinity<ValueType>()
+                            : -storm::utility::infinity<ValueType>();
+uint64_t chosenActionIndex = std::numeric_limits<uint64_t>::max();
ValueType currentValue;
for (uint64_t action = 0; action < numChoices; ++action) {
currentValue = storm::utility::zero<ValueType>(); // simply change this for rewards?
for (auto iter = observationProbabilities[i][action].begin();
iter != observationProbabilities[i][action].end(); ++iter) {
uint32_t observation = iter->first;
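For context, the loop above performs the grid-based Bellman backup from Lovejoy's over-approximation: for each non-target grid belief b and each action a, the action value is (in informal notation, with b' the successor belief after taking a and observing z)

    V(b) = opt_a  sum_z  P(z | b, a) * sum_j lambda_j(b') * V(v_j(b'))

where v_j(b') are the vertices of the grid subsimplex containing b', lambda_j(b') are the matching interpolation weights, and opt is min or max according to the min flag. The inner sum is exactly the `currentValue += iter->second * sum` accumulation continued below.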
@@ -96,7 +100,7 @@ namespace storm {
// compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief
std::pair<std::vector<std::vector<ValueType>>, std::vector<ValueType>> temp = computeSubSimplexAndLambdas(
-currentBelief.probabilities, gridResolution);
+nextBelief.probabilities, gridResolution);
std::vector<std::vector<ValueType>> subSimplex = temp.first;
std::vector<ValueType> lambdas = temp.second;
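computeSubSimplexAndLambdas, following the Lovejoy construction referenced in the comment above, returns the vertices of the (Freudenthal) subsimplex containing the given belief together with barycentric weights: lambda_j >= 0, sum_j lambda_j = 1 and sum_j lambda_j * subSimplex[j] = nextBelief.probabilities, so that a value function known only at grid points can be interpolated at the successor belief. For example, at resolution 2 the belief (0.75, 0.25) is reproduced by the grid points (1, 0) and (0.5, 0.5) with weights (0.5, 0.5).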
@@ -107,21 +111,37 @@ namespace storm {
getBeliefIdInGrid(beliefGrid, observation, subSimplex[j]));
}
}
currentValue += iter->second * sum;
}
// Update the selected actions
-auto cc = storm::utility::ConstantsComparator<ValueType>();
-if ((min && cc.isLess(storm::utility::zero<ValueType>(), bestValue - currentValue)) ||
-    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - bestValue))) {
-    improvement = true;
-    bestValue = currentValue;
+if ((min && cc.isLess(storm::utility::zero<ValueType>(), chosenValue - currentValue)) ||
+    (!min && cc.isLess(storm::utility::zero<ValueType>(), currentValue - chosenValue))) {
+    chosenValue = currentValue;
+    chosenActionIndex = action;
}
+// TODO tie breaker?
}
-result[currentBelief.id] = bestValue;
+result[currentBelief.id] = chosenValue;
+// Check if the iteration brought an improvement
+if ((min && cc.isLess(storm::utility::zero<ValueType>(),
+                      result_backup[currentBelief.id] - result[currentBelief.id])) ||
+    (!min && cc.isLess(storm::utility::zero<ValueType>(),
+                       result[currentBelief.id] - result_backup[currentBelief.id]))) {
+    if (min) {
+        STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: "
+                            << result[currentBelief.id] << std::endl << "Delta: "
+                            << result_backup[currentBelief.id] - result[currentBelief.id]
+                            << std::endl);
+    } else {
+        STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: "
+                            << result[currentBelief.id] << std::endl << "Delta: "
+                            << result[currentBelief.id] - result_backup[currentBelief.id]
+                            << std::endl);
+    }
+    improvement = true;
+}
}
}
finished = !improvement;
@@ -150,7 +170,7 @@ namespace storm {
}
}
STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox);
STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl);
}
template<typename ValueType, typename RewardModelType>
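The printed overApprox is presumably the converged grid value function interpolated at the initial belief, overApprox = sum_j lambda_j * V(v_j), with v_j and lambda_j the subsimplex vertices and weights of the initial belief. Piecewise-linear interpolation over the grid bounds the true optimal value from the safe side, which is what makes the result an over-approximation.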
@@ -171,8 +191,10 @@ namespace storm {
template<typename ValueType, typename RewardModelType>
Belief<ValueType> ApproximatePOMDPModelchecker<ValueType, RewardModelType>::getInitialBelief(
storm::models::sparse::Pomdp<ValueType, RewardModelType> const &pomdp, uint64_t id) {
-STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1,
+STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2,
                 "POMDP contains more than one initial state");
+STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1,
+                 "POMDP does not contain an initial state");
std::vector<ValueType> distribution(pomdp.getNumberOfStates(), storm::utility::zero<ValueType>());
uint32_t observation = 0;
for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) {
@@ -212,8 +234,8 @@ namespace storm {
} else {
// Otherwise we have to enumerate all possible distributions with regards to the grid
// helper is used to derive the distribution of the belief
-std::vector<uint64_t> helper(statesWithObservation.size(), 0);
-helper[0] = gridResolution;
+std::vector<ValueType> helper(statesWithObservation.size(), ValueType(0));
+helper[0] = storm::utility::convertNumber<ValueType>(gridResolution);
bool done = false;
uint64_t index = 0;
@@ -221,11 +243,13 @@ namespace storm {
std::vector<ValueType> distribution(pomdp.getNumberOfStates(),
storm::utility::zero<ValueType>());
for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) {
-distribution[statesWithObservation[i]] = ValueType(
-    double(helper[i] - helper[i + 1]) / gridResolution);
+distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) /
+    storm::utility::convertNumber<ValueType>(gridResolution);
}
-distribution[statesWithObservation.back()] = ValueType(
-    double(helper[statesWithObservation.size() - 1]) / gridResolution);
+distribution[statesWithObservation.back()] = helper[statesWithObservation.size() - 1] /
+    storm::utility::convertNumber<ValueType>(gridResolution);
Belief<ValueType> belief = {newId, observation, distribution};
STORM_LOG_TRACE(
@@ -233,7 +257,8 @@ namespace storm {
<< ")," << distribution << "]");
grid.push_back(belief);
beliefIsKnown.push_back(isTarget);
-if (helper[statesWithObservation.size() - 1] == gridResolution) {
+if (helper[statesWithObservation.size() - 1] ==
+    storm::utility::convertNumber<ValueType>(gridResolution)) {
// If the last entry of helper is the gridResolution, we have enumerated all necessary distributions
done = true;
} else {
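The helper vector in effect walks through all non-increasing integer vectors h with h[0] = N, the grid resolution; the induced entries (h[i] - h[i+1]) / N then enumerate exactly the beliefs whose components are multiples of 1/N. The stepping logic itself is not visible in this hunk; a self-contained sketch of one way to perform the same enumeration (double stands in for ValueType, names are illustrative):

#include <cstdint>
#include <vector>

// Illustrative sketch: enumerate all beliefs over n states whose entries are
// multiples of 1/N, via non-increasing "helper" vectors h with h[0] = N.
std::vector<std::vector<double>> enumerateGridBeliefs(std::size_t n, uint64_t N) {
    std::vector<std::vector<double>> beliefs;
    std::vector<uint64_t> h(n, 0);
    h[0] = N;  // invariant: N = h[0] >= h[1] >= ... >= h[n-1] >= 0
    while (true) {
        std::vector<double> b(n);
        for (std::size_t i = 0; i + 1 < n; ++i) {
            b[i] = double(h[i] - h[i + 1]) / double(N);
        }
        b[n - 1] = double(h[n - 1]) / double(N);
        beliefs.push_back(b);
        if (h[n - 1] == N) {
            break;  // all mass on the last state: every grid belief was produced
        }
        // Advance: increase the rightmost entry that is still below its left
        // neighbour, and reset everything behind it to zero.
        std::size_t i = n - 1;
        while (h[i] == h[i - 1]) {
            --i;  // i cannot reach 0, since h[n-1] < N = h[0]
        }
        ++h[i];
        for (std::size_t j = i + 1; j < n; ++j) {
            h[j] = 0;
        }
    }
    return beliefs;
}

For three states at resolution 2 this yields the six beliefs (1,0,0), (0.5,0.5,0), (0.5,0,0.5), (0,1,0), (0,0.5,0.5), (0,0,1).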
@@ -281,10 +306,10 @@ namespace storm {
std::vector<std::vector<ValueType>> qs;
for (size_t i = 0; i < probabilities.size(); ++i) {
-std::vector<ValueType> q;
+std::vector<ValueType> q(probabilities.size(), storm::utility::zero<ValueType>());
if (i == 0) {
for (size_t j = 0; j < probabilities.size(); ++j) {
-q[i] = v[i];
+q[j] = v[j];
}
qs.push_back(q);
} else {
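Whatever the internal q-vector construction, the function's output has a simple checkable contract; a minimal sanity check, assuming double as ValueType (the function name is illustrative):

#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

// Illustrative check of the computeSubSimplexAndLambdas contract: the weights
// must form a convex combination and reproduce the original belief.
void checkSubSimplex(std::vector<std::vector<double>> const& subSimplex,
                     std::vector<double> const& lambdas,
                     std::vector<double> const& probabilities) {
    double lambdaSum = 0.0;
    for (double lambda : lambdas) {
        lambdaSum += lambda;
    }
    assert(std::abs(lambdaSum - 1.0) < 1e-9);  // weights sum to one
    for (std::size_t s = 0; s < probabilities.size(); ++s) {
        double interpolated = 0.0;
        for (std::size_t j = 0; j < lambdas.size(); ++j) {
            interpolated += lambdas[j] * subSimplex[j][s];
        }
        // the weighted grid vertices reproduce the belief entry
        assert(std::abs(interpolated - probabilities[s]) < 1e-9);
    }
}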
