diff --git a/src/storm-pomdp-cli/storm-pomdp.cpp b/src/storm-pomdp-cli/storm-pomdp.cpp index 730ce3ec8..880dc29ad 100644 --- a/src/storm-pomdp-cli/storm-pomdp.cpp +++ b/src/storm-pomdp-cli/storm-pomdp.cpp @@ -114,6 +114,7 @@ int main(const int argc, const char** argv) { // For ease of testing storm::pomdp::modelchecker::ApproximatePOMDPModelchecker checker = storm::pomdp::modelchecker::ApproximatePOMDPModelchecker(); + checker.computeReachabilityProbability(*pomdp, std::set({7}), false, 10); std::shared_ptr formula; if (!symbolicInput.properties.empty()) { diff --git a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp index 52694180f..0688e1cf2 100644 --- a/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp +++ b/src/storm-pomdp/modelchecker/ApproximatePOMDPModelchecker.cpp @@ -68,27 +68,31 @@ namespace storm { } } + STORM_LOG_DEBUG("End of Section 1"); // Value Iteration + auto cc = storm::utility::ConstantsComparator(); while (!finished && iteration < maxIterations) { - STORM_LOG_DEBUG("Iteration " << std::to_string(iteration)); + STORM_PRINT("Iteration " << std::to_string(iteration) << std::endl); bool improvement = false; for (size_t i = 0; i < beliefGrid.size(); ++i) { bool isTarget = beliefIsKnown[i]; if (!isTarget) { Belief currentBelief = beliefGrid[i]; + STORM_LOG_DEBUG( + "Check Belief " << currentBelief.id << ": ||" << currentBelief.observation << "|" + << currentBelief.probabilities << "||"); // we can take any state with the observation as they have the same number of choices uint64_t numChoices = pomdp.getNumberOfChoices( pomdp.getStatesWithObservation(currentBelief.observation).front()); - + STORM_LOG_DEBUG("Number choices: " << std::to_string(numChoices)); // Initialize the values for the value iteration - ValueType bestValue = min ? storm::utility::infinity() - : -storm::utility::infinity(); + ValueType chosenValue = min ? storm::utility::infinity() + : -storm::utility::infinity(); uint64_t chosenActionIndex = std::numeric_limits::infinity(); ValueType currentValue; for (uint64_t action = 0; action < numChoices; ++action) { currentValue = storm::utility::zero(); // simply change this for rewards? - for (auto iter = observationProbabilities[i][action].begin(); iter != observationProbabilities[i][action].end(); ++iter) { uint32_t observation = iter->first; @@ -96,7 +100,7 @@ namespace storm { // compute subsimplex and lambdas according to the Lovejoy paper to approximate the next belief std::pair>, std::vector> temp = computeSubSimplexAndLambdas( - currentBelief.probabilities, gridResolution); + nextBelief.probabilities, gridResolution); std::vector> subSimplex = temp.first; std::vector lambdas = temp.second; @@ -107,21 +111,37 @@ namespace storm { getBeliefIdInGrid(beliefGrid, observation, subSimplex[j])); } } - currentValue += iter->second * sum; } // Update the selected actions - auto cc = storm::utility::ConstantsComparator(); - if ((min && cc.isLess(storm::utility::zero(), bestValue - currentValue)) || - (!min && cc.isLess(storm::utility::zero(), currentValue - bestValue))) { - improvement = true; - bestValue = currentValue; + if ((min && cc.isLess(storm::utility::zero(), chosenValue - currentValue)) || + (!min && + cc.isLess(storm::utility::zero(), currentValue - chosenValue))) { + chosenValue = currentValue; chosenActionIndex = action; } // TODO tie breaker? } - result[currentBelief.id] = bestValue; + result[currentBelief.id] = chosenValue; + // Check if the iteration brought an improvement + if ((min && cc.isLess(storm::utility::zero(), + result_backup[currentBelief.id] - result[currentBelief.id])) || + (!min && cc.isLess(storm::utility::zero(), + result[currentBelief.id] - result_backup[currentBelief.id]))) { + if (min) { + STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " + << result[currentBelief.id] << std::endl << "Delta: " + << result_backup[currentBelief.id] - result[currentBelief.id] + << std::endl); + } else { + STORM_PRINT("Old: " << result_backup[currentBelief.id] << ", New: " + << result[currentBelief.id] << std::endl << "Delta: " + << result_backup[currentBelief.id] - result[currentBelief.id] + << std::endl); + } + improvement = true; + } } } finished = !improvement; @@ -150,7 +170,7 @@ namespace storm { } } - STORM_LOG_DEBUG("Over-Approximation Result: " << overApprox); + STORM_PRINT("Over-Approximation Result: " << overApprox << std::endl); } template @@ -171,8 +191,10 @@ namespace storm { template Belief ApproximatePOMDPModelchecker::getInitialBelief( storm::models::sparse::Pomdp const &pomdp, uint64_t id) { - STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() > 1, + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() < 2, "POMDP contains more than one initial state"); + STORM_LOG_ASSERT(pomdp.getInitialStates().getNumberOfSetBits() == 1, + "POMDP does not contain an initial state"); std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); uint32_t observation = 0; for (uint64_t state = 0; state < pomdp.getNumberOfStates(); ++state) { @@ -212,8 +234,8 @@ namespace storm { } else { // Otherwise we have to enumerate all possible distributions with regards to the grid // helper is used to derive the distribution of the belief - std::vector helper(statesWithObservation.size(), 0); - helper[0] = gridResolution; + std::vector helper(statesWithObservation.size(), ValueType(0)); + helper[0] = storm::utility::convertNumber(gridResolution); bool done = false; uint64_t index = 0; @@ -221,11 +243,13 @@ namespace storm { std::vector distribution(pomdp.getNumberOfStates(), storm::utility::zero()); for (size_t i = 0; i < statesWithObservation.size() - 1; ++i) { - distribution[statesWithObservation[i]] = ValueType( - double(helper[i] - helper[i + 1]) / gridResolution); + distribution[statesWithObservation[i]] = (helper[i] - helper[i + 1]) / + storm::utility::convertNumber( + gridResolution); } - distribution[statesWithObservation.back()] = ValueType( - double(helper[statesWithObservation.size() - 1]) / gridResolution); + distribution[statesWithObservation.back()] = + helper[statesWithObservation.size() - 1] / + storm::utility::convertNumber(gridResolution); Belief belief = {newId, observation, distribution}; STORM_LOG_TRACE( @@ -233,7 +257,8 @@ namespace storm { << ")," << distribution << "]"); grid.push_back(belief); beliefIsKnown.push_back(isTarget); - if (helper[statesWithObservation.size() - 1] == gridResolution) { + if (helper[statesWithObservation.size() - 1] == + storm::utility::convertNumber(gridResolution)) { // If the last entry of helper is the gridResolution, we have enumerated all necessary distributions done = true; } else { @@ -281,10 +306,10 @@ namespace storm { std::vector> qs; for (size_t i = 0; i < probabilities.size(); ++i) { - std::vector q; + std::vector q(probabilities.size(), storm::utility::zero()); if (i == 0) { for (size_t j = 0; j < probabilities.size(); ++j) { - q[i] = v[i]; + q[j] = v[j]; } qs.push_back(q); } else {