#include "storm/modelchecker/multiobjective/pcaa/StandardPcaaWeightVectorChecker.h" #include #include #include "storm/adapters/RationalFunctionAdapter.h" #include "storm/models/sparse/Mdp.h" #include "storm/models/sparse/MarkovAutomaton.h" #include "storm/models/sparse/StandardRewardModel.h" #include "storm/modelchecker/prctl/helper/SparseDtmcPrctlHelper.h" #include "storm/modelchecker/prctl/helper/DsMpiUpperRewardBoundsComputer.h" #include "storm/modelchecker/prctl/helper/BaierUpperRewardBoundsComputer.h" #include "storm/modelchecker/multiobjective/preprocessing/SparseMultiObjectiveRewardAnalysis.h" #include "storm/solver/MinMaxLinearEquationSolver.h" #include "storm/utility/graph.h" #include "storm/utility/macros.h" #include "storm/utility/vector.h" #include "storm/logic/Formulas.h" #include "storm/transformer/GoalStateMerger.h" #include "storm/exceptions/IllegalFunctionCallException.h" #include "storm/exceptions/UnexpectedException.h" #include "storm/exceptions/NotImplementedException.h" #include "storm/exceptions/NotSupportedException.h" #include "storm/exceptions/UncheckedRequirementException.h" namespace storm { namespace modelchecker { namespace multiobjective { template StandardPcaaWeightVectorChecker::StandardPcaaWeightVectorChecker(preprocessing::SparseMultiObjectivePreprocessorResult const& preprocessorResult) : PcaaWeightVectorChecker(preprocessorResult.objectives) { // Intantionally left empty } template void StandardPcaaWeightVectorChecker::initialize(preprocessing::SparseMultiObjectivePreprocessorResult const& preprocessorResult) { auto rewardAnalysis = preprocessing::SparseMultiObjectiveRewardAnalysis::analyze(preprocessorResult); STORM_LOG_THROW(rewardAnalysis.rewardFinitenessType != preprocessing::RewardFinitenessType::Infinite, storm::exceptions::NotSupportedException, "There is no Pareto optimal scheduler that yields finite reward for all objectives. This is not supported."); STORM_LOG_THROW(rewardAnalysis.totalRewardLessInfinityEStates, storm::exceptions::UnexpectedException, "The set of states with reward < infinity for some scheduler has not been computed during preprocessing."); STORM_LOG_THROW(preprocessorResult.containsOnlyTrivialObjectives(), storm::exceptions::NotSupportedException, "At least one objective was not reduced to an expected (long run, total or cumulative) reward objective during preprocessing. This is not supported by the considered weight vector checker."); STORM_LOG_THROW(preprocessorResult.preprocessedModel->getInitialStates().getNumberOfSetBits() == 1, storm::exceptions::NotSupportedException, "The model has multiple initial states."); // Build a subsystem of the preprocessor result model that discards states that yield infinite reward for all schedulers. // We can also merge the states that will have reward zero anyway. 
    storm::storage::BitVector maybeStates = rewardAnalysis.totalRewardLessInfinityEStates.get() & ~rewardAnalysis.reward0AStates;
    storm::storage::BitVector finiteTotalRewardChoices = preprocessorResult.preprocessedModel->getTransitionMatrix().getRowFilter(rewardAnalysis.totalRewardLessInfinityEStates.get(), rewardAnalysis.totalRewardLessInfinityEStates.get());
    std::set<std::string> relevantRewardModels;
    for (auto const& obj : this->objectives) {
        obj.formula->gatherReferencedRewardModels(relevantRewardModels);
    }
    storm::transformer::GoalStateMerger<SparseModelType> merger(*preprocessorResult.preprocessedModel);
    auto mergerResult = merger.mergeTargetAndSinkStates(maybeStates, rewardAnalysis.reward0AStates, storm::storage::BitVector(maybeStates.size(), false), std::vector<std::string>(relevantRewardModels.begin(), relevantRewardModels.end()), finiteTotalRewardChoices);

    // Initialize data specific for the considered model type
    initializeModelTypeSpecificData(*mergerResult.model);

    // Initialize general data of the model
    transitionMatrix = std::move(mergerResult.model->getTransitionMatrix());
    initialState = *mergerResult.model->getInitialStates().begin();
    totalReward0EStates = rewardAnalysis.totalReward0EStates % maybeStates;
    if (mergerResult.targetState) {
        // There is an additional state in the result
        totalReward0EStates.resize(totalReward0EStates.size() + 1, true);

        // The overapproximation for the possible ec choices consists of the states that can reach the target states with prob. 0 and the target state itself.
        storm::storage::BitVector targetStateAsVector(transitionMatrix.getRowGroupCount(), false);
        targetStateAsVector.set(*mergerResult.targetState, true);
        ecChoicesHint = transitionMatrix.getRowFilter(storm::utility::graph::performProb0E(transitionMatrix, transitionMatrix.getRowGroupIndices(), transitionMatrix.transpose(true), storm::storage::BitVector(targetStateAsVector.size(), true), targetStateAsVector));
        ecChoicesHint.set(transitionMatrix.getRowGroupIndices()[*mergerResult.targetState], true);
    } else {
        ecChoicesHint = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
    }

    // Set data for unbounded objectives
    lraObjectives = storm::storage::BitVector(this->objectives.size(), false);
    objectivesWithNoUpperTimeBound = storm::storage::BitVector(this->objectives.size(), false);
    actionsWithoutRewardInUnboundedPhase = storm::storage::BitVector(transitionMatrix.getRowCount(), true);
    for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
        auto const& formula = *this->objectives[objIndex].formula;
        if (formula.getSubformula().isTotalRewardFormula()) {
            objectivesWithNoUpperTimeBound.set(objIndex, true);
            actionsWithoutRewardInUnboundedPhase &= storm::utility::vector::filterZero(actionRewards[objIndex]);
        }
        if (formula.getSubformula().isLongRunAverageRewardFormula()) {
            lraObjectives.set(objIndex, true);
            objectivesWithNoUpperTimeBound.set(objIndex, true);
        }
    }

    // Set data for LRA objectives (if available)
    if (!lraObjectives.empty()) {
        lraMecDecomposition = LraMecDecomposition();
        lraMecDecomposition->mecs = storm::storage::MaximalEndComponentDecomposition<ValueType>(transitionMatrix, transitionMatrix.transpose(true), totalReward0EStates, actionsWithoutRewardInUnboundedPhase);
        lraMecDecomposition->auxMecValues.resize(lraMecDecomposition->mecs.size());
    }

    // Initialize data for the results
    checkHasBeenCalled = false;
    objectiveResults.resize(this->objectives.size());
    offsetsToUnderApproximation.resize(this->objectives.size(), storm::utility::zero<ValueType>());
    offsetsToOverApproximation.resize(this->objectives.size(), storm::utility::zero<ValueType>());
    optimalChoices.resize(transitionMatrix.getRowGroupCount(), 0);
}
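// check() proceeds in (up to) three phases:
//  (1) infiniteHorizonWeightedPhase: optimal weighted long-run average values for the LRA objectives,
//  (2) unboundedWeightedPhase / unboundedIndividualPhase: weighted and per-objective expected total rewards,
//  (3) boundedPhase: objectives with a time bound (only invoked if such an objective is present).
// Afterwards, the weighted gap between over- and underapproximation, i.e. |w * (over - under)| / sqrt(w * w),
// is validated against the required precision.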
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::check(Environment const& env, std::vector<ValueType> const& weightVector) {
    checkHasBeenCalled = true;
    STORM_LOG_INFO("Invoked WeightVectorChecker with weights " << std::endl << "\t" << storm::utility::vector::toString(storm::utility::vector::convertNumericVector<double>(weightVector)));

    // Prepare and invoke weighted infinite horizon (long run average) phase
    std::vector<ValueType> weightedRewardVector(transitionMatrix.getRowCount(), storm::utility::zero<ValueType>());
    if (!lraObjectives.empty()) {
        boost::optional<std::vector<ValueType>> weightedStateRewardVector;
        for (auto objIndex : lraObjectives) {
            ValueType weight = storm::solver::minimize(this->objectives[objIndex].formula->getOptimalityType()) ? -weightVector[objIndex] : weightVector[objIndex];
            storm::utility::vector::addScaledVector(weightedRewardVector, actionRewards[objIndex], weight);
            if (!stateRewards[objIndex].empty()) {
                if (!weightedStateRewardVector) {
                    // Create the vector lazily; dereferencing a disengaged optional would be invalid.
                    weightedStateRewardVector = std::vector<ValueType>();
                }
                weightedStateRewardVector->resize(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
                storm::utility::vector::addScaledVector(weightedStateRewardVector.get(), stateRewards[objIndex], weight);
            }
        }
        infiniteHorizonWeightedPhase(env, weightedRewardVector, weightedStateRewardVector);
        // Clear all values of the weighted reward vector
        weightedRewardVector.assign(weightedRewardVector.size(), storm::utility::zero<ValueType>());
    }

    // Prepare and invoke weighted indefinite horizon (unbounded total reward) phase
    auto totalRewardObjectives = objectivesWithNoUpperTimeBound & ~lraObjectives;
    for (auto objIndex : totalRewardObjectives) {
        if (storm::solver::minimize(this->objectives[objIndex].formula->getOptimalityType())) {
            storm::utility::vector::addScaledVector(weightedRewardVector, actionRewards[objIndex], -weightVector[objIndex]);
        } else {
            storm::utility::vector::addScaledVector(weightedRewardVector, actionRewards[objIndex], weightVector[objIndex]);
        }
    }
    unboundedWeightedPhase(env, weightedRewardVector, weightVector);
    unboundedIndividualPhase(env, weightVector);

    // Only invoke boundedPhase if necessary, i.e., if there is at least one objective with a time bound
    for (auto const& obj : this->objectives) {
        if (!obj.formula->getSubformula().isTotalRewardFormula()) {
            boundedPhase(env, weightVector, weightedRewardVector);
            break;
        }
    }
    STORM_LOG_INFO("Weight vector check done. Lower bounds for results in initial state: " << storm::utility::vector::toString(storm::utility::vector::convertNumericVector<double>(getUnderApproximationOfInitialStateResults())));

    // Validate that the results are sufficiently precise
    ValueType resultingWeightedPrecision = storm::utility::abs(storm::utility::vector::dotProduct(getOverApproximationOfInitialStateResults(), weightVector) - storm::utility::vector::dotProduct(getUnderApproximationOfInitialStateResults(), weightVector));
    resultingWeightedPrecision /= storm::utility::sqrt(storm::utility::vector::dotProduct(weightVector, weightVector));
    STORM_LOG_THROW(resultingWeightedPrecision <= this->getWeightedPrecision(), storm::exceptions::UnexpectedException, "The desired precision was not reached");
}

template <class SparseModelType>
std::vector<typename StandardPcaaWeightVectorChecker<SparseModelType>::ValueType> StandardPcaaWeightVectorChecker<SparseModelType>::getUnderApproximationOfInitialStateResults() const {
    STORM_LOG_THROW(checkHasBeenCalled, storm::exceptions::IllegalFunctionCallException, "Tried to retrieve results but check(..) has not been called before.");
    std::vector<ValueType> res;
    res.reserve(this->objectives.size());
    for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
        res.push_back(this->objectiveResults[objIndex][initialState] + this->offsetsToUnderApproximation[objIndex]);
    }
    return res;
}

template <class SparseModelType>
std::vector<typename StandardPcaaWeightVectorChecker<SparseModelType>::ValueType> StandardPcaaWeightVectorChecker<SparseModelType>::getOverApproximationOfInitialStateResults() const {
    STORM_LOG_THROW(checkHasBeenCalled, storm::exceptions::IllegalFunctionCallException, "Tried to retrieve results but check(..) has not been called before.");
    std::vector<ValueType> res;
    res.reserve(this->objectives.size());
    for (uint_fast64_t objIndex = 0; objIndex < this->objectives.size(); ++objIndex) {
        res.push_back(this->objectiveResults[objIndex][initialState] + this->offsetsToOverApproximation[objIndex]);
    }
    return res;
}

template <class SparseModelType>
storm::storage::Scheduler<typename StandardPcaaWeightVectorChecker<SparseModelType>::ValueType> StandardPcaaWeightVectorChecker<SparseModelType>::computeScheduler() const {
    STORM_LOG_THROW(this->checkHasBeenCalled, storm::exceptions::IllegalFunctionCallException, "Tried to retrieve results but check(..) has not been called before.");
    for (auto const& obj : this->objectives) {
        STORM_LOG_THROW(obj.formula->getSubformula().isTotalRewardFormula() || obj.formula->getSubformula().isLongRunAverageRewardFormula(), storm::exceptions::NotImplementedException, "Scheduler retrieval is only implemented for objectives without time-bound.");
    }
    storm::storage::Scheduler<ValueType> result(this->optimalChoices.size());
    uint_fast64_t state = 0;
    for (auto const& choice : optimalChoices) {
        result.setChoice(choice, state);
        ++state;
    }
    return result;
}
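// Helper: extends 'choices' such that all states in 'consideredStates' reach 'statesToReach' with probability one.
// 'statesToReach' also serves as the set of already processed states and is extended during the backwards search.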
template <typename ValueType>
void computeSchedulerProb1(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& consideredStates, storm::storage::BitVector& statesToReach, std::vector<uint64_t>& choices) {
    std::vector<uint64_t> stack;
    storm::storage::BitVector& processedStates = statesToReach;
    stack.insert(stack.end(), processedStates.begin(), processedStates.end());
    uint64_t currentState = 0;
    while (!stack.empty()) {
        currentState = stack.back();
        stack.pop_back();
        for (auto const& predecessorEntry : backwardTransitions.getRow(currentState)) {
            auto predecessor = predecessorEntry.getColumn();
            if (consideredStates.get(predecessor) && !processedStates.get(predecessor)) {
                // Find a choice leading to an already processed state (such a choice has to exist since this is a predecessor of the currentState)
                auto const& groupStart = transitionMatrix.getRowGroupIndices()[predecessor];
                auto const& groupEnd = transitionMatrix.getRowGroupIndices()[predecessor + 1];
                uint64_t row = groupStart;
                for (; row < groupEnd; ++row) {
                    bool hasSuccessorInProcessedStates = false;
                    for (auto const& successorOfPredecessor : transitionMatrix.getRow(row)) {
                        if (processedStates.get(successorOfPredecessor.getColumn())) {
                            hasSuccessorInProcessedStates = true;
                            break;
                        }
                    }
                    if (hasSuccessorInProcessedStates) {
                        choices[predecessor] = row - groupStart;
                        processedStates.set(predecessor, true);
                        stack.push_back(predecessor);
                        break;
                    }
                }
                STORM_LOG_ASSERT(row < groupEnd, "Unable to find choice at a predecessor of a processed state that leads to a processed state.");
            }
        }
    }
    STORM_LOG_ASSERT(consideredStates.isSubsetOf(processedStates), "Not all states have been processed.");
}
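// Helper: picks, for every state, a choice under which a row with probability sum < 1 (i.e., a row that exits the
// considered system) is eventually reached. The result can serve as a valid initial scheduler for min-max solvers.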
template <typename ValueType>
std::vector<uint64_t> computeValidInitialScheduler(storm::storage::SparseMatrix<ValueType> const& matrix, storm::storage::BitVector const& rowsWithSumLessOne) {
    std::vector<uint64_t> result(matrix.getRowGroupCount());
    auto const& groups = matrix.getRowGroupIndices();
    auto backwardsTransitions = matrix.transpose(true);
    storm::storage::BitVector processedStates(result.size(), false);
    for (uint64_t state = 0; state < result.size(); ++state) {
        if (rowsWithSumLessOne.getNextSetIndex(groups[state]) < groups[state + 1]) {
            result[state] = rowsWithSumLessOne.getNextSetIndex(groups[state]) - groups[state];
            processedStates.set(state, true);
        }
    }
    computeSchedulerProb1(matrix, backwardsTransitions, ~processedStates, processedStates, result);
    return result;
}

/*!
 * Computes a scheduler taking the choices from the given set only finitely often
 * @pre such a scheduler exists
 */
template <typename ValueType>
std::vector<uint64_t> computeSchedulerFinitelyOften(storm::storage::SparseMatrix<ValueType> const& transitionMatrix, storm::storage::SparseMatrix<ValueType> const& backwardTransitions, storm::storage::BitVector const& finitelyOftenChoices) {
    std::vector<uint64_t> result(transitionMatrix.getRowGroupCount(), 0);
    auto badStates = transitionMatrix.getRowGroupFilter(finitelyOftenChoices, true);  // badStates shall only be reached finitely often
    auto reachBadWithProbGreater0AStates = storm::utility::graph::performProbGreater0A(transitionMatrix, transitionMatrix.getRowGroupIndices(), backwardTransitions, ~badStates, badStates, false, 0, ~finitelyOftenChoices);
    // States in ~reachBadWithProbGreater0AStates can avoid bad states forever by only taking ~finitelyOftenChoices. We compute a scheduler for these states achieving exactly this
    auto avoidBadStates = ~reachBadWithProbGreater0AStates;
    for (auto state : avoidBadStates) {
        auto const& groupStart = transitionMatrix.getRowGroupIndices()[state];
        auto const& groupEnd = transitionMatrix.getRowGroupIndices()[state + 1];
        for (auto choice = finitelyOftenChoices.getNextUnsetIndex(groupStart); choice < groupEnd; choice = finitelyOftenChoices.getNextUnsetIndex(choice + 1)) {
            bool allSuccessorsAvoidBadStates = true;
            for (auto const& element : transitionMatrix.getRow(choice)) {
                if (!avoidBadStates.get(element.getColumn())) {
                    allSuccessorsAvoidBadStates = false;
                    break;
                }
            }
            if (allSuccessorsAvoidBadStates) {
                result[state] = choice - groupStart;
                break;
            }
        }
    }
    // Finally, we need to take care of states that will reach a bad state with prob greater 0 (including the bad states themselves).
    // Due to the precondition, we know that it has to be possible to eventually avoid the bad states forever.
    // Perform a backwards search from the avoid states and store choices with prob. 1
    computeSchedulerProb1(transitionMatrix, backwardTransitions, reachBadWithProbGreater0AStates, avoidBadStates, result);
    return result;
}
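// Computes the optimal weighted long-run average value of each MEC in the precomputed decomposition (maximizing; the
// weights of minimizing objectives were already negated by the caller) and keeps the corresponding optimal choices.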
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::infiniteHorizonWeightedPhase(Environment const& env, std::vector<ValueType> const& weightedActionRewardVector, boost::optional<std::vector<ValueType>> const& weightedStateRewardVector) {
    // Compute the optimal (weighted) lra value for each mec, keeping track of the optimal choices
    STORM_LOG_ASSERT(lraMecDecomposition, "Mec decomposition for lra computations not initialized.");
    storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType> helper = createNondetInfiniteHorizonHelper(this->transitionMatrix);
    helper.provideLongRunComponentDecomposition(lraMecDecomposition->mecs);
    helper.setOptimizationDirection(storm::solver::OptimizationDirection::Maximize);
    helper.setProduceScheduler(true);
    for (uint64_t mecIndex = 0; mecIndex < lraMecDecomposition->mecs.size(); ++mecIndex) {
        auto const& mec = lraMecDecomposition->mecs[mecIndex];
        auto actionValueGetter = [&weightedActionRewardVector] (uint64_t const& a) { return weightedActionRewardVector[a]; };
        typename storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType>::ValueGetter stateValueGetter;
        if (weightedStateRewardVector) {
            stateValueGetter = [&weightedStateRewardVector] (uint64_t const& s) { return weightedStateRewardVector.get()[s]; };
        } else {
            stateValueGetter = [] (uint64_t const&) { return storm::utility::zero<ValueType>(); };
        }
        lraMecDecomposition->auxMecValues[mecIndex] = helper.computeLraForComponent(env, stateValueGetter, actionValueGetter, mec);
    }
    // Extract the produced optimal choices for the MECs
    this->optimalChoices = std::move(helper.getProducedOptimalChoices());
}

template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::unboundedWeightedPhase(Environment const& env, std::vector<ValueType> const& weightedRewardVector, std::vector<ValueType> const& weightVector) {
    if (this->objectivesWithNoUpperTimeBound.empty() || (this->lraObjectives.empty() && !storm::utility::vector::hasNonZeroEntry(weightedRewardVector))) {
        this->weightedResult = std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
        // Get an arbitrary scheduler that yields finite reward for all objectives
        this->optimalChoices = computeSchedulerFinitelyOften(transitionMatrix, transitionMatrix.transpose(true), ~actionsWithoutRewardInUnboundedPhase);
        return;
    }

    updateEcQuotient(weightedRewardVector);

    // Set up the choice values
    storm::utility::vector::selectVectorValues(ecQuotient->auxChoiceValues, ecQuotient->ecqToOriginalChoiceMapping, weightedRewardVector);
    std::map<uint64_t, uint64_t> ecqStateToOptimalMecMap;
    if (!lraObjectives.empty()) {
        // We also need to assign a value for each ecQuotientChoice that corresponds to "staying" in the eliminated EC (at this point these choices should all have a value of zero).
        // Since each of the eliminated ECs has to contain *at least* one LRA EC, we need to find the largest value among the contained LRA ECs
        storm::storage::BitVector foundEcqChoices(ecQuotient->matrix.getRowCount(), false);  // keeps track of choices we have already seen before
        for (uint64_t mecIndex = 0; mecIndex < lraMecDecomposition->mecs.size(); ++mecIndex) {
            auto const& mec = lraMecDecomposition->mecs[mecIndex];
            auto const& mecValue = lraMecDecomposition->auxMecValues[mecIndex];
            uint64_t ecqState = ecQuotient->originalToEcqStateMapping[mec.begin()->first];
            uint64_t ecqChoice = ecQuotient->ecqStayInEcChoices.getNextSetIndex(ecQuotient->matrix.getRowGroupIndices()[ecqState]);
            STORM_LOG_ASSERT(ecqChoice < ecQuotient->matrix.getRowGroupIndices()[ecqState + 1], "Unable to find choice that represents staying inside the (eliminated) ec.");
            auto& ecqChoiceValue = ecQuotient->auxChoiceValues[ecqChoice];
            auto insertionRes = ecqStateToOptimalMecMap.emplace(ecqState, mecIndex);
            if (insertionRes.second) {
                // We have seen this ecqState for the first time.
                STORM_LOG_ASSERT(storm::utility::isZero(ecqChoiceValue), "Expected a total reward of zero for choices that represent staying in an EC forever.");
                ecqChoiceValue = mecValue;
            } else {
                if (mecValue > ecqChoiceValue) {
                    // Found a larger value
                    ecqChoiceValue = mecValue;
                    insertionRes.first->second = mecIndex;
                }
            }
        }
    }
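    // Solve the weighted total reward problem on the EC quotient with a maximizing min-max solver. Minimizing objectives
    // entered the weighted rewards with negated weights, and the quotient has no "neutral" end components left
    // (cf. updateEcQuotient), which makes the solution unique.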
    storm::solver::GeneralMinMaxLinearEquationSolverFactory<ValueType> solverFactory;
    std::unique_ptr<storm::solver::MinMaxLinearEquationSolver<ValueType>> solver = solverFactory.create(env, ecQuotient->matrix);
    solver->setTrackScheduler(true);
    solver->setHasUniqueSolution(true);
    solver->setOptimizationDirection(storm::solver::OptimizationDirection::Maximize);
    auto req = solver->getRequirements(env, storm::solver::OptimizationDirection::Maximize);
    setBoundsToSolver(*solver, req.lowerBounds(), req.upperBounds(), weightVector, objectivesWithNoUpperTimeBound, ecQuotient->matrix, ecQuotient->rowsWithSumLessOne, ecQuotient->auxChoiceValues);
    if (solver->hasLowerBound()) {
        req.clearLowerBounds();
    }
    if (solver->hasUpperBound()) {
        req.clearUpperBounds();
    }
    if (req.validInitialScheduler()) {
        solver->setInitialScheduler(computeValidInitialScheduler(ecQuotient->matrix, ecQuotient->rowsWithSumLessOne));
        req.clearValidInitialScheduler();
    }
    STORM_LOG_THROW(!req.hasEnabledCriticalRequirement(), storm::exceptions::UncheckedRequirementException, "Solver requirements " + req.getEnabledRequirementsAsString() + " not checked.");
    solver->setRequirementsChecked(true);

    // Use the (0...0) vector as initial guess for the solution.
    std::fill(ecQuotient->auxStateValues.begin(), ecQuotient->auxStateValues.end(), storm::utility::zero<ValueType>());

    solver->solveEquations(env, ecQuotient->auxStateValues, ecQuotient->auxChoiceValues);

    this->weightedResult = std::vector<ValueType>(transitionMatrix.getRowGroupCount());
    transformEcqSolutionToOriginalModel(ecQuotient->auxStateValues, solver->getSchedulerChoices(), ecqStateToOptimalMecMap, this->weightedResult, this->optimalChoices);
}
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::unboundedIndividualPhase(Environment const& env, std::vector<ValueType> const& weightVector) {
    if (objectivesWithNoUpperTimeBound.getNumberOfSetBits() == 1 && storm::utility::isOne(weightVector[*objectivesWithNoUpperTimeBound.begin()])) {
        uint_fast64_t objIndex = *objectivesWithNoUpperTimeBound.begin();
        objectiveResults[objIndex] = weightedResult;
        if (storm::solver::minimize(this->objectives[objIndex].formula->getOptimalityType())) {
            storm::utility::vector::scaleVectorInPlace(objectiveResults[objIndex], -storm::utility::one<ValueType>());
        }
        for (uint_fast64_t objIndex2 = 0; objIndex2 < this->objectives.size(); ++objIndex2) {
            if (objIndex != objIndex2) {
                objectiveResults[objIndex2] = std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
            }
        }
    } else {
        storm::storage::SparseMatrix<ValueType> deterministicMatrix = transitionMatrix.selectRowsFromRowGroups(this->optimalChoices, true);  // TODO: why diagonal entries?
        storm::storage::SparseMatrix<ValueType> deterministicBackwardTransitions = deterministicMatrix.transpose();
        std::vector<ValueType> deterministicStateRewards(deterministicMatrix.getRowCount());  // allocate here
        storm::solver::GeneralLinearEquationSolverFactory<ValueType> linearEquationSolverFactory;

        auto infiniteHorizonHelper = createDetInfiniteHorizonHelper(deterministicMatrix);
        infiniteHorizonHelper.provideBackwardTransitions(deterministicBackwardTransitions);

        // We compute an estimate for the results of the individual objectives which is obtained from the weighted result and the result of the objectives computed so far.
        // Note that weightedResult = Sum_{i=1}^{n} w_i * objectiveResult_i.
        std::vector<ValueType> weightedSumOfUncheckedObjectives = weightedResult;
        ValueType sumOfWeightsOfUncheckedObjectives = storm::utility::vector::sum_if(weightVector, objectivesWithNoUpperTimeBound);

        for (uint_fast64_t const& objIndex : storm::utility::vector::getSortedIndices(weightVector)) {
            auto const& obj = this->objectives[objIndex];
            if (objectivesWithNoUpperTimeBound.get(objIndex)) {
                offsetsToUnderApproximation[objIndex] = storm::utility::zero<ValueType>();
                offsetsToOverApproximation[objIndex] = storm::utility::zero<ValueType>();
                if (lraObjectives.get(objIndex)) {
                    auto actionValueGetter = [&] (uint64_t const& a) { return actionRewards[objIndex][transitionMatrix.getRowGroupIndices()[a] + this->optimalChoices[a]]; };
                    typename storm::modelchecker::helper::SparseNondeterministicInfiniteHorizonHelper<ValueType>::ValueGetter stateValueGetter;
                    if (stateRewards[objIndex].empty()) {
                        stateValueGetter = [] (uint64_t const&) { return storm::utility::zero<ValueType>(); };
                    } else {
                        stateValueGetter = [&] (uint64_t const& s) { return stateRewards[objIndex][s]; };
                    }
                    objectiveResults[objIndex] = infiniteHorizonHelper.computeLongRunAverageValues(env, stateValueGetter, actionValueGetter);
                } else {
                    // i.e. a total reward objective
                    storm::utility::vector::selectVectorValues(deterministicStateRewards, this->optimalChoices, transitionMatrix.getRowGroupIndices(), actionRewards[objIndex]);
                    storm::storage::BitVector statesWithRewards = ~storm::utility::vector::filterZero(deterministicStateRewards);
                    // As maybeStates we pick the states from which a state with reward is reachable
                    storm::storage::BitVector maybeStates = storm::utility::graph::performProbGreater0(deterministicBackwardTransitions, storm::storage::BitVector(deterministicMatrix.getRowCount(), true), statesWithRewards);

                    // Compute the estimate for this objective
                    if (!storm::utility::isZero(weightVector[objIndex])) {
                        objectiveResults[objIndex] = weightedSumOfUncheckedObjectives;
                        ValueType scalingFactor = storm::utility::one<ValueType>() / sumOfWeightsOfUncheckedObjectives;
                        if (storm::solver::minimize(obj.formula->getOptimalityType())) {
                            scalingFactor *= -storm::utility::one<ValueType>();
                        }
                        storm::utility::vector::scaleVectorInPlace(objectiveResults[objIndex], scalingFactor);
                        storm::utility::vector::clip(objectiveResults[objIndex], obj.lowerResultBound, obj.upperResultBound);
                    }
                    // Make sure that the objectiveResult is initialized correctly
                    objectiveResults[objIndex].resize(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());

                    if (!maybeStates.empty()) {
                        bool needEquationSystem = linearEquationSolverFactory.getEquationProblemFormat(env) == storm::solver::LinearEquationSolverProblemFormat::EquationSystem;
                        storm::storage::SparseMatrix<ValueType> submatrix = deterministicMatrix.getSubmatrix(true, maybeStates, maybeStates, needEquationSystem);
                        if (needEquationSystem) {
                            // Converting the matrix from the fixpoint notation to the form needed for the equation system. That is, we go from x = A*x + b to (I-A)x = b.
                            submatrix.convertToEquationSystem();
                        }

                        // Prepare solution vector and rhs of the equation system.
                        std::vector<ValueType> x = storm::utility::vector::filterVector(objectiveResults[objIndex], maybeStates);
                        std::vector<ValueType> b = storm::utility::vector::filterVector(deterministicStateRewards, maybeStates);

                        // Now solve the resulting equation system.
                        std::unique_ptr<storm::solver::LinearEquationSolver<ValueType>> solver = linearEquationSolverFactory.create(env, submatrix);
                        auto req = solver->getRequirements(env);
                        solver->clearBounds();
                        storm::storage::BitVector submatrixRowsWithSumLessOne = deterministicMatrix.getRowFilter(maybeStates, maybeStates) % maybeStates;
                        submatrixRowsWithSumLessOne.complement();
                        this->setBoundsToSolver(*solver, req.lowerBounds(), req.upperBounds(), objIndex, submatrix, submatrixRowsWithSumLessOne, b);
                        if (solver->hasLowerBound()) {
                            req.clearLowerBounds();
                        }
                        if (solver->hasUpperBound()) {
                            req.clearUpperBounds();
                        }
                        STORM_LOG_THROW(!req.hasEnabledCriticalRequirement(), storm::exceptions::UncheckedRequirementException, "Solver requirements " + req.getEnabledRequirementsAsString() + " not checked.");
                        solver->solveEquations(env, x, b);

                        // Set the result for this objective accordingly
                        storm::utility::vector::setVectorValues(objectiveResults[objIndex], maybeStates, x);
                    }
                    storm::utility::vector::setVectorValues(objectiveResults[objIndex], ~maybeStates, storm::utility::zero<ValueType>());
                }
                // Update the estimate for the next objectives.
                if (!storm::utility::isZero(weightVector[objIndex])) {
                    storm::utility::vector::addScaledVector(weightedSumOfUncheckedObjectives, objectiveResults[objIndex], -weightVector[objIndex]);
                    sumOfWeightsOfUncheckedObjectives -= weightVector[objIndex];
                }
            } else {
                objectiveResults[objIndex] = std::vector<ValueType>(transitionMatrix.getRowGroupCount(), storm::utility::zero<ValueType>());
            }
        }
    }
}
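// Rebuilds the cached EC quotient if the set of choices with zero (weighted) reward has changed: only states from which
// a transition with non-zero reward is reachable are kept, and end components in which no reward is earned are
// eliminated.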
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::updateEcQuotient(std::vector<ValueType> const& weightedRewardVector) {
    // Check whether we need to update the currently cached ecElimResult
    storm::storage::BitVector newTotalReward0Choices = storm::utility::vector::filterZero(weightedRewardVector);
    storm::storage::BitVector zeroLraRewardChoices(weightedRewardVector.size(), true);
    if (lraMecDecomposition) {
        for (uint64_t mecIndex = 0; mecIndex < lraMecDecomposition->mecs.size(); ++mecIndex) {
            if (!storm::utility::isZero(lraMecDecomposition->auxMecValues[mecIndex])) {
                // The mec has a non-zero value, so flag all its choices as non-zero
                auto const& mec = lraMecDecomposition->mecs[mecIndex];
                for (auto const& stateChoices : mec) {
                    for (auto const& choice : stateChoices.second) {
                        zeroLraRewardChoices.set(choice, false);
                    }
                }
            }
        }
    }
    storm::storage::BitVector newReward0Choices = newTotalReward0Choices & zeroLraRewardChoices;
    if (!ecQuotient || ecQuotient->origReward0Choices != newReward0Choices) {
        // It is sufficient to consider the states from which a transition with non-zero reward is reachable. (The remaining states always have reward zero).
        auto nonZeroRewardStates = transitionMatrix.getRowGroupFilter(newReward0Choices, true);
        nonZeroRewardStates.complement();
        storm::storage::BitVector subsystemStates = storm::utility::graph::performProbGreater0E(transitionMatrix.transpose(true), storm::storage::BitVector(transitionMatrix.getRowGroupCount(), true), nonZeroRewardStates);

        // Remove neutral end components, i.e., ECs in which no total reward is earned.
        // Note that such ECs contain one (or maybe more) LRA ECs.
        auto ecElimResult = storm::transformer::EndComponentEliminator<ValueType>::transform(transitionMatrix, subsystemStates, ecChoicesHint & newTotalReward0Choices, totalReward0EStates);
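        // Mark the quotient rows whose probabilities sum up to less than one, i.e., rows that (partially) lead out of the
        // considered subsystem. These rows are later used to compute valid initial schedulers and solver bounds.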
        storm::storage::BitVector rowsWithSumLessOne(ecElimResult.matrix.getRowCount(), false);
        for (uint64_t row = 0; row < rowsWithSumLessOne.size(); ++row) {
            if (ecElimResult.matrix.getRow(row).getNumberOfEntries() == 0) {
                rowsWithSumLessOne.set(row, true);
            } else {
                for (auto const& entry : transitionMatrix.getRow(ecElimResult.newToOldRowMapping[row])) {
                    if (!subsystemStates.get(entry.getColumn())) {
                        rowsWithSumLessOne.set(row, true);
                        break;
                    }
                }
            }
        }

        ecQuotient = EcQuotient();
        ecQuotient->matrix = std::move(ecElimResult.matrix);
        ecQuotient->ecqToOriginalChoiceMapping = std::move(ecElimResult.newToOldRowMapping);
        ecQuotient->originalToEcqStateMapping = std::move(ecElimResult.oldToNewStateMapping);
        ecQuotient->ecqToOriginalStateMapping.resize(ecQuotient->matrix.getRowGroupCount());
        for (uint64_t state = 0; state < ecQuotient->originalToEcqStateMapping.size(); ++state) {
            uint64_t ecqState = ecQuotient->originalToEcqStateMapping[state];
            if (ecqState < ecQuotient->matrix.getRowGroupCount()) {
                ecQuotient->ecqToOriginalStateMapping[ecqState].insert(state);
            }
        }
        ecQuotient->ecqStayInEcChoices = std::move(ecElimResult.sinkRows);
        ecQuotient->origReward0Choices = std::move(newReward0Choices);
        ecQuotient->rowsWithSumLessOne = std::move(rowsWithSumLessOne);
        ecQuotient->auxStateValues.resize(ecQuotient->matrix.getRowGroupCount());
        ecQuotient->auxChoiceValues.resize(ecQuotient->matrix.getRowCount());
    }
}

template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::setBoundsToSolver(storm::solver::AbstractEquationSolver<ValueType>& solver, bool requiresLower, bool requiresUpper, uint64_t objIndex, storm::storage::SparseMatrix<ValueType> const& transitions, storm::storage::BitVector const& rowsWithSumLessOne, std::vector<ValueType> const& rewards) const {
    // Check whether bounds are already available
    if (this->objectives[objIndex].lowerResultBound) {
        solver.setLowerBound(this->objectives[objIndex].lowerResultBound.get());
    }
    if (this->objectives[objIndex].upperResultBound) {
        solver.setUpperBound(this->objectives[objIndex].upperResultBound.get());
    }

    if ((requiresLower && !solver.hasLowerBound()) || (requiresUpper && !solver.hasUpperBound())) {
        computeAndSetBoundsToSolver(solver, requiresLower, requiresUpper, transitions, rowsWithSumLessOne, rewards);
    }
}

template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::setBoundsToSolver(storm::solver::AbstractEquationSolver<ValueType>& solver, bool requiresLower, bool requiresUpper, std::vector<ValueType> const& weightVector, storm::storage::BitVector const& objectiveFilter, storm::storage::SparseMatrix<ValueType> const& transitions, storm::storage::BitVector const& rowsWithSumLessOne, std::vector<ValueType> const& rewards) const {
    // Check whether bounds are already available
    boost::optional<ValueType> lowerBound = this->computeWeightedResultBound(true, weightVector, objectiveFilter & ~lraObjectives);
    if (lowerBound) {
        if (!lraObjectives.empty()) {
            auto min = std::min_element(lraMecDecomposition->auxMecValues.begin(), lraMecDecomposition->auxMecValues.end());
            if (min != lraMecDecomposition->auxMecValues.end()) {
                lowerBound.get() += *min;
            }
        }
        solver.setLowerBound(lowerBound.get());
    }
    boost::optional<ValueType> upperBound = this->computeWeightedResultBound(false, weightVector, objectiveFilter);
    if (upperBound) {
        if (!lraObjectives.empty()) {
            auto max = std::max_element(lraMecDecomposition->auxMecValues.begin(), lraMecDecomposition->auxMecValues.end());
            if (max != lraMecDecomposition->auxMecValues.end()) {
                upperBound.get() += *max;
            }
        }
        solver.setUpperBound(upperBound.get());
    }

    if ((requiresLower && !solver.hasLowerBound()) || (requiresUpper && !solver.hasUpperBound())) {
        computeAndSetBoundsToSolver(solver, requiresLower, requiresUpper, transitions, rowsWithSumLessOne, rewards);
    }
}
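// Derives lower and upper bounds for the solver from the given rewards: the probability of leaving the considered system
// in one step (1 - row sum) is passed to the DsMpi and Baier reward bound computers; lower bounds are obtained by
// computing upper bounds for the negated negative rewards.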
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::computeAndSetBoundsToSolver(storm::solver::AbstractEquationSolver<ValueType>& solver, bool requiresLower, bool requiresUpper, storm::storage::SparseMatrix<ValueType> const& transitions, storm::storage::BitVector const& rowsWithSumLessOne, std::vector<ValueType> const& rewards) const {
    // Compute the one step target probs
    std::vector<ValueType> oneStepTargetProbs(transitions.getRowCount(), storm::utility::zero<ValueType>());
    for (auto row : rowsWithSumLessOne) {
        oneStepTargetProbs[row] = storm::utility::one<ValueType>() - transitions.getRowSum(row);
    }

    if (requiresLower && !solver.hasLowerBound()) {
        // Compute lower bounds
        std::vector<ValueType> negativeRewards;
        negativeRewards.reserve(transitions.getRowCount());
        uint64_t row = 0;
        for (auto const& rew : rewards) {
            if (rew < storm::utility::zero<ValueType>()) {
                negativeRewards.resize(row, storm::utility::zero<ValueType>());
                negativeRewards.push_back(-rew);
            }
            ++row;
        }
        if (!negativeRewards.empty()) {
            negativeRewards.resize(row, storm::utility::zero<ValueType>());
            std::vector<ValueType> lowerBounds = storm::modelchecker::helper::DsMpiMdpUpperRewardBoundsComputer<ValueType>(transitions, negativeRewards, oneStepTargetProbs).computeUpperBounds();
            storm::utility::vector::scaleVectorInPlace(lowerBounds, -storm::utility::one<ValueType>());
            solver.setLowerBounds(std::move(lowerBounds));
        } else {
            solver.setLowerBound(storm::utility::zero<ValueType>());
        }
    }

    // Compute upper bounds
    if (requiresUpper && !solver.hasUpperBound()) {
        std::vector<ValueType> positiveRewards;
        positiveRewards.reserve(transitions.getRowCount());
        uint64_t row = 0;
        for (auto const& rew : rewards) {
            if (rew > storm::utility::zero<ValueType>()) {
                positiveRewards.resize(row, storm::utility::zero<ValueType>());
                positiveRewards.push_back(rew);
            }
            ++row;
        }
        if (!positiveRewards.empty()) {
            positiveRewards.resize(row, storm::utility::zero<ValueType>());
            solver.setUpperBound(storm::modelchecker::helper::BaierUpperRewardBoundsComputer<ValueType>(transitions, positiveRewards, oneStepTargetProbs).computeUpperBound());
        } else {
            solver.setUpperBound(storm::utility::zero<ValueType>());
        }
    }
}
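// Translates the solution and scheduler obtained on the EC quotient back to the original model. For the original states
// of a collapsed EC, choices are set such that either the contained LRA MEC or the state owning the selected exit choice
// is reached almost surely.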
template <class SparseModelType>
void StandardPcaaWeightVectorChecker<SparseModelType>::transformEcqSolutionToOriginalModel(std::vector<ValueType> const& ecqSolution, std::vector<uint64_t> const& ecqOptimalChoices, std::map<uint64_t, uint64_t> const& ecqStateToOptimalMecMap, std::vector<ValueType>& originalSolution, std::vector<uint64_t>& originalOptimalChoices) const {
    auto backwardsTransitions = transitionMatrix.transpose(true);

    // Keep track of states for which no choice has been set yet.
    storm::storage::BitVector unprocessedStates(transitionMatrix.getRowGroupCount(), true);

    // For each eliminated ec, keep track of the states (within the ec) that we want to reach and the states for which a choice needs to be set
    // (Declared already at this point to avoid expensive allocations in each loop iteration)
    storm::storage::BitVector ecStatesToReach(transitionMatrix.getRowGroupCount(), false);
    storm::storage::BitVector ecStatesToProcess(transitionMatrix.getRowGroupCount(), false);

    // Run through each state of the ec quotient as well as the associated state(s) of the original model
    for (uint64_t ecqState = 0; ecqState < ecqSolution.size(); ++ecqState) {
        uint64_t ecqChoice = ecQuotient->matrix.getRowGroupIndices()[ecqState] + ecqOptimalChoices[ecqState];
        uint_fast64_t origChoice = ecQuotient->ecqToOriginalChoiceMapping[ecqChoice];
        auto const& origStates = ecQuotient->ecqToOriginalStateMapping[ecqState];
        STORM_LOG_ASSERT(!origStates.empty(), "Unexpected empty set of original states.");
        bool origStatesNeedSchedulerComputation = false;
        if (ecQuotient->ecqStayInEcChoices.get(ecqChoice) && !ecqStateToOptimalMecMap.empty()) {
            // The current ecqState represents an eliminated EC in which we need to stay, and we need to make sure that optimal MEC decisions are performed within this EC.
            STORM_LOG_ASSERT(ecqStateToOptimalMecMap.count(ecqState) > 0, "No Lra Mec associated to given eliminated EC");
            auto const& lraMec = lraMecDecomposition->mecs[ecqStateToOptimalMecMap.at(ecqState)];
            if (lraMec.size() == origStates.size()) {
                // LRA mec and eliminated EC coincide
                for (auto const& state : origStates) {
                    STORM_LOG_ASSERT(lraMec.containsState(state), "Expected state to be contained in the lra mec.");
                    // Note that the optimal choice for this state has already been set in the infinite horizon phase.
                    unprocessedStates.set(state, false);
                    originalSolution[state] = ecqSolution[ecqState];
                }
            } else {
                // LRA mec is a proper subset of the eliminated ec. There are also other states for which we have to set choices leading to the LRA MEC inside.
                STORM_LOG_ASSERT(lraMec.size() < origStates.size(), "Lra Mec should be a proper subset of the eliminated ec.");
                origStatesNeedSchedulerComputation = true;
                for (auto const& state : origStates) {
                    if (lraMec.containsState(state)) {
                        ecStatesToReach.set(state, true);
                        // Note that the optimal choice for this state has already been set in the infinite horizon phase.
                    } else {
                        ecStatesToProcess.set(state, true);
                    }
                    unprocessedStates.set(state, false);
                    originalSolution[state] = ecqSolution[ecqState];
                }
            }
        } else {
            // In this case, we can safely take the origChoice at the corresponding state (say 's').
            // For all other origStates associated with ecqState (if there are any), we make sure that the state 's' is reached almost surely.
            if (origStates.size() > 1) {
                origStatesNeedSchedulerComputation = true;
                for (auto const& state : origStates) {
                    // Check if the orig choice originates from this state
                    auto groupStart = transitionMatrix.getRowGroupIndices()[state];
                    auto groupEnd = transitionMatrix.getRowGroupIndices()[state + 1];
                    if (origChoice >= groupStart && origChoice < groupEnd) {
                        originalOptimalChoices[state] = origChoice - groupStart;
                        ecStatesToReach.set(state, true);
                    } else {
                        STORM_LOG_ASSERT(origStates.size() > 1, "Multiple original states expected.");
                        ecStatesToProcess.set(state, true);
                    }
                    unprocessedStates.set(state, false);
                    originalSolution[state] = ecqSolution[ecqState];
                }
            } else {
                // There is just one state so we take the associated choice.
                auto state = *origStates.begin();
                auto groupStart = transitionMatrix.getRowGroupIndices()[state];
                originalOptimalChoices[state] = origChoice - groupStart;
                STORM_LOG_ASSERT(originalOptimalChoices[state] >= 0 && originalOptimalChoices[state] < transitionMatrix.getRowGroupSize(state), "Invalid choice: " << originalOptimalChoices[state] << " at a state with " << transitionMatrix.getRowGroupSize(state) << " choices.");
                originalSolution[state] = ecqSolution[ecqState];
                unprocessedStates.set(state, false);
            }
        }
        if (origStatesNeedSchedulerComputation) {
            computeSchedulerProb1(transitionMatrix, backwardsTransitions, ecStatesToProcess, ecStatesToReach, originalOptimalChoices);
            // These bit vectors have only been altered if the origStatesNeedSchedulerComputation flag has been set to true. Hence, they only need to be cleared in this branch.
            ecStatesToProcess.clear();
            ecStatesToReach.clear();
        }
    }

    // For the states that have not been processed yet, there is no associated state of the EC quotient.
    // This is because the value for these states will be 0 under all (lra optimal-) schedulers.
    // We thus do not change the choices at these states (to preserve LRA optimality) and just set the result to 0.
    storm::utility::vector::setVectorValues(originalSolution, unprocessedStates, storm::utility::zero<ValueType>());
}

template class StandardPcaaWeightVectorChecker<storm::models::sparse::Mdp<double>>;
template class StandardPcaaWeightVectorChecker<storm::models::sparse::MarkovAutomaton<double>>;
#ifdef STORM_HAVE_CARL
template class StandardPcaaWeightVectorChecker<storm::models::sparse::Mdp<storm::RationalNumber>>;
template class StandardPcaaWeightVectorChecker<storm::models::sparse::MarkovAutomaton<storm::RationalNumber>>;
#endif

}  // namespace multiobjective
}  // namespace modelchecker
}  // namespace storm