diff --git a/examples/shields/02_post_shield_extraction.py b/examples/shields/02_post_shield_extraction.py
index 4e7199a..6bb0128 100644
--- a/examples/shields/02_post_shield_extraction.py
+++ b/examples/shields/02_post_shield_extraction.py
@@ -41,7 +41,7 @@ def post_shield_extraction():
 
     for state_id in model.states:
         choices = shield.construct().get_choice(state_id)
-        print(F"Allowed choices in state {state_id}, are {choices.choice_map} ")
+        print(F"Applied corrections in state {state_id} are {choices.choice_map}")
 
 
diff --git a/examples/shields/03_optimal_shield_extraction.py b/examples/shields/03_optimal_shield_extraction.py
index 6b03f14..ffdb009 100644
--- a/examples/shields/03_optimal_shield_extraction.py
+++ b/examples/shields/03_optimal_shield_extraction.py
@@ -29,9 +29,11 @@ def optimal_shield_extraction():
 
     shield = result.shield
 
-    for state_id in model.states:
+    state_ids = [x for x in model.states]
+
+    for state_id in state_ids[0:50]:
         choices = shield.construct().get_choice(state_id)
-        print(F"Allowed choices in state {state_id}, are {choices.choice_map} ")
+        print(F"Corrections in state {state_id} are {choices.choice_map}")
 
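A side note on the two extraction loops above: `shield.construct()` builds a fresh scheduler object on every iteration, so the construction can be hoisted out of the loop. A minimal sketch, assuming `model` and `shield` are obtained exactly as in these examples (their setup code is outside this diff); the helper name `print_corrections` is hypothetical:

    def print_corrections(model, shield, limit=50):
        # Construct the shielded scheduler once, not once per state.
        scheduler = shield.construct()
        for state_id in list(model.states)[:limit]:
            choices = scheduler.get_choice(state_id)
            print(F"Corrections in state {state_id} are {choices.choice_map}")

The `limit` parameter mirrors the `state_ids[0:50]` truncation that 03_optimal_shield_extraction.py introduces to keep the output short.
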
Pmax=? [G !\"AgentIsInLavaAndNotDone\"]" + formula_str = "Pmax=? [G !\"AgentIsInLavaAndNotDone\"]" program = stormpy.parse_prism_program(path) formulas = stormpy.parse_properties_for_prism_program(formula_str, program) diff --git a/examples/shields/06_optimal_shield_export.py b/examples/shields/06_optimal_shield_export.py index f8087f5..7a3a870 100644 --- a/examples/shields/06_optimal_shield_export.py +++ b/examples/shields/06_optimal_shield_export.py @@ -16,7 +16,7 @@ to a file def optimal_shield_export(): path = stormpy.examples.files.prism_smg_lights - formula_str = "< > R{\"differenceWithInterferenceCost\"}min=? [ LRA ]" + formula_str = " < > R{\"differenceWithInterferenceCost\"}min=? [ LRA ]" program = stormpy.parse_prism_program(path) formulas = stormpy.parse_properties_for_prism_program(formula_str, program) diff --git a/examples/shields/07_pre_shield_simulator.py b/examples/shields/07_pre_shield_simulator.py index f08d7ff..a5433ea 100644 --- a/examples/shields/07_pre_shield_simulator.py +++ b/examples/shields/07_pre_shield_simulator.py @@ -37,22 +37,24 @@ def example_pre_shield_simulator(): pre_scheduler = shield.construct() simulator = stormpy.simulator.create_simulator(model, seed=42) - final_outcomes = dict() - for n in range(1000): - while not simulator.is_done(): - current_state = simulator.get_current_state() - choices = pre_scheduler.get_choice(current_state).choice_map - index = random.randint(0, len(choices) - 1) - selected_action = choices[index] - state_string = model.state_valuations.get_string(current_state) - print(F"Simulator is in state {state_string}. Allowed Choices are {choices}. Selected Action: {selected_action}") - observation, reward = simulator.step(selected_action[1]) - if observation not in final_outcomes: - final_outcomes[observation] = 1 - else: - final_outcomes[observation] += 1 - simulator.restart() + while not simulator.is_done(): + current_state = simulator.get_current_state() + state_string = model.state_valuations.get_string(current_state) + print(F"Simulator is in state {state_string}.") + choices = [x for x in pre_scheduler.get_choice(current_state).choice_map if x[0] > 0] + choice_labels = [model.choice_labeling.get_labels_of_choice(model.get_choice_index(current_state, choice[1])) for choice in choices] + + if not choices: + break + + index = random.randint(0, len(choices) - 1) + selected_action = choices[index] + choice_label = model.choice_labeling.get_labels_of_choice(model.get_choice_index(current_state, selected_action[1])) + print(F"Allowed Choices are {choice_labels}. Selected Action: {choice_label}") + observation, reward = simulator.step(selected_action[1]) + + if __name__ == '__main__': diff --git a/examples/shields/08_post_shield_simulator.py b/examples/shields/08_post_shield_simulator.py index e69de29..5f51c99 100644 --- a/examples/shields/08_post_shield_simulator.py +++ b/examples/shields/08_post_shield_simulator.py @@ -0,0 +1,68 @@ +import stormpy +import stormpy.core +import stormpy.simulator + +import stormpy.shields + +import stormpy.examples +import stormpy.examples.files + + +""" +Simulating a model with the usage of a post shield +""" + +def example_post_shield_simulator(): + path = stormpy.examples.files.prism_mdp_lava_simple + formula_str = " Pmax=? [G !\"AgentIsInLavaAndNotDone\"]; Pmax=? 
[ F \"AgentIsInLavaAndNotDone\" ];" + + program = stormpy.parse_prism_program(path) + formulas = stormpy.parse_properties_for_prism_program(formula_str, program) + + options = stormpy.BuilderOptions([p.raw_formula for p in formulas]) + options.set_build_state_valuations(True) + options.set_build_choice_labels(True) + options.set_build_all_labels() + model = stormpy.build_sparse_model_with_options(program, options) + + initial_state = model.initial_states[0] + assert initial_state == 0 + result = stormpy.model_checking(model, formulas[0]) + result2 = stormpy.model_checking(model, formulas[1], extract_scheduler=True) + + assert result.has_shield + assert result2.has_scheduler + + shield = result.shield + scheduler = result2.scheduler + + post_scheduler = shield.construct() + + simulator = stormpy.simulator.create_simulator(model, seed=42) + + while not simulator.is_done(): + current_state = simulator.get_current_state() + state_string = model.state_valuations.get_string(current_state) + # print(F"Simulator is in state {state_string}.") + + sched_choice = scheduler.get_choice(current_state).get_deterministic_choice() + # print(F"Scheduler choice {model.choice_labeling.get_labels_of_choice(model.get_choice_index(current_state, sched_choice))}") + + corrections = post_scheduler.get_choice(current_state).choice_map + # print(corrections) + correction_labels = [(model.get_label_of_choice(current_state, correction[0]), model.get_label_of_choice(current_state, correction[1])) for correction in corrections] + # print(F"Correction Choices are {correction_labels}.") + + applied_correction = next((x[1] for x in corrections if x[0] == sched_choice), None) + + if applied_correction != None and applied_correction != sched_choice: + print(F"Correction applied changed choice {sched_choice} to {applied_correction}") + sched_choice = applied_correction + + observation, reward = simulator.step(sched_choice) + + + + +if __name__ == '__main__': + example_post_shield_simulator() \ No newline at end of file diff --git a/examples/shields/09_optimal_shield_simulator.py b/examples/shields/09_optimal_shield_simulator.py index e69de29..0d55c4c 100644 --- a/examples/shields/09_optimal_shield_simulator.py +++ b/examples/shields/09_optimal_shield_simulator.py @@ -0,0 +1,53 @@ +import stormpy +import stormpy.core +import stormpy.simulator + + +import stormpy.shields + +import stormpy.examples +import stormpy.examples.files + +import random + + +def optimal_shield_simulator(): + path = stormpy.examples.files.prism_smg_lights + formula_str = " < > R{\"differenceWithInterferenceCost\"}min=? 
[ LRA ]" + + program = stormpy.parse_prism_program(path) + formulas = stormpy.parse_properties_for_prism_program(formula_str, program) + + options = stormpy.BuilderOptions([p.raw_formula for p in formulas]) + options.set_build_state_valuations(True) + options.set_build_choice_labels(True) + options.set_build_all_labels() + model = stormpy.build_sparse_model_with_options(program, options) + + result = stormpy.model_checking(model, formulas[0], extract_scheduler=True) + + assert result.has_scheduler + assert result.has_shield + + shield = result.shield + + scheduler = shield.construct() + simulator = stormpy.simulator.create_simulator(model)#, seed=42) + + print(simulator) + while not simulator.is_done(): + current_state = simulator.get_current_state() + state_string = model.state_valuations.get_string(current_state) + # print(F"Simulator is in state {state_string}.") + temp = scheduler.get_choice(current_state) + # print(F"Correction map is {temp.choice_map}") + # print([model.get_label_of_choice(current_state, x) for x in simulator.available_actions()]) + print(F"Available actions {simulator.available_actions()}") + for action in simulator.available_actions(): + print(F"Action: {action} ActionLabel: {model.get_label_of_choice(current_state, action)}") + observation, reward = simulator.step() + + + +if __name__ == '__main__': + optimal_shield_simulator() \ No newline at end of file diff --git a/lib/stormpy/examples/files/mdp/simple.prism b/lib/stormpy/examples/files/mdp/simple.prism index 77e3777..0305221 100644 --- a/lib/stormpy/examples/files/mdp/simple.prism +++ b/lib/stormpy/examples/files/mdp/simple.prism @@ -32,17 +32,3 @@ module Agent endmodule -// Pmax=? [G !'AgentIsInLavaAndNotDone']; -// Model handling soll auch das Shield zurückgeben -// Im SparseMdpPrctlModelChecker wird das shield mit createShield erstellt -// es soll irgendwie im CheckResult landen welches im model-handling.h verifyWithSparseEngine (Methode) gehandelt wird. 
diff --git a/lib/stormpy/examples/files/mdp/simple.prism b/lib/stormpy/examples/files/mdp/simple.prism
index 77e3777..0305221 100644
--- a/lib/stormpy/examples/files/mdp/simple.prism
+++ b/lib/stormpy/examples/files/mdp/simple.prism
@@ -32,17 +32,3 @@ module Agent
 
 endmodule
 
-// Pmax=? [G !'AgentIsInLavaAndNotDone'];
-// Model handling soll auch das Shield zurückgeben
-// Im SparseMdpPrctlModelChecker wird das shield mit createShield erstellt
-// es soll irgendwie im CheckResult landen welches im model-handling.h verifyWithSparseEngine (Methode) gehandelt wird.
-
-// Result für den MDP Typen ist im SparseMdpPrctlHelper.computeUntilProbabilities
-// MDPSparseModelCheckingHelperReturnType ist der Typ von dem
-
-// Das ergebnis vom SparseMdpPrctlModelChecker ist ein ExplicitQuantitativeCheckResult
-
-// Prinzipieller ablauf
-// PRISM -> Parser -> ModelEngine |
-// model_handlling.h -> AbstractMC -> SparseMDPML -> SparseMdpPrctlHelper
-// Property -> Formula Parser -> |
diff --git a/lib/stormpy/examples/files/smg/robot_controller.prism b/lib/stormpy/examples/files/smg/robot_controller.prism
index e5073ea..cb78e6b 100644
--- a/lib/stormpy/examples/files/smg/robot_controller.prism
+++ b/lib/stormpy/examples/files/smg/robot_controller.prism
@@ -9,7 +9,7 @@ player adverseryRobot
 endplayer
 
 // (w+1)x16 - grid
-const int w;
+const int w = 2;
 const int width = w;
 
 const int height = 15;
diff --git a/src/storage/model.cpp b/src/storage/model.cpp
index ef482a1..58f8a8e 100644
--- a/src/storage/model.cpp
+++ b/src/storage/model.cpp
@@ -223,6 +223,8 @@ void define_sparse_model(py::module& m, std::string const& vtSuffix) {
         .def("get_choice_index", [](SparseMdp<ValueType> const& mdp, uint64_t state, uint64_t actOff) { return mdp.getNondeterministicChoiceIndices()[state]+actOff; }, py::arg("state"), py::arg("action_offset"), "gets the choice index for the offset action from the given state.")
         .def("apply_scheduler", [](SparseMdp<ValueType> const& mdp, storm::storage::Scheduler<ValueType> const& scheduler, bool dropUnreachableStates) { return mdp.applyScheduler(scheduler, dropUnreachableStates); } , "apply scheduler", "scheduler"_a, "drop_unreachable_states"_a = true)
         .def("__str__", &getModelInfoPrinter)
+        .def("get_label_of_choice", [](SparseMdp<ValueType> const& mdp, uint64_t state, uint64_t actOff) {return mdp.getChoiceLabeling().getLabelsOfChoice(mdp.getNondeterministicChoiceIndices()[state]+actOff);}, py::arg("state"), py::arg("action_offset"))
+
     ;
 
     py::class_<Smg<ValueType>, std::shared_ptr<Smg<ValueType>>> smg(m, ("Sparse" + vtSuffix + "Smg").c_str(), "SMG in sparse representation", model);
@@ -231,6 +233,9 @@ void define_sparse_model(py::module& m, std::string const& vtSuffix) {
         .def(py::init<ModelComponents<ValueType> const&>(), py::arg("components"))
         .def("get_player_of_state", &Smg<ValueType>::getPlayerOfState, py::arg("state_index"))
         .def("get_player_index", &Smg<ValueType>::getPlayerIndex, py::arg("player_name"))
+        .def("get_choice_index", [](Smg<ValueType> const& smg, uint64_t state, uint64_t actOff) { return smg.getNondeterministicChoiceIndices()[state]+actOff; }, py::arg("state"), py::arg("action_offset"), "gets the choice index for the offset action from the given state.")
+        .def("get_label_of_choice", [](Smg<ValueType> const& smg, uint64_t state, uint64_t actOff) {return smg.getChoiceLabeling().getLabelsOfChoice(smg.getNondeterministicChoiceIndices()[state]+actOff);}, py::arg("state"), py::arg("action_offset"))
+
     ;
 
     py::class_<Stg<ValueType>, std::shared_ptr<Stg<ValueType>>> stg(m, ("Sparse" + vtSuffix + "Stg").c_str(), "STG in sparse representation", model);
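The two bindings added for the MDP and SMG classes are related: `get_label_of_choice(state, action_offset)` is exactly the composition of `get_choice_index` with a lookup in the choice labeling, which 07_pre_shield_simulator.py still spells out by hand. A short Python-side illustration of that equivalence, assuming any sparse model built with `set_build_choice_labels(True)`; the function name `check_label_shortcut` is hypothetical:

    def check_label_shortcut(model, state, action_offset):
        choice_index = model.get_choice_index(state, action_offset)
        via_labeling = model.choice_labeling.get_labels_of_choice(choice_index)
        via_shortcut = model.get_label_of_choice(state, action_offset)
        # Both paths resolve the same global choice, so the labels agree.
        assert via_labeling == via_shortcut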