From 233bf8b2ba879820027895dd27568221b109d095 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Mon, 8 Oct 2018 22:20:02 +0200 Subject: [PATCH] some explanation for iterating over MDPs and POMDPs --- doc/source/advanced_topics.rst | 1 + examples/exploration/01-exploration.py | 30 ++++++ examples/exploration/02-exploration.py | 38 +++++++ lib/stormpy/examples/files.py | 4 + lib/stormpy/examples/files/mdp/maze_2.nm | 126 +++++++++++++++++++++++ 5 files changed, 199 insertions(+) create mode 100644 examples/exploration/01-exploration.py create mode 100644 examples/exploration/02-exploration.py create mode 100644 lib/stormpy/examples/files/mdp/maze_2.nm diff --git a/doc/source/advanced_topics.rst b/doc/source/advanced_topics.rst index dc4a8e3..2ed8d80 100644 --- a/doc/source/advanced_topics.rst +++ b/doc/source/advanced_topics.rst @@ -9,6 +9,7 @@ This guide is a collection of examples meant to bridge the gap between the getti :caption: Contents: doc/building_models + doc/exploration doc/reward_models doc/shortest_paths doc/parametric_models diff --git a/examples/exploration/01-exploration.py b/examples/exploration/01-exploration.py new file mode 100644 index 0000000..fd22d2e --- /dev/null +++ b/examples/exploration/01-exploration.py @@ -0,0 +1,30 @@ +import stormpy +import stormpy.core + +import stormpy.examples +import stormpy.examples.files + + +def example_exploration_01(): + """ + Example to exploration of MDPs. + :return: + """ + program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze) + prop = "R=? [F \"goal\"]" + properties = stormpy.parse_properties_for_prism_program(prop, program, None) + model = stormpy.build_model(program, properties) + print(model.model_type) + + + for state in model.states: + if state.id in model.initial_states: + print(state) + for action in state.actions: + for transition in action.transitions: + print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(), + transition.column)) + + +if __name__ == '__main__': + example_exploration_01() \ No newline at end of file diff --git a/examples/exploration/02-exploration.py b/examples/exploration/02-exploration.py new file mode 100644 index 0000000..951a29a --- /dev/null +++ b/examples/exploration/02-exploration.py @@ -0,0 +1,38 @@ +import stormpy +import stormpy.core + +import stormpy.examples +import stormpy.examples.files + + +def example_exploration_02(): + """ + Example to exploration of POMDPs. + :return: + """ + program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze) + prop = "R=? [F \"goal\"]" + properties = stormpy.parse_properties_for_prism_program(prop, program, None) + model = stormpy.build_model(program, properties) + print(model.model_type) + # Internally, POMDPs are just MDPs with additional observation information. + # Thus, data structure exploration for MDPs can be applied as before. + initial_state = model.initial_states[0] + + for state in model.states: + if state.id in model.initial_states: + print(state) + for action in state.actions: + for transition in action.transitions: + print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(), + transition.column)) + + print(model.nr_observations) + + for state in model.states: + print("State {} has observation id {}".format(state.id, model.observations[state.id])) + + + +if __name__ == '__main__': + example_exploration_02() \ No newline at end of file diff --git a/lib/stormpy/examples/files.py b/lib/stormpy/examples/files.py index cb4718d..748b419 100644 --- a/lib/stormpy/examples/files.py +++ b/lib/stormpy/examples/files.py @@ -27,6 +27,10 @@ jani_dtmc_die = _path("dtmc", "die.jani") """Jani Version of Knuth Yao Die Example""" prism_mdp_coin_2_2 = _path("mdp", "coin2-2.nm") """Prism example for coin MDP""" +prism_mdp_maze = _path("mdp", "maze_2.nm") +"""Prism example for the maze MDP""" +prism_pomdp_maze = _path("pomdp", "maze_2.prism") +"""Prism example for the maze POMDP""" dft_galileo_hecs = _path("dft", "hecs.dft") """DFT example for HECS (Galileo format)""" dft_json_and = _path("dft", "and.json") diff --git a/lib/stormpy/examples/files/mdp/maze_2.nm b/lib/stormpy/examples/files/mdp/maze_2.nm new file mode 100644 index 0000000..396c48b --- /dev/null +++ b/lib/stormpy/examples/files/mdp/maze_2.nm @@ -0,0 +1,126 @@ + + +// maze example (POMDP) +// slightly extends that presented in +// Littman, Cassandra and Kaelbling +// Learning policies for partially observable environments: Scaling up +// Technical Report CS, Brown University +// gxn 29/01/16 +// Made into a MDP for documentation of stormpy. + +// state space (value of variable "s") + +// 0 1 2 3 4 +// 5 6 7 +// 8 9 10 +// 11 13 12 + +// 13 is the target + +mdp + + +module maze + + s : [-1..13]; + + // initialisation + [] s=-1 -> 1/13 : (s'=0) + + 1/13 : (s'=1) + + 1/13 : (s'=2) + + 1/13 : (s'=3) + + 1/13 : (s'=4) + + 1/13 : (s'=5) + + 1/13 : (s'=6) + + 1/13 : (s'=7) + + 1/13 : (s'=8) + + 1/13 : (s'=9) + + 1/13 : (s'=10) + + 1/13 : (s'=11) + + 1/13 : (s'=12); + + // moving around the maze + + [east] s=0 -> (s'=1); + [west] s=0 -> (s'=0); + [north] s=0 -> (s'=0); + [south] s=0 -> (s'=5); + + [east] s=1 -> (s'=2); + [west] s=1 -> (s'=0); + [north] s=1 -> (s'=1); + [south] s=1 -> (s'=1); + + [east] s=2 -> (s'=3); + [west] s=2 -> (s'=1); + [north] s=2 -> (s'=2); + [south] s=2 -> (s'=6); + + [east] s=3 -> (s'=4); + [west] s=3 -> (s'=2); + [north] s=3 -> (s'=3); + [south] s=3 -> (s'=3); + + [east] s=4 -> (s'=4); + [west] s=4 -> (s'=3); + [north] s=4 -> (s'=4); + [south] s=4 -> (s'=7); + + [east] s=5 -> (s'=5); + [west] s=5 -> (s'=5); + [north] s=5 -> (s'=0); + [south] s=5 -> (s'=8); + + [east] s=6 -> (s'=6); + [west] s=6 -> (s'=6); + [north] s=6 -> (s'=2); + [south] s=6 -> (s'=9); + + [east] s=7 -> (s'=7); + [west] s=7 -> (s'=7); + [north] s=7 -> (s'=4); + [south] s=7 -> (s'=10); + + [east] s=8 -> (s'=8); + [west] s=8 -> (s'=8); + [north] s=8 -> (s'=5); + [south] s=8 -> (s'=11); + + [east] s=9 -> (s'=9); + [west] s=9 -> (s'=9); + [north] s=9 -> (s'=6); + [south] s=9 -> (s'=13); + + [east] s=10 -> (s'=9); + [west] s=10 -> (s'=9); + [north] s=10 -> (s'=7); + [south] s=10 -> (s'=12); + [east] s=11 -> (s'=11); + [west] s=11 -> (s'=11); + [north] s=11 -> (s'=8); + [south] s=11 -> (s'=11); + + [east] s=12 -> (s'=12); + [west] s=12 -> (s'=12); + [north] s=12 -> (s'=10); + [south] s=12 -> (s'=12); + + // loop when we reach the target + [done] s=13 -> true; + +endmodule + +// reward structure (number of steps to reach the target) +rewards + + [east] true : 1; + [west] true : 1; + [north] true : 1; + [south] true : 1; + +endrewards + +// target observation +label "goal" = s=13; + +