diff --git a/doc/source/advanced_topics.rst b/doc/source/advanced_topics.rst
index dc4a8e3..2ed8d80 100644
--- a/doc/source/advanced_topics.rst
+++ b/doc/source/advanced_topics.rst
@@ -9,6 +9,7 @@ This guide is a collection of examples meant to bridge the gap between the getti
     :caption: Contents:
 
     doc/building_models
+    doc/exploration
     doc/reward_models
     doc/shortest_paths
     doc/parametric_models
diff --git a/doc/source/doc/exploration.rst b/doc/source/doc/exploration.rst
new file mode 100644
index 0000000..5285b2a
--- /dev/null
+++ b/doc/source/doc/exploration.rst
@@ -0,0 +1,106 @@
+****************
+Exploring Models
+****************
+
+Background
+=====================
+
+Often, stormpy is used as a testbed for new algorithms.
+An essential step is to transfer the (low-level) description of an MDP or another state-based model into
+one's own algorithm. In this section, we discuss some of the functionality stormpy offers for this purpose.
+
+Reading MDPs
+=====================
+
+.. seealso:: `01-exploration.py `_
+
+In :doc:`../getting_started`, we briefly iterated over a DTMC. In this section, we explore an MDP::
+
+    >>> import doctest
+    >>> doctest.ELLIPSIS_MARKER = '-etc-' # doctest:+ELLIPSIS
+    >>> import stormpy
+    >>> import stormpy.examples
+    >>> import stormpy.examples.files
+    >>> program = stormpy.parse_prism_program(stormpy.examples.files.prism_mdp_maze)
+    >>> prop = "R=? [F \"goal\"]"
+
+    >>> properties = stormpy.parse_properties_for_prism_program(prop, program, None)
+    >>> model = stormpy.build_model(program, properties)
+
+The iteration over the model is as before, but now, every action can have several transitions::
+
+    >>> for state in model.states:
+    ...     if state.id in model.initial_states:
+    ...         print("State {} is initial".format(state.id))
+    ...     for action in state.actions:
+    ...         for transition in action.transitions:
+    ...             print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(), transition.column))
+    -etc-
+
+The output (omitted for brevity) contains lines like::
+
+    From state 1 by action 0, with probability 1.0, go to state 2
+    From state 1 by action 1, with probability 1.0, go to state 1
+
+Internally, storm can store hints about the origin of the actions, which may be helpful to attach meaning to them and for debugging.
+As the availability and the encoding of this data depend on the input model, we discuss these features in :doc:`highlevel_models`.
+
+Storm currently supports deterministic rewards on states or actions. More information can be found in :doc:`reward_models`.
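+
+For instance, the reward models attached to the model can be listed right away. The
+following is a minimal sketch on top of the ``model`` built above; we only assume the
+``reward_models`` mapping from names to reward structures of sparse models::
+
+    >>> for name in model.reward_models:
+    ...     print("Reward model '{}' has state-action rewards: {}".format(name, model.reward_models[name].has_state_action_rewards))
+    -etc-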
print("State {} has observation id {}".format(state.id, model.observations[state.id])) + State 0 has observation id 6 + State 1 has observation id 1 + State 2 has observation id 4 + State 3 has observation id 7 + State 4 has observation id 4 + State 5 has observation id 3 + State 6 has observation id 0 + State 7 has observation id 0 + State 8 has observation id 0 + State 9 has observation id 0 + State 10 has observation id 0 + State 11 has observation id 0 + State 12 has observation id 2 + State 13 has observation id 2 + State 14 has observation id 4 + State 15 has observation id 5 + + + + + +Reading MAs +====================== + +To be continued... \ No newline at end of file diff --git a/examples/exploration/01-exploration.py b/examples/exploration/01-exploration.py new file mode 100644 index 0000000..fd22d2e --- /dev/null +++ b/examples/exploration/01-exploration.py @@ -0,0 +1,30 @@ +import stormpy +import stormpy.core + +import stormpy.examples +import stormpy.examples.files + + +def example_exploration_01(): + """ + Example to exploration of MDPs. + :return: + """ + program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze) + prop = "R=? [F \"goal\"]" + properties = stormpy.parse_properties_for_prism_program(prop, program, None) + model = stormpy.build_model(program, properties) + print(model.model_type) + + + for state in model.states: + if state.id in model.initial_states: + print(state) + for action in state.actions: + for transition in action.transitions: + print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(), + transition.column)) + + +if __name__ == '__main__': + example_exploration_01() \ No newline at end of file diff --git a/examples/exploration/02-exploration.py b/examples/exploration/02-exploration.py new file mode 100644 index 0000000..951a29a --- /dev/null +++ b/examples/exploration/02-exploration.py @@ -0,0 +1,38 @@ +import stormpy +import stormpy.core + +import stormpy.examples +import stormpy.examples.files + + +def example_exploration_02(): + """ + Example to exploration of POMDPs. + :return: + """ + program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze) + prop = "R=? [F \"goal\"]" + properties = stormpy.parse_properties_for_prism_program(prop, program, None) + model = stormpy.build_model(program, properties) + print(model.model_type) + # Internally, POMDPs are just MDPs with additional observation information. + # Thus, data structure exploration for MDPs can be applied as before. 
+
+
+Reading MAs
+======================
+
+To be continued...
\ No newline at end of file
diff --git a/examples/exploration/01-exploration.py b/examples/exploration/01-exploration.py
new file mode 100644
index 0000000..fd22d2e
--- /dev/null
+++ b/examples/exploration/01-exploration.py
@@ -0,0 +1,30 @@
+import stormpy
+import stormpy.core
+
+import stormpy.examples
+import stormpy.examples.files
+
+
+def example_exploration_01():
+    """
+    Example for the exploration of MDPs.
+    :return:
+    """
+    program = stormpy.parse_prism_program(stormpy.examples.files.prism_mdp_maze)
+    prop = "R=? [F \"goal\"]"
+    properties = stormpy.parse_properties_for_prism_program(prop, program, None)
+    model = stormpy.build_model(program, properties)
+    print(model.model_type)
+
+    for state in model.states:
+        if state.id in model.initial_states:
+            print(state)
+        for action in state.actions:
+            for transition in action.transitions:
+                print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(),
+                                                                                               transition.column))
+
+
+if __name__ == '__main__':
+    example_exploration_01()
\ No newline at end of file
diff --git a/examples/exploration/02-exploration.py b/examples/exploration/02-exploration.py
new file mode 100644
index 0000000..951a29a
--- /dev/null
+++ b/examples/exploration/02-exploration.py
@@ -0,0 +1,38 @@
+import stormpy
+import stormpy.core
+
+import stormpy.examples
+import stormpy.examples.files
+
+
+def example_exploration_02():
+    """
+    Example for the exploration of POMDPs.
+    :return:
+    """
+    program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze)
+    prop = "R=? [F \"goal\"]"
+    properties = stormpy.parse_properties_for_prism_program(prop, program, None)
+    model = stormpy.build_model(program, properties)
+    print(model.model_type)
+    # Internally, POMDPs are just MDPs with additional observation information.
+    # Thus, data structure exploration for MDPs can be applied as before.
+    initial_state = model.initial_states[0]  # id of the (unique) initial state
+
+    for state in model.states:
+        if state.id in model.initial_states:
+            print(state)
+        for action in state.actions:
+            for transition in action.transitions:
+                print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(),
+                                                                                               transition.column))
+
+    print(model.nr_observations)
+
+    for state in model.states:
+        print("State {} has observation id {}".format(state.id, model.observations[state.id]))
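+
+    # A small sketch using only the attributes shown above: look up the
+    # observation of the initial state directly (initial_state is a state id).
+    print("Initial state {} has observation id {}".format(initial_state, model.observations[initial_state]))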
+
+
+if __name__ == '__main__':
+    example_exploration_02()
\ No newline at end of file
diff --git a/lib/stormpy/examples/files.py b/lib/stormpy/examples/files.py
index cb4718d..748b419 100644
--- a/lib/stormpy/examples/files.py
+++ b/lib/stormpy/examples/files.py
@@ -27,6 +27,10 @@ jani_dtmc_die = _path("dtmc", "die.jani")
 """Jani Version of Knuth Yao Die Example"""
 prism_mdp_coin_2_2 = _path("mdp", "coin2-2.nm")
 """Prism example for coin MDP"""
+prism_mdp_maze = _path("mdp", "maze_2.nm")
+"""Prism example for the maze MDP"""
+prism_pomdp_maze = _path("pomdp", "maze_2.prism")
+"""Prism example for the maze POMDP"""
 dft_galileo_hecs = _path("dft", "hecs.dft")
 """DFT example for HECS (Galileo format)"""
 dft_json_and = _path("dft", "and.json")
diff --git a/lib/stormpy/examples/files/mdp/maze_2.nm b/lib/stormpy/examples/files/mdp/maze_2.nm
new file mode 100644
index 0000000..396c48b
--- /dev/null
+++ b/lib/stormpy/examples/files/mdp/maze_2.nm
@@ -0,0 +1,126 @@
+// maze example (POMDP)
+// slightly extends that presented in
+// Littman, Cassandra and Kaelbling
+// Learning policies for partially observable environments: Scaling up
+// Technical Report CS, Brown University
+// gxn 29/01/16
+// Made into an MDP for the documentation of stormpy.
+
+// state space (value of variable "s")
+
+//  0  1  2  3  4
+//  5     6     7
+//  8     9    10
+// 11    13    12
+
+// 13 is the target
+
+mdp
+
+module maze
+
+	s : [-1..13];
+
+	// initialisation
+	[] s=-1 -> 1/13 : (s'=0)
+	         + 1/13 : (s'=1)
+	         + 1/13 : (s'=2)
+	         + 1/13 : (s'=3)
+	         + 1/13 : (s'=4)
+	         + 1/13 : (s'=5)
+	         + 1/13 : (s'=6)
+	         + 1/13 : (s'=7)
+	         + 1/13 : (s'=8)
+	         + 1/13 : (s'=9)
+	         + 1/13 : (s'=10)
+	         + 1/13 : (s'=11)
+	         + 1/13 : (s'=12);
+
+	// moving around the maze
+
+	[east] s=0 -> (s'=1);
+	[west] s=0 -> (s'=0);
+	[north] s=0 -> (s'=0);
+	[south] s=0 -> (s'=5);
+
+	[east] s=1 -> (s'=2);
+	[west] s=1 -> (s'=0);
+	[north] s=1 -> (s'=1);
+	[south] s=1 -> (s'=1);
+
+	[east] s=2 -> (s'=3);
+	[west] s=2 -> (s'=1);
+	[north] s=2 -> (s'=2);
+	[south] s=2 -> (s'=6);
+
+	[east] s=3 -> (s'=4);
+	[west] s=3 -> (s'=2);
+	[north] s=3 -> (s'=3);
+	[south] s=3 -> (s'=3);
+
+	[east] s=4 -> (s'=4);
+	[west] s=4 -> (s'=3);
+	[north] s=4 -> (s'=4);
+	[south] s=4 -> (s'=7);
+
+	[east] s=5 -> (s'=5);
+	[west] s=5 -> (s'=5);
+	[north] s=5 -> (s'=0);
+	[south] s=5 -> (s'=8);
+
+	[east] s=6 -> (s'=6);
+	[west] s=6 -> (s'=6);
+	[north] s=6 -> (s'=2);
+	[south] s=6 -> (s'=9);
+
+	[east] s=7 -> (s'=7);
+	[west] s=7 -> (s'=7);
+	[north] s=7 -> (s'=4);
+	[south] s=7 -> (s'=10);
+
+	[east] s=8 -> (s'=8);
+	[west] s=8 -> (s'=8);
+	[north] s=8 -> (s'=5);
+	[south] s=8 -> (s'=11);
+
+	[east] s=9 -> (s'=9);
+	[west] s=9 -> (s'=9);
+	[north] s=9 -> (s'=6);
+	[south] s=9 -> (s'=13);
+
+	[east] s=10 -> (s'=10);
+	[west] s=10 -> (s'=10);
+	[north] s=10 -> (s'=7);
+	[south] s=10 -> (s'=12);
+
+	[east] s=11 -> (s'=11);
+	[west] s=11 -> (s'=11);
+	[north] s=11 -> (s'=8);
+	[south] s=11 -> (s'=11);
+
+	[east] s=12 -> (s'=12);
+	[west] s=12 -> (s'=12);
+	[north] s=12 -> (s'=10);
+	[south] s=12 -> (s'=12);
+
+	// loop when we reach the target
+	[done] s=13 -> true;
+
+endmodule
+
+// reward structure (number of steps to reach the target)
+rewards
+
+	[east] true : 1;
+	[west] true : 1;
+	[north] true : 1;
+	[south] true : 1;
+
+endrewards
+
+// label marking the target state
+label "goal" = s=13;