Sebastian Junges
6 years ago
6 changed files with 305 additions and 0 deletions
- doc/source/advanced_topics.rst (1 addition)
- doc/source/doc/exploration.rst (106 additions)
- examples/exploration/01-exploration.py (30 additions)
- examples/exploration/02-exploration.py (38 additions)
- lib/stormpy/examples/files.py (4 additions)
- lib/stormpy/examples/files/mdp/maze_2.nm (126 additions)

doc/source/doc/exploration.rst
@ -0,0 +1,106 @@
****************
Exploring Models
****************

Background
=====================

Often, stormpy is used as a testbed for new algorithms.
An essential step is to transfer the (low-level) description of an MDP or another state-based model into
one's own algorithm. In this section, we discuss some of the functionality.

Reading MDPs
=====================

.. seealso:: `01-exploration.py <https://github.com/moves-rwth/stormpy/blob/master/examples/exploration/01-exploration.py>`_

In :doc:`../getting_started`, we briefly iterated over a DTMC. In this section, we explore an MDP::

    >>> import doctest
    >>> doctest.ELLIPSIS_MARKER = '-etc-' # doctest:+ELLIPSIS
    >>> import stormpy
    >>> import stormpy.examples
    >>> import stormpy.examples.files
    >>> program = stormpy.parse_prism_program(stormpy.examples.files.prism_mdp_maze)
    >>> prop = "R=? [F \"goal\"]"
    >>> properties = stormpy.parse_properties_for_prism_program(prop, program, None)
    >>> model = stormpy.build_model(program, properties)

The iteration over the model is as before, but now, for every action, we can have several transitions::

    >>> for state in model.states:
    ...     if state.id in model.initial_states:
    ...         print("State {} is initial".format(state.id))
    ...     for action in state.actions:
    ...         for transition in action.transitions:
    ...             print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(), transition.column))
    -etc-

The output (omitted for brevity) contains lines like::

    From state 1 by action 0, with probability 1.0, go to state 2
    From state 1 by action 1, with probability 1.0, go to state 1
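
These lines show that state 1 offers two actions, which is what distinguishes the MDP from the DTMC before. As a quick sanity check (a small sketch; we elide the exact numbers via the ellipsis marker set up above), the total number of states and choices can be queried directly::

    >>> print("Model has {} states and {} choices".format(model.nr_states, model.nr_choices)) # doctest:+ELLIPSIS
    Model has -etc-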

Internally, Storm can store hints to the origin of the actions, which may be helpful to give them meaning and for debugging.
As the availability and the encoding of this data depend on the input model, we discuss these features in :doc:`highlevel_models`.
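
As a brief preview (a sketch: ``BuilderOptions`` and ``build_sparse_model_with_options`` are part of the builder API, and we assume ``set_build_with_choice_origins`` accepts a boolean flag), such origin information has to be requested when the model is built::

    >>> options = stormpy.BuilderOptions([p.raw_formula for p in properties])
    >>> options.set_build_with_choice_origins(True)
    >>> model_with_origins = stormpy.build_sparse_model_with_options(program, options)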

Storm currently supports deterministic rewards on states or actions. More information can be found in :doc:`reward_models`.
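
For instance, the reward models of the MDP built above can be listed as follows (a sketch: we assume the ``reward_models`` mapping and its ``has_state_rewards``/``has_state_action_rewards`` flags; the maze example declares a single, unnamed reward structure over actions)::

    >>> for name, reward_model in model.reward_models.items():
    ...     print("Reward model '{}': state rewards: {}, state-action rewards: {}".format(name, reward_model.has_state_rewards, reward_model.has_state_action_rewards))
    Reward model '': state rewards: False, state-action rewards: True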

Reading POMDPs
======================

.. seealso:: `02-exploration.py <https://github.com/moves-rwth/stormpy/blob/master/examples/exploration/02-exploration.py>`_

Internally, POMDPs extend MDPs. Thus, iterating over the MDP is done as before::

    >>> import stormpy
    >>> import stormpy.examples
    >>> import stormpy.examples.files
    >>> program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze)
    >>> prop = "R=? [F \"goal\"]"
    >>> properties = stormpy.parse_properties_for_prism_program(prop, program, None)
    >>> model = stormpy.build_model(program, properties)

Indeed, all that changed in the code above is the example we use; the model type is now a POMDP::

    >>> print(model.model_type)
    ModelType.POMDP

Additionally, POMDPs have a set of observations, which are internally numbered by integers from 0 to the number of observations minus 1::

    >>> print(model.nr_observations)
    8
    >>> for state in model.states:
    ...     print("State {} has observation id {}".format(state.id, model.observations[state.id]))
    State 0 has observation id 6
    State 1 has observation id 1
    State 2 has observation id 4
    State 3 has observation id 7
    State 4 has observation id 4
    State 5 has observation id 3
    State 6 has observation id 0
    State 7 has observation id 0
    State 8 has observation id 0
    State 9 has observation id 0
    State 10 has observation id 0
    State 11 has observation id 0
    State 12 has observation id 2
    State 13 has observation id 2
    State 14 has observation id 4
    State 15 has observation id 5
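
States with the same observation id are indistinguishable for an agent acting in the POMDP. For illustration (a small sketch using only the API shown above; ``defaultdict`` from the standard library is used for convenience), we can group the states by their observation::

    >>> from collections import defaultdict
    >>> states_per_observation = defaultdict(list)
    >>> for state in model.states:
    ...     states_per_observation[model.observations[state.id]].append(state.id)
    >>> print(sorted(states_per_observation[4]))
    [2, 4, 14]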

Reading MAs
======================

To be continued...

examples/exploration/01-exploration.py
@ -0,0 +1,30 @@
import stormpy
import stormpy.core

import stormpy.examples
import stormpy.examples.files


def example_exploration_01():
    """
    Example for the exploration of MDPs.
    :return:
    """
    program = stormpy.parse_prism_program(stormpy.examples.files.prism_mdp_maze)
    prop = "R=? [F \"goal\"]"
    properties = stormpy.parse_properties_for_prism_program(prop, program, None)
    model = stormpy.build_model(program, properties)
    print(model.model_type)

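    # Iterate over all states; in an MDP, every state has one or more actions,
    # and every action has one or more probabilistic transitions.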
    for state in model.states:
        if state.id in model.initial_states:
            print(state)
        for action in state.actions:
            for transition in action.transitions:
                print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(),
                                                                                               transition.column))


if __name__ == '__main__':
    example_exploration_01()

examples/exploration/02-exploration.py
@ -0,0 +1,38 @@
import stormpy
import stormpy.core

import stormpy.examples
import stormpy.examples.files


def example_exploration_02():
    """
    Example for the exploration of POMDPs.
    :return:
    """
    program = stormpy.parse_prism_program(stormpy.examples.files.prism_pomdp_maze)
    prop = "R=? [F \"goal\"]"
    properties = stormpy.parse_properties_for_prism_program(prop, program, None)
    model = stormpy.build_model(program, properties)
    print(model.model_type)
    # Internally, POMDPs are just MDPs with additional observation information.
    # Thus, data structure exploration for MDPs can be applied as before.
    initial_state = model.initial_states[0]

    for state in model.states:
        if state.id in model.initial_states:
            print(state)
        for action in state.actions:
            for transition in action.transitions:
                print("From state {} by action {}, with probability {}, go to state {}".format(state, action, transition.value(),
                                                                                               transition.column))

    print(model.nr_observations)

    for state in model.states:
        print("State {} has observation id {}".format(state.id, model.observations[state.id]))


if __name__ == '__main__':
    example_exploration_02()

lib/stormpy/examples/files/mdp/maze_2.nm
@ -0,0 +1,126 @@

// maze example (POMDP)
// slightly extends that presented in
// Littman, Cassandra and Kaelbling
// Learning policies for partially observable environments: Scaling up
// Technical Report CS, Brown University
// gxn 29/01/16
// Made into an MDP for documentation of stormpy.

// state space (value of variable "s")

//  0  1  2  3  4
//  5     6     7
//  8     9    10
// 11    13    12

// 13 is the target

mdp

module maze

	s : [-1..13];

	// initialisation
	[] s=-1 -> 1/13 : (s'=0)
	         + 1/13 : (s'=1)
	         + 1/13 : (s'=2)
	         + 1/13 : (s'=3)
	         + 1/13 : (s'=4)
	         + 1/13 : (s'=5)
	         + 1/13 : (s'=6)
	         + 1/13 : (s'=7)
	         + 1/13 : (s'=8)
	         + 1/13 : (s'=9)
	         + 1/13 : (s'=10)
	         + 1/13 : (s'=11)
	         + 1/13 : (s'=12);
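	// (i.e., the agent starts uniformly at random in one of the 13 maze cells)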

	// moving around the maze

	[east] s=0 -> (s'=1);
	[west] s=0 -> (s'=0);
	[north] s=0 -> (s'=0);
	[south] s=0 -> (s'=5);

	[east] s=1 -> (s'=2);
	[west] s=1 -> (s'=0);
	[north] s=1 -> (s'=1);
	[south] s=1 -> (s'=1);

	[east] s=2 -> (s'=3);
	[west] s=2 -> (s'=1);
	[north] s=2 -> (s'=2);
	[south] s=2 -> (s'=6);

	[east] s=3 -> (s'=4);
	[west] s=3 -> (s'=2);
	[north] s=3 -> (s'=3);
	[south] s=3 -> (s'=3);

	[east] s=4 -> (s'=4);
	[west] s=4 -> (s'=3);
	[north] s=4 -> (s'=4);
	[south] s=4 -> (s'=7);

	[east] s=5 -> (s'=5);
	[west] s=5 -> (s'=5);
	[north] s=5 -> (s'=0);
	[south] s=5 -> (s'=8);

	[east] s=6 -> (s'=6);
	[west] s=6 -> (s'=6);
	[north] s=6 -> (s'=2);
	[south] s=6 -> (s'=9);

	[east] s=7 -> (s'=7);
	[west] s=7 -> (s'=7);
	[north] s=7 -> (s'=4);
	[south] s=7 -> (s'=10);

	[east] s=8 -> (s'=8);
	[west] s=8 -> (s'=8);
	[north] s=8 -> (s'=5);
	[south] s=8 -> (s'=11);

	[east] s=9 -> (s'=9);
	[west] s=9 -> (s'=9);
	[north] s=9 -> (s'=6);
	[south] s=9 -> (s'=13);

	[east] s=10 -> (s'=9);
	[west] s=10 -> (s'=9);
	[north] s=10 -> (s'=7);
	[south] s=10 -> (s'=12);

	[east] s=11 -> (s'=11);
	[west] s=11 -> (s'=11);
	[north] s=11 -> (s'=8);
	[south] s=11 -> (s'=11);

	[east] s=12 -> (s'=12);
	[west] s=12 -> (s'=12);
	[north] s=12 -> (s'=10);
	[south] s=12 -> (s'=12);

	// loop when we reach the target
	[done] s=13 -> true;

endmodule

// reward structure (number of steps to reach the target)
rewards
	[east] true : 1;
	[west] true : 1;
	[north] true : 1;
	[south] true : 1;
endrewards

// target observation
label "goal" = s=13;