From de2c4ad8e5b92ecb8397096bc6deadaeb8d50f88 Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Thu, 7 Dec 2017 17:29:57 +0100 Subject: [PATCH] reward model docu --- doc/source/reward_models.rst | 57 ++++++++++++++++++++++ examples/reward_models/01-reward-models.py | 30 ++++++++++++ 2 files changed, 87 insertions(+) create mode 100644 examples/reward_models/01-reward-models.py diff --git a/doc/source/reward_models.rst b/doc/source/reward_models.rst index 2bf2619..11956d1 100644 --- a/doc/source/reward_models.rst +++ b/doc/source/reward_models.rst @@ -2,6 +2,63 @@ Reward Models ************** +In :doc:`getting_started`, we mainly looked at probabilities in the Markov models and properties that refer to these probabilities. +In this section, we discuss reward models. + Exploring reward models ------------------------ +.. seealso:: `01-reward-models.py <https://github.com/moves-rwth/stormpy/blob/master/examples/reward_models/01-reward-models.py>`_ + +We consider the die again, but with another property which talks about the expected reward:: + + >>> import stormpy + >>> import stormpy.examples + >>> import stormpy.examples.files + >>> program = stormpy.parse_prism_program(stormpy.examples.files.prism_dtmc_die) + >>> prop = "R=? [F \"done\"]" + + >>> properties = stormpy.parse_properties_for_prism_program(prop, program, None) + >>> model = stormpy.build_model(program, properties) + >>> assert len(model.reward_models) == 1 + +The model now has a reward model, as the property talks about rewards. +We can do model checking analogously to probabilities:: + + + >>> initial_state = model.initial_states[0] + >>> result = stormpy.model_checking(model, properties[0]) + >>> print("Result: {}".format(result.at(initial_state))) + Result: 3.6666666666666665 + + +The reward model has a name which we can obtain as follows:: + + >>> reward_model_name = list(model.reward_models.keys())[0] + >>> print(reward_model_name) + coin_flips + +We discuss later how to work with multiple reward models.
+Rewards come in several forms: as state rewards, state-action rewards, and as transition rewards. +In this example, we only have state-action rewards. These rewards are stored as a vector, over which we can easily iterate:: + + >>> assert not model.reward_models[reward_model_name].has_state_rewards + >>> assert model.reward_models[reward_model_name].has_state_action_rewards + >>> assert not model.reward_models[reward_model_name].has_transition_rewards + >>> for reward in model.reward_models[reward_model_name].state_action_rewards: + ... print(reward) + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 1.0 + 0.0 + 0.0 + 0.0 + 0.0 + 0.0 + 0.0 + + diff --git a/examples/reward_models/01-reward-models.py b/examples/reward_models/01-reward-models.py new file mode 100644 index 0000000..47c586c --- /dev/null +++ b/examples/reward_models/01-reward-models.py @@ -0,0 +1,30 @@ +import stormpy +import stormpy.core + +import stormpy.examples +import stormpy.examples.files + + +def example_reward_models_01(): + program = stormpy.parse_prism_program(stormpy.examples.files.prism_dtmc_die) + prop = "R=? [F \"done\"]" + properties = stormpy.parse_properties_for_prism_program(prop, program, None) + model = stormpy.build_model(program, properties) + initial_state = model.initial_states[0] + result = stormpy.model_checking(model, properties[0]) + print("Result: {}".format(result.at(initial_state))) + + assert len(model.reward_models) == 1 + reward_model_name = list(model.reward_models.keys())[0] + print(reward_model_name) + assert reward_model_name == "coin_flips" + assert not model.reward_models[reward_model_name].has_state_rewards + assert model.reward_models[reward_model_name].has_state_action_rewards + for reward in model.reward_models[reward_model_name].state_action_rewards: + print(reward) + assert not model.reward_models[reward_model_name].has_transition_rewards + + + +if __name__ == '__main__': + example_reward_models_01() \ No newline at end of file