From 28dc5ddb4b46de5ece39fa8362cca5490eebde5f Mon Sep 17 00:00:00 2001 From: Sebastian Junges Date: Thu, 18 Jun 2020 12:30:06 -0700 Subject: [PATCH] simulator presents rewards in step and restart function --- examples/simulator/01-simulator.py | 5 ++--- examples/simulator/02-simulator.py | 8 ++++---- lib/stormpy/simulator.py | 8 ++++++-- src/core/simulator.cpp | 1 + 4 files changed, 13 insertions(+), 9 deletions(-) diff --git a/examples/simulator/01-simulator.py b/examples/simulator/01-simulator.py index f248d73..bc41a2b 100644 --- a/examples/simulator/01-simulator.py +++ b/examples/simulator/01-simulator.py @@ -16,13 +16,12 @@ def example_simulator_01(): final_outcomes = dict() for n in range(1000): while not simulator.is_done(): - observation = simulator.step() + observation, reward = simulator.step() if observation not in final_outcomes: final_outcomes[observation] = 1 else: final_outcomes[observation] += 1 simulator.restart() - print(final_outcomes) options = stormpy.BuilderOptions([]) options.set_build_state_valuations() @@ -32,7 +31,7 @@ def example_simulator_01(): final_outcomes = dict() for n in range(1000): while not simulator.is_done(): - observation = simulator.step() + observation, reward = simulator.step() if observation not in final_outcomes: final_outcomes[observation] = 1 else: diff --git a/examples/simulator/02-simulator.py b/examples/simulator/02-simulator.py index e23a0da..44107a5 100644 --- a/examples/simulator/02-simulator.py +++ b/examples/simulator/02-simulator.py @@ -19,14 +19,14 @@ def example_simulator_02(): paths = [] for m in range(5): path = [] - state = simulator.restart() + state, reward = simulator.restart() path = [f"{state}"] for n in range(20): actions = simulator.available_actions() select_action = random.randint(0,len(actions)-1) #print(f"Randomly select action nr: {select_action} from actions {actions}") path.append(f"--act={actions[select_action]}-->") - state = simulator.step(actions[select_action]) + state, reward = simulator.step(actions[select_action]) #print(state) path.append(f"{state}") if simulator.is_done(): @@ -48,14 +48,14 @@ def example_simulator_02(): paths = [] for m in range(5): path = [] - state = simulator.restart() + state, reward = simulator.restart() path = [f"{state}"] for n in range(20): actions = simulator.available_actions() select_action = random.randint(0,len(actions)-1) #print(f"Randomly select action nr: {select_action} from actions {actions}") path.append(f"--act={actions[select_action]}-->") - state = simulator.step(actions[select_action]) + state, reward = simulator.step(actions[select_action]) #print(state) path.append(f"{state}") if simulator.is_done(): diff --git a/lib/stormpy/simulator.py b/lib/stormpy/simulator.py index d2e1ab4..81cc480 100644 --- a/lib/stormpy/simulator.py +++ b/lib/stormpy/simulator.py @@ -143,11 +143,15 @@ class SparseSimulator(Simulator): def _report_result(self): if self._full_observe: - return self._report_state() + return self._report_state(), self._report_rewards() else: - return self._report_observation() + return self._report_observation(), self._report_rewards() + + def _report_rewards(self): + return self._engine.get_last_reward() def step(self, action=None): + if action is None: if self._model.is_nondeterministic_model and self.nr_available_actions() > 1: raise RuntimeError("Must specify an action in nondeterministic models.") diff --git a/src/core/simulator.cpp b/src/core/simulator.cpp index 263a44d..d6dce90 100644 --- a/src/core/simulator.cpp +++ b/src/core/simulator.cpp @@ -6,6 +6,7 @@ void define_sparse_model_simulator(py::module& m) { dtsmsd.def(py::init const&>()); dtsmsd.def("set_seed", &storm::simulator::DiscreteTimeSparseModelSimulator::setSeed, py::arg("seed")); dtsmsd.def("step", &storm::simulator::DiscreteTimeSparseModelSimulator::step, py::arg("action")); + dtsmsd.def("get_last_reward", &storm::simulator::DiscreteTimeSparseModelSimulator::getLastRewards); dtsmsd.def("get_current_state", &storm::simulator::DiscreteTimeSparseModelSimulator::getCurrentState); dtsmsd.def("reset_to_initial_state", &storm::simulator::DiscreteTimeSparseModelSimulator::resetToInitial);