
simulator presents rewards in step and restart functions

refactoring
Sebastian Junges, 5 years ago
commit 28dc5ddb4b
1. examples/simulator/01-simulator.py (5 changed lines)
2. examples/simulator/02-simulator.py (8 changed lines)
3. lib/stormpy/simulator.py (8 changed lines)
4. src/core/simulator.cpp (1 changed line)

examples/simulator/01-simulator.py (5 changed lines)

@@ -16,13 +16,12 @@ def example_simulator_01():
     final_outcomes = dict()
     for n in range(1000):
         while not simulator.is_done():
-            observation = simulator.step()
+            observation, reward = simulator.step()
         if observation not in final_outcomes:
             final_outcomes[observation] = 1
         else:
             final_outcomes[observation] += 1
         simulator.restart()
     print(final_outcomes)
 
     options = stormpy.BuilderOptions([])
     options.set_build_state_valuations()
@@ -32,7 +31,7 @@ def example_simulator_01():
     final_outcomes = dict()
     for n in range(1000):
         while not simulator.is_done():
-            observation = simulator.step()
+            observation, reward = simulator.step()
         if observation not in final_outcomes:
             final_outcomes[observation] = 1
         else:

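With this commit, step() no longer returns a bare observation but an (observation, reward) pair, so callers unpack two values. A minimal end-to-end sketch of the updated loop, assuming the die DTMC from stormpy.examples.files and stormpy.simulator.create_simulator for the setup (neither appears in this hunk):

    import stormpy
    import stormpy.examples.files
    import stormpy.simulator

    # Assumed setup; the hunk above only shows the simulation loop.
    prism_program = stormpy.parse_prism_program(stormpy.examples.files.prism_dtmc_die)
    model = stormpy.build_model(prism_program)
    simulator = stormpy.simulator.create_simulator(model, seed=42)

    final_outcomes = dict()
    for n in range(1000):
        while not simulator.is_done():
            # step() now returns (observation, reward), not just the observation.
            observation, reward = simulator.step()
        if observation not in final_outcomes:
            final_outcomes[observation] = 1
        else:
            final_outcomes[observation] += 1
        simulator.restart()
    print(final_outcomes)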
examples/simulator/02-simulator.py (8 changed lines)

@@ -19,14 +19,14 @@ def example_simulator_02():
     paths = []
     for m in range(5):
         path = []
-        state = simulator.restart()
+        state, reward = simulator.restart()
         path = [f"{state}"]
         for n in range(20):
             actions = simulator.available_actions()
             select_action = random.randint(0,len(actions)-1)
             #print(f"Randomly select action nr: {select_action} from actions {actions}")
             path.append(f"--act={actions[select_action]}-->")
-            state = simulator.step(actions[select_action])
+            state, reward = simulator.step(actions[select_action])
             #print(state)
             path.append(f"{state}")
             if simulator.is_done():
@@ -48,14 +48,14 @@ def example_simulator_02():
     paths = []
     for m in range(5):
         path = []
-        state = simulator.restart()
+        state, reward = simulator.restart()
         path = [f"{state}"]
         for n in range(20):
             actions = simulator.available_actions()
             select_action = random.randint(0,len(actions)-1)
             #print(f"Randomly select action nr: {select_action} from actions {actions}")
             path.append(f"--act={actions[select_action]}-->")
-            state = simulator.step(actions[select_action])
+            state, reward = simulator.step(actions[select_action])
             #print(state)
             path.append(f"{state}")
             if simulator.is_done():

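Here restart() changes signature as well: it returns the pair for the initial state. Since both calls now surface rewards, the random-walk example can record a reward trace alongside the path. A sketch of that idea, again assuming create_simulator for the setup and using the maze MDP as a stand-in model (neither is named in this commit):

    import random
    import stormpy
    import stormpy.examples.files
    import stormpy.simulator

    # Assumed setup; only the sampling loop is shown in the hunks above.
    prism_program = stormpy.parse_prism_program(stormpy.examples.files.prism_mdp_maze)
    model = stormpy.build_model(prism_program)
    simulator = stormpy.simulator.create_simulator(model, seed=42)

    random.seed(23)
    paths = []
    for m in range(5):
        # restart() now also returns a (state, reward) pair.
        state, reward = simulator.restart()
        path = [f"{state}"]
        rewards = [reward]
        for n in range(20):
            actions = simulator.available_actions()
            select_action = random.randint(0, len(actions) - 1)
            path.append(f"--act={actions[select_action]}-->")
            state, reward = simulator.step(actions[select_action])
            path.append(f"{state}")
            rewards.append(reward)  # per-step reward from the new return value
            if simulator.is_done():
                break
        paths.append((path, rewards))
    for path, rewards in paths:
        print(" ".join(path), rewards)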
lib/stormpy/simulator.py (8 changed lines)

@@ -143,11 +143,15 @@ class SparseSimulator(Simulator):
 
     def _report_result(self):
         if self._full_observe:
-            return self._report_state()
+            return self._report_state(), self._report_rewards()
         else:
-            return self._report_observation()
+            return self._report_observation(), self._report_rewards()
 
+    def _report_rewards(self):
+        return self._engine.get_last_reward()
+
     def step(self, action=None):
         if action is None:
             if self._model.is_nondeterministic_model and self.nr_available_actions() > 1:
                 raise RuntimeError("Must specify an action in nondeterministic models.")

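This wrapper change is what makes the examples above work: _report_result is the common tail of both step and restart, so returning a pair there changes both public entry points at once, and the new _report_rewards forwards to the C++ binding added below. A condensed sketch of the call chain; the bodies of step and restart are assumptions inferred from the binding names, not code from this hunk:

    class SparseSimulator(Simulator):
        # Condensed sketch; only the parts relevant to this commit.

        def _report_rewards(self):
            # Forwards to the binding introduced in src/core/simulator.cpp.
            return self._engine.get_last_reward()

        def _report_result(self):
            # Common tail of step() and restart(): now a (result, rewards) pair.
            if self._full_observe:
                return self._report_state(), self._report_rewards()
            else:
                return self._report_observation(), self._report_rewards()

        def step(self, action=None):
            # Assumed body: advance the engine, then report the pair.
            self._engine.step(action)
            return self._report_result()

        def restart(self):
            # Assumed body: reset the engine, then report the pair.
            self._engine.reset_to_initial_state()
            return self._report_result()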
src/core/simulator.cpp (1 changed line)

@@ -6,6 +6,7 @@ void define_sparse_model_simulator(py::module& m) {
     dtsmsd.def(py::init<storm::models::sparse::Model<double> const&>());
     dtsmsd.def("set_seed", &storm::simulator::DiscreteTimeSparseModelSimulator<double>::setSeed, py::arg("seed"));
     dtsmsd.def("step", &storm::simulator::DiscreteTimeSparseModelSimulator<double>::step, py::arg("action"));
+    dtsmsd.def("get_last_reward", &storm::simulator::DiscreteTimeSparseModelSimulator<double>::getLastRewards);
     dtsmsd.def("get_current_state", &storm::simulator::DiscreteTimeSparseModelSimulator<double>::getCurrentState);
     dtsmsd.def("reset_to_initial_state", &storm::simulator::DiscreteTimeSparseModelSimulator<double>::resetToInitial);