
SparseModelMemoryProduct: Fixed incorrect computation of state-action rewards under a randomized policy.

Branch: tempestpy_adaptions
Author: Tim Quatmann, 5 years ago
Commit: 7ffe322e06
1 file changed, 2 lines (1 addition, 1 deletion):
src/storm/storage/memorystructure/SparseModelMemoryProduct.cpp
@@ -410,7 +410,7 @@ namespace storm {
                 if (isStateReachable(modelState, memoryState)) {
                     if (scheduler && scheduler->getChoice(modelState, memoryState).isDefined()) {
                         ValueType factor = scheduler->getChoice(modelState, memoryState).getChoiceAsDistribution().getProbability(rowOffset);
-                        stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] = factor * modelStateActionReward;
+                        stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)]] += factor * modelStateActionReward;
                     } else {
                         stateActionRewards.get()[resultTransitionMatrix.getRowGroupIndices()[getResultState(modelState, memoryState)] + rowOffset] = modelStateActionReward;
                     }
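The one-character fix replaces `=` with `+=`: when a randomized scheduler is applied, all choices of a model state collapse into a single row of the product, so the loop over `rowOffset` must accumulate each choice's probability-weighted reward contribution rather than overwrite the entry, which would keep only the last choice's term. A minimal standalone sketch of the arithmetic follows; the variable names are hypothetical and this is not Storm's actual API, just an illustration of the accumulation under assumed choice rewards and probabilities:

#include <cassert>
#include <cstddef>
#include <iostream>
#include <vector>

int main() {
    // Hypothetical model state with two choices and their action rewards.
    std::vector<double> choiceRewards = {10.0, 4.0};
    // Hypothetical randomized scheduler: probability of picking each choice.
    std::vector<double> choiceProbabilities = {0.25, 0.75};

    // Under a defined scheduler choice, the product has a single row for
    // this state, so every choice contributes to the same reward entry.
    double rowReward = 0.0;
    for (std::size_t rowOffset = 0; rowOffset < choiceRewards.size(); ++rowOffset) {
        double factor = choiceProbabilities[rowOffset];
        // Buggy version: rowReward = factor * choiceRewards[rowOffset];
        // would retain only the last choice's term (0.75 * 4.0 = 3.0).
        rowReward += factor * choiceRewards[rowOffset];  // fixed: accumulate
    }

    // Expected state-action reward under the randomized policy:
    // 0.25 * 10.0 + 0.75 * 4.0 = 5.5
    std::cout << "expected state-action reward: " << rowReward << "\n";
    assert(rowReward == 0.25 * 10.0 + 0.75 * 4.0);
    return 0;
}

The `else` branch of the diff is unaffected: without a defined scheduler choice, each model choice keeps its own row (offset by `rowOffset`), so a plain assignment remains correct there.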
