You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
2.2 KiB

  1. // maze example (POMDP)
  2. // slightly extends that presented in
  3. // Littman, Cassandra and Kaelbling
  4. // Learning policies for partially observable environments: Scaling up
  5. // Technical Report CS, Brown University
  6. // gxn 29/01/16
  7. // Converted into an MDP for the stormpy documentation.
  8. // state space (value of variable "s")
  9. //  0  1  2  3  4
  10. //  5     6     7
  11. //  8     9    10
  12. // 11    13    12
  13. // 13 is the target
  14. mdp
  // Maze navigation.
  //
  // Cell layout (value of "s"); gaps are walls:
  //
  //    0   1   2   3   4
  //    5       6       7
  //    8       9      10
  //   11      13      12
  //
  // Every cell 0..12 offers the four actions east/west/north/south. A move into
  // a wall leaves the agent where it is. Cell 13 is the target and only offers
  // the self-looping action "done".
  module maze

    // Current cell. -1 is an artificial pre-placement state: no explicit initial
    // value is given, so the model starts at the lowest value of the range.
    s : [-1..13];

    // initialisation: place the agent uniformly at random in one of the
    // 13 non-target cells
    [] s=-1 -> 1/13 : (s'=0)
             + 1/13 : (s'=1)
             + 1/13 : (s'=2)
             + 1/13 : (s'=3)
             + 1/13 : (s'=4)
             + 1/13 : (s'=5)
             + 1/13 : (s'=6)
             + 1/13 : (s'=7)
             + 1/13 : (s'=8)
             + 1/13 : (s'=9)
             + 1/13 : (s'=10)
             + 1/13 : (s'=11)
             + 1/13 : (s'=12);

    // top corridor (cells 0..4)
    [east]  s=0 -> (s'=1);
    [west]  s=0 -> (s'=0);
    [north] s=0 -> (s'=0);
    [south] s=0 -> (s'=5);

    [east]  s=1 -> (s'=2);
    [west]  s=1 -> (s'=0);
    [north] s=1 -> (s'=1);
    [south] s=1 -> (s'=1);

    [east]  s=2 -> (s'=3);
    [west]  s=2 -> (s'=1);
    [north] s=2 -> (s'=2);
    [south] s=2 -> (s'=6);

    [east]  s=3 -> (s'=4);
    [west]  s=3 -> (s'=2);
    [north] s=3 -> (s'=3);
    [south] s=3 -> (s'=3);

    [east]  s=4 -> (s'=4);
    [west]  s=4 -> (s'=3);
    [north] s=4 -> (s'=4);
    [south] s=4 -> (s'=7);

    // first row of the three vertical corridors (cells 5, 6, 7)
    [east]  s=5 -> (s'=5);
    [west]  s=5 -> (s'=5);
    [north] s=5 -> (s'=0);
    [south] s=5 -> (s'=8);

    [east]  s=6 -> (s'=6);
    [west]  s=6 -> (s'=6);
    [north] s=6 -> (s'=2);
    [south] s=6 -> (s'=9);

    [east]  s=7 -> (s'=7);
    [west]  s=7 -> (s'=7);
    [north] s=7 -> (s'=4);
    [south] s=7 -> (s'=10);

    // second row of the vertical corridors (cells 8, 9, 10)
    [east]  s=8 -> (s'=8);
    [west]  s=8 -> (s'=8);
    [north] s=8 -> (s'=5);
    [south] s=8 -> (s'=11);

    [east]  s=9 -> (s'=9);
    [west]  s=9 -> (s'=9);
    [north] s=9 -> (s'=6);
    [south] s=9 -> (s'=13); // the only way into the target

    // Cell 10 has walls to the east and west, exactly like cells 7 and 8, so
    // both moves stay in place (previously they jumped sideways into cell 9).
    [east]  s=10 -> (s'=10);
    [west]  s=10 -> (s'=10);
    [north] s=10 -> (s'=7);
    [south] s=10 -> (s'=12);

    // dead ends at the bottom of the outer corridors (cells 11, 12)
    [east]  s=11 -> (s'=11);
    [west]  s=11 -> (s'=11);
    [north] s=11 -> (s'=8);
    [south] s=11 -> (s'=11);

    [east]  s=12 -> (s'=12);
    [west]  s=12 -> (s'=12);
    [north] s=12 -> (s'=10);
    [south] s=12 -> (s'=12);

    // loop when we reach the target
    [done] s=13 -> true;

  endmodule
  // Step-count reward structure: each of the four movement actions earns a
  // reward of 1 in whichever state it is taken. No item is attached to the
  // "done" action or to the unlabelled initialisation command.
  rewards
    [north] true : 1;
    [south] true : 1;
    [west]  true : 1;
    [east]  true : 1;
  endrewards
  // "goal" holds exactly in the target cell (s = 13)
  label "goal" = (s = 13);