You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

59 lines
1.5 KiB

  1. // 3x3 grid
  2. // based on Littman, Cassandra and Kaelbling
  3. // Learning policies for partially observable environments: Scaling up
  4. // Technical Report CS, Brown University
  5. pomdp
  6. // the agent sees only o (declared in module grid), never x or y; o reveals whether it is on the target in the south-east corner
  7. observables
  8. o
  9. endobservables
  10. module grid
  11. x : [0..2]; // column index: 0 (west) .. 2 (east)
  12. y : [0..2]; // row index: 0 (south) .. 2 (north)
  13. o : [0..2]; // observation value
  14. // o=0 : initial observation, before the agent has been placed
  15. // o=1 : somewhere in the grid other than the target
  16. // o=2 : at the target
  17. // initial placement: uniform over the eight non-target cells
  18. [] o=0 -> 1/8 : (x'=0) & (y'=0) & (o'=1)
  19. + 1/8 : (x'=0) & (y'=1) & (o'=1)
  20. + 1/8 : (x'=0) & (y'=2) & (o'=1)
  21. + 1/8 : (x'=1) & (y'=0) & (o'=1)
  22. + 1/8 : (x'=1) & (y'=1) & (o'=1)
  23. + 1/8 : (x'=1) & (y'=2) & (o'=1)
  24. // (x=2, y=0) is the target cell, so it is deliberately left out of the initial distribution
  25. + 1/8 : (x'=2) & (y'=1) & (o'=1)
  26. + 1/8 : (x'=2) & (y'=2) & (o'=1);
  27. // movement: each action shifts one cell, or stays put at a wall
  28. [east] o=1 & (x!=1 | y!=0) -> (x'=min(2,x+1)); // target not entered
  29. [east] o=1 & x=1 & y=0 -> (o'=2) & (x'=2); // steps onto the target
  30. [west] o=1 -> (x'=max(0,x-1)); // can never enter the target
  31. [north] o=1 -> (y'=min(2,y+1)); // can never enter the target
  32. [south] o=1 & (x!=2 | y!=1) -> (y'=max(0,y-1)); // target not entered
  33. [south] o=1 & x=2 & y=1 -> (o'=2) & (y'=0); // steps onto the target
  34. // self-loop once the target is observed
  35. [done] o=2 -> true;
  36. endmodule
  37. // step cost: each movement action earns reward 1, so the accumulated reward counts steps taken to reach the target ([done] earns nothing)
  38. rewards
  39. [north] true : 1;
  40. [south] true : 1;
  41. [east] true : 1;
  42. [west] true : 1;
  43. endrewards
  44. // "goal" holds exactly in the states where the target is observed
  45. label "goal" = (o=2);