You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
73 lines
2.1 KiB
73 lines
2.1 KiB
// GRID WORLD MODEL OF A SEMIAUTONOMOUS EXPLORING ROBOT
|
|
// Sebastian Junges, RWTH Aachen University
|
|
// As described in
|
|
// Junges, Jansen, Dehnert, Topcu, Katoen:
|
|
// Safety Constrained Reinforcement Learning
|
|
// Proc. of TACAS’16
|
|
|
|
mdp
|
|
|
|
//PARAMETERS
|
|
//The difference of the reliability of the channels between the worst and at the best position
|
|
const double pLDiff=0.1;
|
|
const double pHDiff=0.1;
|
|
//Scaling factor for the minimum reliability of the channels
|
|
const double pL;//=8/9;
|
|
const double pH;//=1;
|
|
|
|
//CONSTANTS
|
|
//The minimum reliablities
|
|
const double pLMin=pL*(1-pLDiff);
|
|
const double pHMin=pH*(1-pHDiff);
|
|
|
|
// Grid size
|
|
const int Xsize;
|
|
const int Ysize;
|
|
// Number of tries before an error
|
|
const int MAXTRIES;
|
|
// Ball within the robot has to move.
|
|
const int B;
|
|
|
|
|
|
formula T = (xLoc = Xsize & yLoc = Ysize);
|
|
|
|
|
|
module robot
|
|
xLoc : [1..Ysize] init 1;
|
|
yLoc : [1..Xsize] init 1;
|
|
unreported : [0..B] init 0;
|
|
hasSendNow : bool init false;
|
|
tries : [0..MAXTRIES] init 0;
|
|
|
|
[up] xLoc < Xsize & !T & hasSendNow -> 1:(xLoc'=xLoc+1) & (unreported' = 0) & (hasSendNow'=false);
|
|
[up] xLoc < Xsize & !T & !hasSendNow -> 1:(xLoc'=xLoc+1) & (unreported'=min(unreported+1, B));
|
|
[right] yLoc < Ysize & !T & hasSendNow -> 1:(yLoc'=yLoc+1) & (unreported' = 0)& (hasSendNow'=false);
|
|
[right] yLoc < Ysize & !T & !hasSendNow -> 1:(yLoc'=yLoc+1) & (unreported'=min(unreported+1,B));
|
|
[sendL] !hasSendNow & !T & tries < MAXTRIES -> (pLMin + pLDiff * xLoc/Xsize):(hasSendNow'=true) & (tries'=0) + (1 - pLMin - pLDiff * xLoc/Xsize): (tries'=tries+1);
|
|
[sendH] !hasSendNow & !T & tries < MAXTRIES -> (pHMin + pHDiff * yLoc/Ysize):(hasSendNow'=true) & (tries'=0) + (1 - pHMin - pHDiff * yLoc/Ysize): (tries'=tries+1);
|
|
[done] T -> 1:true;
|
|
endmodule
|
|
|
|
rewards "sendbased"
|
|
[up] true: 0.03;
|
|
[right] true: 0.03;
|
|
[sendL] true: max(10, min(11 + xLoc - yLoc, 20));
|
|
[sendH] true: min(13 + xLoc + yLoc, 24);
|
|
endrewards
|
|
|
|
rewards "sendbased_lower"
|
|
[up] true: 0.03;
|
|
[right] true: 0.03;
|
|
[sendL] true: 10;
|
|
[sendH] true: 12;
|
|
endrewards
|
|
|
|
rewards "sendbased_upper"
|
|
[up] true: 0.03;
|
|
[right] true: 0.03;
|
|
[sendL] true: 20;
|
|
[sendH] true: 24;
|
|
endrewards
|
|
|
|
label "Target" = T;
|
|
label "Crash" = unreported=B;
|