You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

121 lines
3.8 KiB

3 months ago
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "## Example of how to combine shielding with RLlib's PPO algorithm"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": null,
  13. "metadata": {},
  14. "outputs": [],
  15. "source": [
  16. "import gymnasium as gym\n",
  17. "\n",
  18. "import minigrid\n",
  19. "\n",
  20. "from ray import tune, air\n",
  21. "from ray.tune import register_env\n",
  22. "from ray.rllib.algorithms.ppo import PPOConfig\n",
  23. "from ray.tune.logger import pretty_print\n",
  24. "from ray.rllib.models import ModelCatalog\n",
  25. "\n",
  26. "\n",
  27. "from torch_action_mask_model import TorchActionMaskModel\n",
  28. "from wrappers import OneHotShieldingWrapper, MiniGridShieldingWrapper\n",
  29. "from shieldhandlers import MiniGridShieldHandler, create_shield_query\n",
  30. " "
  31. ]
  32. },
  33. {
  34. "cell_type": "code",
  35. "execution_count": null,
  36. "metadata": {},
  37. "outputs": [],
  38. "source": [
  39. "def shielding_env_creater(config):\n",
  40. " name = config.get(\"name\", \"MiniGrid-LavaCrossingS9N1-v0\")\n",
  41. " framestack = config.get(\"framestack\", 4)\n",
  42. " \n",
  43. " shield_creator = MiniGridShieldHandler(\"grid.txt\", \"./main\", \"grid.prism\", \"Pmax=? [G !\\\"AgentIsInLavaAndNotDone\\\"]\")\n",
  44. " \n",
  45. " env = gym.make(name)\n",
  46. " env = MiniGridShieldingWrapper(env, shield_creator=shield_creator, shield_query_creator=create_shield_query ,mask_actions=True)\n",
  47. " env = OneHotShieldingWrapper(env, config.vector_index if hasattr(config, \"vector_index\") else 0,\n",
  48. " framestack=framestack)\n",
  49. " \n",
  50. " return env\n",
  51. "\n",
  52. "\n",
  53. "def register_minigrid_shielding_env():\n",
  54. " env_name = \"mini-grid-shielding\"\n",
  55. " register_env(env_name, shielding_env_creater)\n",
  56. " ModelCatalog.register_custom_model(\n",
  57. " \"shielding_model\", \n",
  58. " TorchActionMaskModel)"
  59. ]
  60. },
  61. {
  62. "cell_type": "code",
  63. "execution_count": null,
  64. "metadata": {},
  65. "outputs": [],
  66. "source": [
  67. "register_minigrid_shielding_env()\n",
  68. "\n",
  69. "\n",
  70. "config = (PPOConfig()\n",
  71. " .rollouts(num_rollout_workers=1)\n",
  72. " .resources(num_gpus=0)\n",
  73. " .environment(env=\"mini-grid-shielding\", env_config={\"name\": \"MiniGrid-LavaCrossingS9N1-v0\"})\n",
  74. " .framework(\"torch\")\n",
  75. " .rl_module(_enable_rl_module_api = False)\n",
  76. " .training(_enable_learner_api=False ,model={\n",
  77. " \"custom_model\": \"shielding_model\"\n",
  78. " }))\n",
  79. "\n",
  80. "tuner = tune.Tuner(\"PPO\",\n",
  81. " tune_config=tune.TuneConfig(\n",
  82. " metric=\"episode_reward_mean\",\n",
  83. " mode=\"max\",\n",
  84. " num_samples=1,\n",
  85. " \n",
  86. " ),\n",
  87. " run_config=air.RunConfig(\n",
  88. " stop = {\"episode_reward_mean\": 94,\n",
  89. " \"timesteps_total\": 12000,\n",
  90. " \"training_iteration\": 12}, \n",
  91. " checkpoint_config=air.CheckpointConfig(checkpoint_at_end=True, num_to_keep=2 ),\n",
  92. " ),\n",
  93. " param_space=config,)\n",
  94. "\n",
  95. "tuner.fit()"
  96. ]
  97. }
  98. ],
  99. "metadata": {
  100. "kernelspec": {
  101. "display_name": "env",
  102. "language": "python",
  103. "name": "python3"
  104. },
  105. "language_info": {
  106. "codemirror_mode": {
  107. "name": "ipython",
  108. "version": 3
  109. },
  110. "file_extension": ".py",
  111. "mimetype": "text/x-python",
  112. "name": "python",
  113. "nbconvert_exporter": "python",
  114. "pygments_lexer": "ipython3",
  115. "version": "3.10.12"
  116. },
  117. "orig_nbformat": 4
  118. },
  119. "nbformat": 4,
  120. "nbformat_minor": 2
  121. }