Engineering AI Agents

# aima_gridworld_env.py

import gymnasium as gym
from gymnasium import spaces
from minigrid.core.grid import Grid
from minigrid.core.mission import MissionSpace
from minigrid.core.world_object import Goal, Lava, Wall
from minigrid.minigrid_env import MiniGridEnv

class AIMAGridworldEnv(MiniGridEnv):
    """
    4×3 Gridworld from AIMA Ch.17, Fig 17.1, built on a MiniGrid grid.

    Coordinates are zero-based (x, y); x grows with the "right" action and
    y grows with the "down" action. Relative to the book's figure the layout
    is therefore flipped vertically (the MDP is the same with up/down swapped).
      - Grid size: width=4, height=3, with no border walls; the grid edge
        itself blocks movement
      - Wall at (1,1)
      - Terminal +1 at (3,2); Terminal –1 at (3,1)
      - Agent starts at (0,0)
      - Step cost for non-terminal moves (default 0.0)
      - Deterministic actions: 0=right,1=down,2=left,3=up
      - A non-terminal episode is truncated once max_steps (100) is reached
    """

    # Layout constants, shared by _gen_grid() and step() so the two can
    # never disagree about where the special cells are.
    WIDTH = 4
    HEIGHT = 3
    WALL_POS = (1, 1)
    GOAL_POS = (3, 2)
    PIT_POS = (3, 1)
    MISSION = "reach +1 and avoid –1"

    # action id -> (dx, dy)
    _MOVES = {
        0: (1, 0),   # right
        1: (0, 1),   # down
        2: (-1, 0),  # left
        3: (0, -1),  # up
    }

    def __init__(self, step_cost: float = 0.0):
        """
        Build the environment.

        Args:
            step_cost: reward returned for every move that does not end in a
                terminal cell (AIMA uses -0.04).
        """
        # Assigned before super().__init__() so they already exist if the
        # base-class constructor happens to build the grid (some older
        # MiniGrid releases reset inside __init__ — TODO confirm for the
        # installed version).
        self.step_cost = step_cost
        self.goal_reward = 1.0
        self.pit_reward = -1.0

        # start cell and (unused by the dynamics) initial heading
        self.start_pos = (0, 0)
        self.start_dir = 0

        super().__init__(
            # mission_space is a required argument of MiniGridEnv
            mission_space=MissionSpace(mission_func=self._gen_mission),
            # grid_size only describes square grids; a 4×3 grid needs both
            width=self.WIDTH,
            height=self.HEIGHT,
            max_steps=100,
            see_through_walls=True,
        )
        # override action space to 4 direct moves
        self.action_space = spaces.Discrete(4)

    @staticmethod
    def _gen_mission() -> str:
        """Return the fixed mission string used for the mission space."""
        return AIMAGridworldEnv.MISSION

    def _gen_grid(self, width, height):
        """
        Create the grid contents and place the agent.

        No outer wall ring is drawn: on a 4×3 grid it would cover the start,
        goal and pit cells. step() rejects moves that leave the grid instead.
        """
        self.grid = Grid(width, height)

        # inner wall
        self.grid.set(*self.WALL_POS, Wall())

        # goal (+1)
        self.grid.set(*self.GOAL_POS, Goal())

        # pit (-1), represented by Lava
        self.grid.set(*self.PIT_POS, Lava())

        # the agent's pose must be defined here for every new episode
        self.agent_pos = self.start_pos
        self.agent_dir = self.start_dir

        self.mission = self.MISSION

    def step(self, action):
        """
        Interpret action ∈ {0,1,2,3} as a move in (→,↓,←,↑).
        Walls/boundaries block movement (the agent stays where it is).

        Returns (obs, reward, terminated, truncated, info). Entering the goal
        or pit cell terminates with goal_reward / pit_reward; any other move
        yields step_cost and is truncated once step_count reaches max_steps.

        Raises:
            KeyError: if action is not one of 0, 1, 2, 3.
        """
        # int() also accepts NumPy integer scalars / 0-d arrays
        dx, dy = self._MOVES[int(action)]
        x, y = self.agent_pos
        nx, ny = x + dx, y + dy

        inside = 0 <= nx < self.width and 0 <= ny < self.height
        if inside:
            # empty cells are stored as None, so only query real objects
            cell = self.grid.get(nx, ny)
            blocked = cell is not None and not cell.can_overlap()
        else:
            blocked = True
        if blocked:
            nx, ny = x, y

        self.agent_pos = (nx, ny)
        self.step_count += 1
        obs = self.gen_obs()

        # terminal checks
        if (nx, ny) == self.GOAL_POS:
            return obs, self.goal_reward, True, False, {}
        if (nx, ny) == self.PIT_POS:
            return obs, self.pit_reward, True, False, {}

        # non-terminal step cost; enforce the episode length limit
        truncated = self.step_count >= self.max_steps
        return obs, self.step_cost, False, truncated, {}

import numpy as np
from minigrid.wrappers import FullyObsWrapper

# Build the gridworld with AIMA's -0.04 step cost and wrap it so that each
# observation covers the full grid.
env = FullyObsWrapper(AIMAGridworldEnv(step_cost=-0.04))

# Enumerate the states and evaluate the uniform random policy
# (solver is defined elsewhere; v appears to be indexed like states).
v, states = solve_policy_linear_minigrid(env, gamma=0.99)

# Report one line per enumerated state: index, position, heading, value.
for idx, sdict in enumerate(states):
    pos, d = sdict['agent_pos'], sdict['agent_dir']
    print(f"s={idx:2d}, pos={pos}, dir={d}  V={v[idx]:6.3f}")

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[4], line 5
      2 from minigrid.wrappers import FullyObsWrapper
      4 # wrap for full‐grid observations
----> 5 env = FullyObsWrapper(AIMAGridworldEnv(step_cost=-0.04))
      7 # now enumerate and solve under uniform random policy
      8 v, states = solve_policy_linear_minigrid(env, gamma=0.99)

Cell In[3], line 18, in AIMAGridworldEnv.__init__(self, step_cost)
     17 def __init__(self, step_cost: float = 0.0):
---> 18     super().__init__(
     19         grid_size=(4, 3),
     20         max_steps=100,
     21         see_through_walls=True
     22     )
     23     # override action space to 4 direct moves
     24     self.action_space = spaces.Discrete(4)

TypeError: MiniGridEnv.__init__() missing 1 required positional argument: 'mission_space'