Adding Environments
To create a new environment, subclass Environment, set its attributes in __init__, and implement four methods: reset(), step(), _get_obs(), and render().
Template
import numpy as np
from tinyrl import Environment
class MyEnv(Environment):
    """Template environment: copy this class and replace the placeholders.

    The template runs as written: every placeholder (state variables,
    reward, observation vector) has a neutral default value, so you can
    fill in the pieces one at a time.
    """

    def __init__(self):
        # NOTE(review): call super().__init__() here if Environment defines
        # its own initializer -- confirm against the tinyrl base class.
        self.state_dim = 4  # observation dimensionality
        self.n_actions = 2  # for discrete action spaces
        # self.action_dim = 3  # for continuous action spaces
        self.max_steps = 200  # episode step limit
        # Defined here as well as in reset() so that calling step() before
        # reset() does not fail with AttributeError.
        self.steps = 0
        # ... your state variables ...

    def reset(self) -> np.ndarray:
        """Reset to initial state, return first observation."""
        self.steps = 0
        # ... reset your state ...
        return self._get_obs()

    def step(self, action) -> tuple[np.ndarray, float, bool]:
        """Take action, return (observation, reward, done).

        ``done`` becomes True once ``max_steps`` steps have been taken;
        add your own goal/failure condition where indicated.
        """
        self.steps += 1
        # ... apply action, compute reward ...
        reward = 0.0  # placeholder: replace with your reward computation
        done = self.steps >= self.max_steps  # or some goal condition
        return self._get_obs(), reward, done

    def _get_obs(self) -> np.ndarray:
        """Return current state as a float32 numpy array of shape (state_dim,)."""
        # ... build observation vector ...
        # Placeholder: an all-zero vector of the right shape. Replace with
        # np.array([<your state variables>], dtype=np.float32).
        return np.zeros(self.state_dim, dtype=np.float32)

    def render(self, action=None, step_num=0):
        """Print or display the current state."""
        print(f"Step {step_num}: {self._get_obs()}")
Checklist
- Set `state_dim` and `max_steps` in `__init__`
- Set `n_actions` (discrete) or `action_dim` (continuous)
- `reset()` returns the initial observation and resets all state
- `step()` returns `(obs, reward, done)` — make sure `done=True` when the episode should end
- `_get_obs()` returns a numpy array with shape `(state_dim,)`
- `render()` displays something useful for debugging
Using with Runner
Once implemented, your environment works with Runner automatically:
from tinyrl import Runner
# Instantiate the environment and wrap it in a Runner.
env = MyEnv()
runner = Runner(env)
# `my_policy` is not defined in this snippet -- supply your own policy here.
# NOTE(review): presumably visualize=True makes the runner call env.render()
# during the episode -- confirm against the Runner implementation.
result = runner.run_episode(my_policy, visualize=True)
runner.plot()