Agent Class

The Agent class extends the Behavior class and represents a machine learning agent that interacts with the game. It implements a reinforcement learning approach for predicting actions.

Example

Here is an example from the FlappyBird game, where the agent is a bird that learns to fly by jumping over obstacles.

class Fly(Agent):
    """Flappy Bird agent that decides on each step whether to jump.

    Observations are a dict holding the bird's velocity together with the
    ``X``, ``Y`` and ``Y1`` values read from the context. Two discrete
    actions exist: action ``1`` becomes a ``"jump"`` Godot action, anything
    else becomes no action at all.
    """

    def __init__(self, model_path: str = None):
        """Set up the observation/action spaces and the training settings.

        Args:
            model_path: Value forwarded to the base class as ``save_path``.
                When ``None``, ``./Tests/PipeSizeTest/models/fly`` is used.
        """
        inf = float("inf")

        def scalar_box():
            # A single float value, unbounded on both sides.
            return spaces.Box(-inf, inf, shape=(1,), dtype=float)

        observation_space = spaces.Dict({
            # Two float components, unbounded below and capped at 300 above.
            "velocity": spaces.Box(-inf, 300, shape=(2,), dtype=float),
            "X": scalar_box(),
            "Y": scalar_box(),
            "Y1": scalar_box(),
        })

        # Fall back to the default location when the caller gives no path.
        save_path = model_path
        if save_path is None:
            save_path = "./Tests/PipeSizeTest/models/fly"

        super().__init__(
            observation_space=observation_space,
            action_space=spaces.Discrete(2),
            max_episode_length=1000,
            total_timesteps=int(3_000_000),
            algorithm_type="MultiInputPolicy",
            algorithm=stable_baselines3.PPO,
            save_path=save_path,
            eval_freq=10000,
        )

        # Last score already rewarded; lets reward() spot a score increase.
        self.points = 0

    def observation(self):
        """Return the current observation as a dict of float arrays.

        The keys match the observation space: ``velocity`` is the bird's
        velocity, while ``X``, ``Y`` and ``Y1`` are each wrapped in a
        one-element array.
        """
        obs = {
            "velocity": np.array(self.context.Game.Bird.velocity, dtype=float),
        }
        for key in ("X", "Y", "Y1"):
            obs[key] = np.array([getattr(self.context, key)], dtype=float)
        return obs

    def reward(self):
        """Return the reward for the current step.

        Returns:
            ``1`` when the context score exceeds the stored ``points`` (which
            is then updated to that score), ``-1`` when the context reports
            ``dead`` or the bird's ``position[1]`` is below 0, ``0`` otherwise.
        """
        if self.context.score > self.points:
            # New point earned: remember it so it is rewarded only once.
            self.points = self.context.score
            return 1

        failed = self.context.dead or self.context.Game.Bird.position[1] < 0
        return -1 if failed else 0

    def terminated(self):
        """Tell whether the episode is over, resetting ``points`` if so.

        The episode ends when ``dead`` differs from 0, when the score
        reaches 2, or when the bird's ``position[1]`` drops below 0.
        """
        done = (
            self.context.dead != 0
            or self.context.score >= 2
            or self.context.Game.Bird.position[1] < 0
        )
        if done:
            # Start the next episode with a clean score baseline.
            self.points = 0
        return done

    def actions(self, raw_actions):
        """Translate the raw action into a list of Godot actions.

        Returns:
            A list with one ``"jump"`` action when ``raw_actions`` equals 1,
            otherwise an empty list.
        """
        return [GodotAction("jump")] if raw_actions == 1 else []

Note

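As you can see, I defined an attribute points to store the last recorded score of the agent. The reward method compares it with the current score to detect when a new point has been earned, and the terminated method resets points to 0 when the episode is terminated.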