Код игры и реализация агента приведены ниже. Игрок — синий, монстр — красный, выход — зелёный, старт — жёлтый.
Игра выглядит так:

import pygame
import random
import numpy as np
# Grid dimensions, in tiles.
ROWS = 11
COLS = 11

# Edge length of one square tile in pixels, and the resulting window size.
TILE_SIZE = 32
WIDTH = COLS * TILE_SIZE
HEIGHT = ROWS * TILE_SIZE

# RGB colours used when drawing.
WHITE = (255, 255, 255)
BLACK = (0, 0, 0)
BLUE = (0, 0, 255)

# Maze layout, one string per grid row: "1" is a wall, "0" is a walkable cell.
# Decoded into a list of lists of ints so cells are read as maze[row][col].
maze = [
    [int(cell) for cell in line]
    for line in (
        "11111111111",
        "00000100001",
        "10101110101",
        "10000000101",
        "10111011101",
        "10000000001",
        "10101111101",
        "10000010001",
        "11011010101",
        "10000000100",
        "11111111111",
    )
]
class Monster:
    """Maze monster that chases a target cell, with occasional random moves.

    Positions are ``(row, col)`` tuples that index into the maze grid.
    """

    # (row delta, col delta) per action id: 0 = up, 1 = down, 2 = left, 3 = right.
    _ACTION_DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, pos):
        """Place the monster on cell *pos*, a ``(row, col)`` tuple."""
        self.pos = pos

    def move(self, target_pos, maze, available_actions):
        """Advance the monster by at most one cell and update ``self.pos``.

        With probability 0.1 the monster takes a uniformly random action from
        *available_actions* (and stays put when that list is empty).
        Otherwise it steps greedily towards *target_pos*: vertically first,
        then horizontally, and only onto cells whose maze value is 0. If no
        such step exists the monster does not move.

        Args:
            target_pos: ``(row, col)`` of the cell being chased.
            maze: grid indexed as ``maze[row][col]``; 0 marks a walkable cell.
            available_actions: action ids (keys of ``_ACTION_DELTAS``) the
                monster may take from its current cell; the caller is
                responsible for excluding moves into walls.
        """
        row, col = self.pos
        target_row, target_col = target_pos

        if random.random() < 0.1:
            # Exploration move: trust the caller's list of legal actions.
            if available_actions:
                action = random.choice(available_actions)
                # Unknown action ids leave the monster where it is.
                d_row, d_col = self._ACTION_DELTAS.get(action, (0, 0))
                row += d_row
                col += d_col
        else:
            # Greedy chase. Each comparison against the target guarantees the
            # neighbouring index stays on the same side as the target, so no
            # explicit bounds check is needed while the target is in-grid.
            if row < target_row and maze[row + 1][col] == 0:
                row += 1
            elif row > target_row and maze[row - 1][col] == 0:
                row -= 1
            elif col < target_col and maze[row][col + 1] == 0:
                col += 1
            elif col > target_col and maze[row][col - 1] == 0:
                col -= 1

        self.pos = (row, col)
class Maze:
    """Grid-world environment in which a player must reach the exit while a monster chases it.

    State is the pair ``(player_pos, monster_pos)``, each a ``(row, col)``
    tuple. Actions are the ids 0 = up, 1 = down, 2 = left, 3 = right.
    """

    # (row delta, col delta) per action id: 0 = up, 1 = down, 2 = left, 3 = right.
    _ACTION_DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, exit_pos=(ROWS - 2, COLS - 1), player_pos=(1, 0), monster_pos=(5, 4)):
        """Create the environment.

        Args:
            exit_pos: ``(row, col)`` of the exit cell.
            player_pos: ``(row, col)`` where the player starts.
            monster_pos: ``(row, col)`` where the monster starts.
        """
        self.action_space = [0, 1, 2, 3]
        self.screen = None  # pygame window, created lazily by render()
        self.maze = maze
        self.exit_pos = exit_pos
        # Remember the configured starting cells so reset() restores them
        # rather than a hard-coded layout.
        self.start_player_pos = player_pos
        self.start_monster_pos = monster_pos
        self.player_pos = player_pos
        self.monster = Monster(monster_pos)

    def reset(self):
        """Move player and monster back to their starting cells and return the state."""
        self.player_pos = self.start_player_pos
        self.monster.pos = self.start_monster_pos
        return self.get_actual_state()

    def get_actual_state(self):
        """Return the current state as ``(player_pos, monster_pos)``."""
        return (self.player_pos, self.monster.pos)

    def step(self, action):
        """Apply the player's *action*, let the monster move, and score the result.

        Returns:
            ``(state, reward, done)``. The reward is -1.0 for an ordinary
            step, -100.0 when the player and monster share a cell, and 1.0
            when the player stands on the exit; ``done`` is True in the
            latter two cases. Being caught takes precedence over winning.
        """
        self.player_pos = self.move_player(self.player_pos, action)
        # The monster reacts to the player's new position.
        monster_actions = self.get_possible_actions(self.monster.pos)
        self.monster.move(self.player_pos, self.maze, monster_actions)

        done = False
        step_reward = -1.0
        if self.player_pos == self.monster.pos:
            print("Agent zginął po spotkaniu z potworem. Koniec gry!")
            step_reward = -100.0
            done = True
        elif self.player_pos == self.exit_pos:
            print("Wygrana!")
            step_reward = 1.0
            done = True
        return self.get_actual_state(), step_reward, done

    def _is_open(self, row, col):
        """Return True when ``(row, col)`` is inside the grid and not a wall."""
        return 0 <= row < ROWS and 0 <= col < COLS and self.maze[row][col] == 0

    def move_player(self, player_pos, action):
        """Return the cell reached from *player_pos* by *action*.

        The position is returned unchanged when the action id is unknown or
        the destination is outside the grid or a wall.
        """
        row, col = player_pos
        delta = self._ACTION_DELTAS.get(action)
        if delta is None:
            return row, col
        new_row, new_col = row + delta[0], col + delta[1]
        if self._is_open(new_row, new_col):
            return new_row, new_col
        return row, col

    def render(self):
        """Draw the current frame, opening the pygame window on first use."""
        if self.screen is None:
            self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.screen.fill(BLACK)
        self.draw_grid()
        pygame.display.flip()

    @staticmethod
    def _tile_rect(pos):
        """Return the pixel rectangle ``(x, y, w, h)`` of the tile at ``(row, col)``."""
        row, col = pos
        return (col * TILE_SIZE, row * TILE_SIZE, TILE_SIZE, TILE_SIZE)

    def draw_grid(self):
        """Paint the maze tiles, then the exit, start, player and monster on top."""
        for row in range(ROWS):
            for col in range(COLS):
                rect = self._tile_rect((row, col))
                # Background: white for paths, black for walls.
                color = WHITE if self.maze[row][col] == 0 else BLACK
                pygame.draw.rect(self.screen, color, rect)
                # One-pixel black outline around every tile.
                pygame.draw.rect(self.screen, BLACK, rect, 1)
        # Exit (green).
        pygame.draw.rect(self.screen, (0, 255, 0), self._tile_rect(self.exit_pos))
        # Start (yellow) - follows the configured starting cell.
        pygame.draw.rect(self.screen, (255, 255, 0), self._tile_rect(self.start_player_pos))
        # Player (blue).
        pygame.draw.rect(self.screen, BLUE, self._tile_rect(self.player_pos))
        # Monster (red).
        pygame.draw.rect(self.screen, (255, 0, 0), self._tile_rect(self.monster.pos))

    def get_possible_actions(self, pos):
        """Return the action ids, in ascending order, that lead from *pos* to a walkable cell."""
        row, col = pos
        return [
            action
            for action, (d_row, d_col) in self._ACTION_DELTAS.items()
            if self._is_open(row + d_row, col + d_col)
        ]

    def close(self):
        """Shut pygame down and forget the window so a later render() reopens it."""
        pygame.quit()
        # The surface is unusable after pygame.quit(); drop the reference.
        self.screen = None

    def get_all_states(self):
        """Return every walkable ``(row, col)`` cell in row-major order."""
        return [(row, col) for row in range(ROWS) for col in range(COLS) if self.maze[row][col] == 0]
class ApproximateQLearningAgent:
# Constructor: stores the hyperparameters and creates the initial weights.
#   alpha, epsilon, discount -- stored unchanged on the instance; presumably
#       the learning rate, exploration rate and discount factor (their use is
#       not visible in this excerpt -- confirm).
#   num_features -- number of columns of the weight matrix.
#   get_legal_actions -- callable stored on the instance; get_features calls
#       it with the full state tuple (player_pos, monster_pos).
#       NOTE(review): Maze.get_possible_actions expects a single (row, col)
#       position, so verify what the caller actually passes in here.
#   min_epsilon, epsilon_decay -- stored unchanged; presumably the floor and
#       the per-update multiplier for epsilon -- confirm against the code
#       that uses them.
def __init__(self, alpha, epsilon, discount, num_features, get_legal_actions, min_epsilon = 0.05, epsilon_decay = 0.995):
self.alpha = alpha
self.epsilon = epsilon
self.discount = discount
# Weights start as small uniform random values in [-0.1, 0.1) with shape
# (4, num_features).
# NOTE(review): the 4 presumably means one weight row per action
# (Maze.action_space has four entries) -- confirm.
self.weights = np.random.uniform(-0.1, 0.1, (4, num_features))
self.min_epsilon = min_epsilon
self.epsilon_decay = epsilon_decay
self.get_legal_actions = get_legal_actions
def get_features(self, state):
player_pos, monster_pos = state
player_row, player_col = player_pos
exit_row, exit_col = (ROWS - 2, COLS - 1)
monster_row, monster_col = monster_pos
bias = 1.0
exit_dist = (abs(player_row - exit_row) + abs(player_col - exit_col)) / (ROWS + COLS - 2)
monster_dist = (abs(player_row - monster_row) + abs(player_col - monster_col)) / (ROWS + COLS - 2)
available_moves = len(self.get_legal_actions(state)) / 4.0
collision_risk = 1.0 if abs(player_row - monster_row) + abs(player_col - monster_col)
Подробнее здесь: https://stackoverflow.com/questions/793 ... unner-game
Мобильная версия