Задача с линейной аппроксимацией Q-функции для игры в лабиринт

Задача с линейной аппроксимацией Q-функции для игры в лабиринт ⇐ Python

1 сообщение • Страница 1 из 1

Anonymous

Задача с линейной аппроксимацией Q-функции для игры в лабиринт

Цитата

Сообщение Anonymous » 17 янв 2025, 22:23

У меня проблема: агент плохо обучается при использовании алгоритма с линейной аппроксимацией Q-функции. Агент стоит на одном месте и не совершает никаких движений. Состояние в моей игре — это позиция игрока и позиция монстра, т. е. ((x, y), (t, w)). В качестве признаков для аппроксимации я выбрал расстояние от игрока до выхода, расстояние от игрока до монстра, количество доступных действий и возможное столкновение с монстром.
Код игры и реализация агента приведены ниже. Игрок — синий, монстр — красный, выход — зелёный, старт — жёлтый.
Игра выглядит так:

import pygame
import random
import numpy as np

# Board dimensions in tiles and the pixel size of one square tile.
ROWS = COLS = 11
TILE_SIZE = 32

# Window size in pixels, derived from the board dimensions.
WIDTH = COLS * TILE_SIZE
HEIGHT = ROWS * TILE_SIZE

# RGB colour triples used when drawing.
WHITE = (255,) * 3
BLACK = (0,) * 3
BLUE = (0, 0, 255)

# One string per maze row: '1' is a wall, '0' is a walkable cell.
_MAZE_LAYOUT = (
    "11111111111",
    "00000100001",
    "10101110101",
    "10000000101",
    "10111011101",
    "10000000001",
    "10101111101",
    "10000010001",
    "11011010101",
    "10000000100",
    "11111111111",
)

# Grid indexed as maze[row][col], holding the integers 0 and 1.
maze = [[int(cell) for cell in line] for line in _MAZE_LAYOUT]

class Monster:
    """Enemy that occupies one grid cell and mostly walks toward a target."""

    def __init__(self, pos):
        """Store the monster's current ``(row, col)`` cell."""
        self.pos = pos

    def move(self, target_pos, maze, available_actions):
        """Advance the monster by at most one cell and update ``self.pos``.

        When ``random.random()`` is below 0.1 the monster takes a random
        action drawn from ``available_actions`` (0 up, 1 down, 2 left,
        3 right), or stays put when that collection is empty. Otherwise it
        takes the first step, tried in the order down, up, right, left, that
        both reduces the distance to ``target_pos`` along that axis and
        leads to a cell whose ``maze`` value is 0. If no such step exists
        the monster does not move.

        Args:
            target_pos: ``(row, col)`` cell to chase.
            maze: Grid indexed as ``maze[row][col]``; 0 marks a free cell.
            available_actions: Action ids the random branch may choose from.
        """
        cur_row, cur_col = self.pos
        goal_row, goal_col = target_pos
        d_row = d_col = 0

        if random.random() >= 0.1:
            # Greedy chase: (is this direction useful?, row step, col step).
            candidates = (
                (cur_row < goal_row, 1, 0),
                (cur_row > goal_row, -1, 0),
                (cur_col < goal_col, 0, 1),
                (cur_col > goal_col, 0, -1),
            )
            for wanted, step_row, step_col in candidates:
                if wanted and maze[cur_row + step_row][cur_col + step_col] == 0:
                    d_row, d_col = step_row, step_col
                    break
        elif available_actions:
            picked = random.choice(available_actions)
            offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
            # Unknown action ids leave the monster where it is.
            d_row, d_col = offsets.get(picked, (0, 0))

        self.pos = (cur_row + d_row, cur_col + d_col)

class Maze:
    """Grid-world environment with one player and one chasing monster.

    Actions are the integers 0 (up), 1 (down), 2 (left) and 3 (right).
    A state is the pair ``(player_pos, monster_pos)`` of ``(row, col)``
    tuples. Cells of the module-level ``maze`` grid equal to 0 are walkable.
    """

    # (row, col) offset applied by each action id.
    _ACTION_DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, exit_pos=(ROWS - 2, COLS - 1), player_pos=(1, 0), monster_pos=(5, 4)):
        """Create the environment.

        Args:
            exit_pos: ``(row, col)`` of the exit cell.
            player_pos: ``(row, col)`` where the player starts.
            monster_pos: ``(row, col)`` where the monster starts.
        """
        self.action_space = [0, 1, 2, 3]
        self.screen = None  # created lazily by render()
        self.maze = maze
        self.exit_pos = exit_pos
        # Remember the starting cells so reset() restores the configuration
        # this instance was built with instead of fixed coordinates.
        self.start_pos = player_pos
        self.monster_start_pos = monster_pos
        self.player_pos = player_pos
        self.monster = Monster(monster_pos)

    def reset(self):
        """Put player and monster back on their start cells; return the state."""
        self.player_pos = self.start_pos
        self.monster.pos = self.monster_start_pos
        return self.get_actual_state()

    def get_actual_state(self):
        """Return the current state as ``(player_pos, monster_pos)``."""
        return (self.player_pos, self.monster.pos)

    def step(self, action):
        """Apply the player's action, then let the monster move.

        Returns:
            ``(state, reward, done)``. The reward is -100.0 when player and
            monster share a cell after both have moved, 1.0 when the player
            stands on the exit, and -1.0 otherwise. ``done`` is True in the
            first two cases.
        """
        self.player_pos = self.move_player(self.player_pos, action)

        monster_actions = self.get_possible_actions(self.monster.pos)
        self.monster.move(self.player_pos, self.maze, monster_actions)

        done = False
        step_reward = -1.0

        # The collision is checked first, so it wins over reaching the exit.
        if self.player_pos == self.monster.pos:
            print("Agent zginął po spotkaniu z potworem. Koniec gry!")
            step_reward = -100.0
            done = True
        elif self.player_pos == self.exit_pos:
            print("Wygrana!")
            step_reward = 1.0
            done = True

        return self.get_actual_state(), step_reward, done

    def move_player(self, player_pos, action):
        """Return the cell reached from ``player_pos`` by ``action``.

        The position is returned unchanged when the action is not legal
        there (wall, grid border, or unknown action id).
        """
        row, col = player_pos
        # Reuse the shared legality check rather than duplicating it.
        if action in self.get_possible_actions(player_pos):
            d_row, d_col = self._ACTION_DELTAS[action]
            row, col = row + d_row, col + d_col
        return row, col

    def render(self):
        """Draw the current frame, creating the window on first use."""
        if self.screen is None:
            self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.screen.fill(BLACK)
        self.draw_grid()
        pygame.display.flip()

    def _tile_rect(self, pos):
        """Return the pixel rectangle ``(x, y, w, h)`` of grid cell ``pos``."""
        row, col = pos
        return (col * TILE_SIZE, row * TILE_SIZE, TILE_SIZE, TILE_SIZE)

    def draw_grid(self):
        """Paint maze tiles, then the exit, start, player and monster."""
        for row in range(ROWS):
            for col in range(COLS):
                # Background colour: white for paths, black for walls.
                color = WHITE if self.maze[row][col] == 0 else BLACK
                rect = self._tile_rect((row, col))
                pygame.draw.rect(self.screen, color, rect)
                pygame.draw.rect(self.screen, BLACK, rect, 1)

        # Exit (green).
        pygame.draw.rect(self.screen, (0, 255, 0), self._tile_rect(self.exit_pos))

        # Start (yellow): follows the configured start cell.
        pygame.draw.rect(self.screen, (255, 255, 0), self._tile_rect(self.start_pos))

        # Player (blue).
        pygame.draw.rect(self.screen, BLUE, self._tile_rect(self.player_pos))

        # Monster (red).
        pygame.draw.rect(self.screen, (255, 0, 0), self._tile_rect(self.monster.pos))

    def get_possible_actions(self, pos):
        """Return the action ids that lead from ``pos`` to a walkable cell."""
        row, col = pos
        actions = []
        if row > 0 and self.maze[row - 1][col] == 0:
            actions.append(0)  # up
        if row < ROWS - 1 and self.maze[row + 1][col] == 0:
            actions.append(1)  # down
        if col > 0 and self.maze[row][col - 1] == 0:
            actions.append(2)  # left
        if col < COLS - 1 and self.maze[row][col + 1] == 0:
            actions.append(3)  # right
        return actions

    def close(self):
        """Shut pygame down."""
        pygame.quit()

    def get_all_states(self):
        """Return every walkable ``(row, col)`` cell in row-major order."""
        return [(row, col) for row in range(ROWS) for col in range(COLS) if self.maze[row][col] == 0]

class ApproximateQLearningAgent:
def __init__(self, alpha, epsilon, discount, num_features, get_legal_actions, min_epsilon = 0.05, epsilon_decay = 0.995):
self.alpha = alpha
self.epsilon = epsilon
self.discount = discount
self.weights = np.random.uniform(-0.1, 0.1, (4, num_features))
self.min_epsilon = min_epsilon
self.epsilon_decay = epsilon_decay
self.get_legal_actions = get_legal_actions

def get_features(self, state):
player_pos, monster_pos = state
player_row, player_col = player_pos
exit_row, exit_col = (ROWS - 2, COLS - 1)
monster_row, monster_col = monster_pos

bias = 1.0
exit_dist = (abs(player_row - exit_row) + abs(player_col - exit_col)) / (ROWS + COLS - 2)
monster_dist = (abs(player_row - monster_row) + abs(player_col - monster_col)) / (ROWS + COLS - 2)
available_moves = len(self.get_legal_actions(state)) / 4.0
collision_risk = 1.0 if abs(player_row - monster_row) + abs(player_col - monster_col)

Подробнее здесь: https://stackoverflow.com/questions/793 ... unner-game

1737141830

Anonymous

У меня проблема: агент плохо обучается при использовании алгоритма с линейной аппроксимацией Q-функции. Агент стоит на одном месте и не совершает никаких движений. Состояние в моей игре — это позиция игрока и позиция монстра, т. е. ((x, y), (t, w)). В качестве признаков для аппроксимации я выбрал расстояние от игрока до выхода, расстояние от игрока до монстра, количество доступных действий и возможное столкновение с монстром.
Код игры и реализация агента приведены ниже. [b]Игрок — синий, монстр — красный, выход — зелёный, старт — жёлтый[/b].
Игра выглядит так:
[img]https://i.sstatic.net/4cDYMwLj.png[/img]

import pygame
import random
import numpy as np

# Board dimensions in tiles and the pixel size of one square tile.
ROWS = COLS = 11
TILE_SIZE = 32

# Window size in pixels, derived from the board dimensions.
WIDTH = COLS * TILE_SIZE
HEIGHT = ROWS * TILE_SIZE

# RGB colour triples used when drawing.
WHITE = (255,) * 3
BLACK = (0,) * 3
BLUE = (0, 0, 255)

# One string per maze row: '1' is a wall, '0' is a walkable cell.
_MAZE_LAYOUT = (
    "11111111111",
    "00000100001",
    "10101110101",
    "10000000101",
    "10111011101",
    "10000000001",
    "10101111101",
    "10000010001",
    "11011010101",
    "10000000100",
    "11111111111",
)

# Grid indexed as maze[row][col], holding the integers 0 and 1.
maze = [[int(cell) for cell in line] for line in _MAZE_LAYOUT]

class Monster:
    """Enemy that occupies one grid cell and mostly walks toward a target."""

    def __init__(self, pos):
        """Store the monster's current ``(row, col)`` cell."""
        self.pos = pos

    def move(self, target_pos, maze, available_actions):
        """Advance the monster by at most one cell and update ``self.pos``.

        When ``random.random()`` is below 0.1 the monster takes a random
        action drawn from ``available_actions`` (0 up, 1 down, 2 left,
        3 right), or stays put when that collection is empty. Otherwise it
        takes the first step, tried in the order down, up, right, left, that
        both reduces the distance to ``target_pos`` along that axis and
        leads to a cell whose ``maze`` value is 0. If no such step exists
        the monster does not move.

        Args:
            target_pos: ``(row, col)`` cell to chase.
            maze: Grid indexed as ``maze[row][col]``; 0 marks a free cell.
            available_actions: Action ids the random branch may choose from.
        """
        cur_row, cur_col = self.pos
        goal_row, goal_col = target_pos
        d_row = d_col = 0

        if random.random() >= 0.1:
            # Greedy chase: (is this direction useful?, row step, col step).
            candidates = (
                (cur_row < goal_row, 1, 0),
                (cur_row > goal_row, -1, 0),
                (cur_col < goal_col, 0, 1),
                (cur_col > goal_col, 0, -1),
            )
            for wanted, step_row, step_col in candidates:
                if wanted and maze[cur_row + step_row][cur_col + step_col] == 0:
                    d_row, d_col = step_row, step_col
                    break
        elif available_actions:
            picked = random.choice(available_actions)
            offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
            # Unknown action ids leave the monster where it is.
            d_row, d_col = offsets.get(picked, (0, 0))

        self.pos = (cur_row + d_row, cur_col + d_col)

class Maze:
    """Grid-world environment with one player and one chasing monster.

    Actions are the integers 0 (up), 1 (down), 2 (left) and 3 (right).
    A state is the pair ``(player_pos, monster_pos)`` of ``(row, col)``
    tuples. Cells of the module-level ``maze`` grid equal to 0 are walkable.
    """

    # (row, col) offset applied by each action id.
    _ACTION_DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, exit_pos=(ROWS - 2, COLS - 1), player_pos=(1, 0), monster_pos=(5, 4)):
        """Create the environment.

        Args:
            exit_pos: ``(row, col)`` of the exit cell.
            player_pos: ``(row, col)`` where the player starts.
            monster_pos: ``(row, col)`` where the monster starts.
        """
        self.action_space = [0, 1, 2, 3]
        self.screen = None  # created lazily by render()
        self.maze = maze
        self.exit_pos = exit_pos
        # Remember the starting cells so reset() restores the configuration
        # this instance was built with instead of fixed coordinates.
        self.start_pos = player_pos
        self.monster_start_pos = monster_pos
        self.player_pos = player_pos
        self.monster = Monster(monster_pos)

    def reset(self):
        """Put player and monster back on their start cells; return the state."""
        self.player_pos = self.start_pos
        self.monster.pos = self.monster_start_pos
        return self.get_actual_state()

    def get_actual_state(self):
        """Return the current state as ``(player_pos, monster_pos)``."""
        return (self.player_pos, self.monster.pos)

    def step(self, action):
        """Apply the player's action, then let the monster move.

        Returns:
            ``(state, reward, done)``. The reward is -100.0 when player and
            monster share a cell after both have moved, 1.0 when the player
            stands on the exit, and -1.0 otherwise. ``done`` is True in the
            first two cases.
        """
        self.player_pos = self.move_player(self.player_pos, action)

        monster_actions = self.get_possible_actions(self.monster.pos)
        self.monster.move(self.player_pos, self.maze, monster_actions)

        done = False
        step_reward = -1.0

        # The collision is checked first, so it wins over reaching the exit.
        if self.player_pos == self.monster.pos:
            print("Agent zginął po spotkaniu z potworem.  Koniec gry!")
            step_reward = -100.0
            done = True
        elif self.player_pos == self.exit_pos:
            print("Wygrana!")
            step_reward = 1.0
            done = True

        return self.get_actual_state(), step_reward, done

    def move_player(self, player_pos, action):
        """Return the cell reached from ``player_pos`` by ``action``.

        The position is returned unchanged when the action is not legal
        there (wall, grid border, or unknown action id).
        """
        row, col = player_pos
        # Reuse the shared legality check rather than duplicating it.
        if action in self.get_possible_actions(player_pos):
            d_row, d_col = self._ACTION_DELTAS[action]
            row, col = row + d_row, col + d_col
        return row, col

    def render(self):
        """Draw the current frame, creating the window on first use."""
        if self.screen is None:
            self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.screen.fill(BLACK)
        self.draw_grid()
        pygame.display.flip()

    def _tile_rect(self, pos):
        """Return the pixel rectangle ``(x, y, w, h)`` of grid cell ``pos``."""
        row, col = pos
        return (col * TILE_SIZE, row * TILE_SIZE, TILE_SIZE, TILE_SIZE)

    def draw_grid(self):
        """Paint maze tiles, then the exit, start, player and monster."""
        for row in range(ROWS):
            for col in range(COLS):
                # Background colour: white for paths, black for walls.
                color = WHITE if self.maze[row][col] == 0 else BLACK
                rect = self._tile_rect((row, col))
                pygame.draw.rect(self.screen, color, rect)
                pygame.draw.rect(self.screen, BLACK, rect, 1)

        # Exit (green).
        pygame.draw.rect(self.screen, (0, 255, 0), self._tile_rect(self.exit_pos))

        # Start (yellow): follows the configured start cell.
        pygame.draw.rect(self.screen, (255, 255, 0), self._tile_rect(self.start_pos))

        # Player (blue).
        pygame.draw.rect(self.screen, BLUE, self._tile_rect(self.player_pos))

        # Monster (red).
        pygame.draw.rect(self.screen, (255, 0, 0), self._tile_rect(self.monster.pos))

    def get_possible_actions(self, pos):
        """Return the action ids that lead from ``pos`` to a walkable cell."""
        row, col = pos
        actions = []
        if row > 0 and self.maze[row - 1][col] == 0:
            actions.append(0)  # up
        if row < ROWS - 1 and self.maze[row + 1][col] == 0:
            actions.append(1)  # down
        if col > 0 and self.maze[row][col - 1] == 0:
            actions.append(2)  # left
        if col < COLS - 1 and self.maze[row][col + 1] == 0:
            actions.append(3)  # right
        return actions

    def close(self):
        """Shut pygame down."""
        pygame.quit()

    def get_all_states(self):
        """Return every walkable ``(row, col)`` cell in row-major order."""
        return [(row, col) for row in range(ROWS) for col in range(COLS) if self.maze[row][col] == 0]

class ApproximateQLearningAgent:
def __init__(self, alpha, epsilon, discount, num_features, get_legal_actions, min_epsilon = 0.05, epsilon_decay = 0.995):
self.alpha = alpha
self.epsilon = epsilon
self.discount = discount
self.weights = np.random.uniform(-0.1, 0.1, (4, num_features))
self.min_epsilon = min_epsilon
self.epsilon_decay = epsilon_decay
self.get_legal_actions = get_legal_actions

def get_features(self, state):
player_pos, monster_pos = state
player_row, player_col = player_pos
exit_row, exit_col = (ROWS - 2, COLS - 1)
monster_row, monster_col = monster_pos

bias = 1.0
exit_dist = (abs(player_row - exit_row) + abs(player_col - exit_col)) / (ROWS + COLS - 2)
monster_dist = (abs(player_row - monster_row) + abs(player_col - monster_col)) / (ROWS + COLS - 2)
available_moves = len(self.get_legal_actions(state)) / 4.0
collision_risk = 1.0 if abs(player_row - monster_row) + abs(player_col - monster_col) 

Подробнее здесь: [url]https://stackoverflow.com/questions/79365327/problem-with-linear-q-function-approximation-for-a-maze-runner-game[/url]