У меня проблема: агент плохо обучается при использовании алгоритма аппроксимации Q-функции. Он стоит на одном месте и не совершает никаких движений. Состояние в моей игре — это позиция игрока и позиция монстра, т. е. ((x, y), (t, w)). В качестве признаков для аппроксимации я выбрал расстояние от игрока до выхода, расстояние от игрока до монстра, количество доступных действий и возможное столкновение с монстром.
Код игры и реализация агента приведены ниже.
class Monster:
    """Maze monster that mostly chases a target cell and sometimes wanders."""

    def __init__(self, pos):
        """Remember the monster's current cell as a ``(row, col)`` pair."""
        self.pos = pos

    def move(self, target_pos, maze, available_actions):
        """Advance the monster by at most one cell and store the new position.

        With probability 0.1 a random action is drawn from
        ``available_actions`` (0 = up, 1 = down, 2 = left, 3 = right);
        if that list is empty the monster stays put. Otherwise the monster
        steps toward ``target_pos``, trying the row axis before the column
        axis, and only into cells where ``maze[row][col] == 0``.
        """
        cur_row, cur_col = self.pos
        goal_row, goal_col = target_pos

        if random.random() < 0.1:
            # Wander: pick any of the moves the caller reported as possible.
            if available_actions:
                offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
                picked = random.choice(available_actions)
                d_row, d_col = offsets.get(picked, (0, 0))
                cur_row += d_row
                cur_col += d_col
        # Chase: the first direction that closes the gap and is free wins.
        elif cur_row < goal_row and maze[cur_row + 1][cur_col] == 0:
            cur_row += 1
        elif cur_row > goal_row and maze[cur_row - 1][cur_col] == 0:
            cur_row -= 1
        elif cur_col < goal_col and maze[cur_row][cur_col + 1] == 0:
            cur_col += 1
        elif cur_col > goal_col and maze[cur_row][cur_col - 1] == 0:
            cur_col -= 1

        self.pos = (cur_row, cur_col)
class Maze:
    """Grid-maze environment with a player, a chasing monster and an exit.

    Depends on names defined elsewhere in the file: ``maze`` (the grid, in
    which ``0`` marks a walkable cell), ``ROWS``/``COLS``, ``Monster`` and
    the pygame constants/images used for rendering.
    Actions are encoded as 0 = up, 1 = down, 2 = left, 3 = right.
    """

    # (row delta, col delta) applied for each action code.
    _DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, exit_pos=(ROWS - 2, COLS - 1), player_pos=(1, 0), monster_pos=(5, 4)):
        """Create the environment.

        Args:
            exit_pos: ``(row, col)`` of the exit cell.
            player_pos: ``(row, col)`` where the player starts.
            monster_pos: ``(row, col)`` where the monster starts.
        """
        self.action_space = [0, 1, 2, 3]
        self.screen = None  # created lazily by render()
        self.maze = maze
        self.exit_pos = exit_pos
        # Keep the starting cells so reset() restores what the caller asked
        # for instead of hard-coded defaults.
        self._start_player_pos = player_pos
        self._start_monster_pos = monster_pos
        self.player_pos = player_pos
        self.monster = Monster(monster_pos)

    def reset(self):
        """Put player and monster back on their starting cells.

        Returns:
            The state ``(player_pos, monster_pos)`` after the reset.
        """
        self.player_pos = self._start_player_pos
        self.monster.pos = self._start_monster_pos
        return self.get_actual_state()

    def get_actual_state(self):
        """Return the current state as ``(player_pos, monster_pos)``."""
        return (self.player_pos, self.monster.pos)

    def step(self, action):
        """Apply the player's action, then let the monster move.

        Returns:
            ``(state, reward, done)``. The reward is -1.0 for an ordinary
            step, -100.0 when player and monster end on the same cell, and
            1.0 when the player ends on the exit.
        """
        # NOTE(review): reaching the exit pays only +1.0 while every step
        # costs -1.0; this reward scale may be worth revisiting if the agent
        # fails to learn to head for the exit.
        self.player_pos = self.move_player(self.player_pos, action)
        monster_actions = self.get_possible_actions(self.monster.pos)
        self.monster.move(self.player_pos, self.maze, monster_actions)

        done = False
        step_reward = -1.0
        if self.player_pos == self.monster.pos:
            print(f"Agent zginął po spotkaniu z potworem. Koniec gry!")
            step_reward = -100.0
            done = True
        elif self.player_pos == self.exit_pos:
            print(f"Wygrana!")
            step_reward = 1.0
            done = True
        return self.get_actual_state(), step_reward, done

    def move_player(self, player_pos, action):
        """Return the cell reached from ``player_pos`` by ``action``.

        The position is returned unchanged when the action is unknown or the
        destination is outside the grid or not walkable.
        """
        row, col = player_pos
        # Reuse the single passability check instead of duplicating it.
        if action in self.get_possible_actions(player_pos):
            d_row, d_col = self._DELTAS[action]
            row += d_row
            col += d_col
        return row, col

    def render(self):
        """Draw the current frame, creating the pygame window on first use."""
        if self.screen is None:
            self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.screen.fill(BLACK)
        self.draw_grid()
        pygame.display.flip()

    def draw_grid(self):
        """Paint every tile, then the exit, start, player and monster images."""
        for row in range(ROWS):
            for col in range(COLS):
                color = WHITE if self.maze[row][col] == 0 else BLACK
                tile = (col * TILE_SIZE, row * TILE_SIZE, TILE_SIZE, TILE_SIZE)
                pygame.draw.rect(self.screen, color, tile)
                pygame.draw.rect(self.screen, BLACK, tile, 1)  # 1px outline
        # Screen coordinates are (x, y) = (col, row), hence the swapped indices.
        self.screen.blit(exit_image, (self.exit_pos[1] * TILE_SIZE, self.exit_pos[0] * TILE_SIZE))
        self.screen.blit(start_image, (self._start_player_pos[1] * TILE_SIZE, self._start_player_pos[0] * TILE_SIZE))
        self.screen.blit(player_image, (self.player_pos[1] * TILE_SIZE, self.player_pos[0] * TILE_SIZE))
        self.screen.blit(monster_image, (self.monster.pos[1] * TILE_SIZE, self.monster.pos[0] * TILE_SIZE))

    def get_possible_actions(self, pos):
        """List the action codes that lead from ``pos`` to a walkable cell."""
        row, col = pos
        actions = []
        if row > 0 and self.maze[row - 1][col] == 0:
            actions.append(0)  # up
        if row < ROWS - 1 and self.maze[row + 1][col] == 0:
            actions.append(1)  # down
        if col > 0 and self.maze[row][col - 1] == 0:
            actions.append(2)  # left
        if col < COLS - 1 and self.maze[row][col + 1] == 0:
            actions.append(3)  # right
        return actions

    def close(self):
        """Shut down pygame."""
        pygame.quit()

    def get_all_states(self):
        """Return every walkable ``(row, col)`` cell of the grid."""
        return [(row, col) for row in range(ROWS) for col in range(COLS) if self.maze[row][col] == 0]
class ApproximateQLearningAgent:
def __init__(self, alpha, epsilon, discount, num_features, get_legal_actions, min_epsilon = 0.05, epsilon_decay = 0.995):
self.alpha = alpha
self.epsilon = epsilon
self.discount = discount
self.weights = np.random.uniform(-0.1, 0.1, (4, num_features))
self.min_epsilon = min_epsilon
self.epsilon_decay = epsilon_decay
self.get_legal_actions = get_legal_actions
def get_features(self, state):
player_pos, monster_pos = state
player_row, player_col = player_pos
exit_row, exit_col = (ROWS - 2, COLS - 1)
monster_row, monster_col = monster_pos
bias = 1.0
exit_dist = (abs(player_row - exit_row) + abs(player_col - exit_col)) / (ROWS + COLS - 2)
monster_dist = (abs(player_row - monster_row) + abs(player_col - monster_col)) / (ROWS + COLS - 2)
available_moves = len(self.get_legal_actions(state)) / 4.0
collision_risk = 1.0 if abs(player_row - monster_row) + abs(player_col - monster_col)
Подробнее здесь: https://stackoverflow.com/questions/79365327/problem-with-linear-q-function-approximation-for-a-maze-runner-game
Задача с линейной аппроксимацией Q-функции для игры в лабиринт ⇐ Python
Программы на Python
-
Anonymous
1737129192
Anonymous
У меня проблема: агент плохо обучается при использовании алгоритма аппроксимации Q-функции. Он стоит на одном месте и не совершает никаких движений. Состояние в моей игре — это позиция игрока и позиция монстра, т. е. ((x, y), (t, w)). В качестве признаков для аппроксимации я выбрал расстояние от игрока до выхода, расстояние от игрока до монстра, количество доступных действий и возможное столкновение с монстром.
Код игры и реализация агента приведены ниже.
class Monster:
    """Maze monster that mostly chases a target cell and sometimes wanders."""

    def __init__(self, pos):
        """Remember the monster's current cell as a ``(row, col)`` pair."""
        self.pos = pos

    def move(self, target_pos, maze, available_actions):
        """Advance the monster by at most one cell and store the new position.

        With probability 0.1 a random action is drawn from
        ``available_actions`` (0 = up, 1 = down, 2 = left, 3 = right);
        if that list is empty the monster stays put. Otherwise the monster
        steps toward ``target_pos``, trying the row axis before the column
        axis, and only into cells where ``maze[row][col] == 0``.
        """
        cur_row, cur_col = self.pos
        goal_row, goal_col = target_pos

        if random.random() < 0.1:
            # Wander: pick any of the moves the caller reported as possible.
            if available_actions:
                offsets = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}
                picked = random.choice(available_actions)
                d_row, d_col = offsets.get(picked, (0, 0))
                cur_row += d_row
                cur_col += d_col
        # Chase: the first direction that closes the gap and is free wins.
        elif cur_row < goal_row and maze[cur_row + 1][cur_col] == 0:
            cur_row += 1
        elif cur_row > goal_row and maze[cur_row - 1][cur_col] == 0:
            cur_row -= 1
        elif cur_col < goal_col and maze[cur_row][cur_col + 1] == 0:
            cur_col += 1
        elif cur_col > goal_col and maze[cur_row][cur_col - 1] == 0:
            cur_col -= 1

        self.pos = (cur_row, cur_col)
class Maze:
    """Grid-maze environment with a player, a chasing monster and an exit.

    Depends on names defined elsewhere in the file: ``maze`` (the grid, in
    which ``0`` marks a walkable cell), ``ROWS``/``COLS``, ``Monster`` and
    the pygame constants/images used for rendering.
    Actions are encoded as 0 = up, 1 = down, 2 = left, 3 = right.
    """

    # (row delta, col delta) applied for each action code.
    _DELTAS = {0: (-1, 0), 1: (1, 0), 2: (0, -1), 3: (0, 1)}

    def __init__(self, exit_pos=(ROWS - 2, COLS - 1), player_pos=(1, 0), monster_pos=(5, 4)):
        """Create the environment.

        Args:
            exit_pos: ``(row, col)`` of the exit cell.
            player_pos: ``(row, col)`` where the player starts.
            monster_pos: ``(row, col)`` where the monster starts.
        """
        self.action_space = [0, 1, 2, 3]
        self.screen = None  # created lazily by render()
        self.maze = maze
        self.exit_pos = exit_pos
        # Keep the starting cells so reset() restores what the caller asked
        # for instead of hard-coded defaults.
        self._start_player_pos = player_pos
        self._start_monster_pos = monster_pos
        self.player_pos = player_pos
        self.monster = Monster(monster_pos)

    def reset(self):
        """Put player and monster back on their starting cells.

        Returns:
            The state ``(player_pos, monster_pos)`` after the reset.
        """
        self.player_pos = self._start_player_pos
        self.monster.pos = self._start_monster_pos
        return self.get_actual_state()

    def get_actual_state(self):
        """Return the current state as ``(player_pos, monster_pos)``."""
        return (self.player_pos, self.monster.pos)

    def step(self, action):
        """Apply the player's action, then let the monster move.

        Returns:
            ``(state, reward, done)``. The reward is -1.0 for an ordinary
            step, -100.0 when player and monster end on the same cell, and
            1.0 when the player ends on the exit.
        """
        # NOTE(review): reaching the exit pays only +1.0 while every step
        # costs -1.0; this reward scale may be worth revisiting if the agent
        # fails to learn to head for the exit.
        self.player_pos = self.move_player(self.player_pos, action)
        monster_actions = self.get_possible_actions(self.monster.pos)
        self.monster.move(self.player_pos, self.maze, monster_actions)

        done = False
        step_reward = -1.0
        if self.player_pos == self.monster.pos:
            print(f"Agent zginął po spotkaniu z potworem. Koniec gry!")
            step_reward = -100.0
            done = True
        elif self.player_pos == self.exit_pos:
            print(f"Wygrana!")
            step_reward = 1.0
            done = True
        return self.get_actual_state(), step_reward, done

    def move_player(self, player_pos, action):
        """Return the cell reached from ``player_pos`` by ``action``.

        The position is returned unchanged when the action is unknown or the
        destination is outside the grid or not walkable.
        """
        row, col = player_pos
        # Reuse the single passability check instead of duplicating it.
        if action in self.get_possible_actions(player_pos):
            d_row, d_col = self._DELTAS[action]
            row += d_row
            col += d_col
        return row, col

    def render(self):
        """Draw the current frame, creating the pygame window on first use."""
        if self.screen is None:
            self.screen = pygame.display.set_mode((WIDTH, HEIGHT))
        self.screen.fill(BLACK)
        self.draw_grid()
        pygame.display.flip()

    def draw_grid(self):
        """Paint every tile, then the exit, start, player and monster images."""
        for row in range(ROWS):
            for col in range(COLS):
                color = WHITE if self.maze[row][col] == 0 else BLACK
                tile = (col * TILE_SIZE, row * TILE_SIZE, TILE_SIZE, TILE_SIZE)
                pygame.draw.rect(self.screen, color, tile)
                pygame.draw.rect(self.screen, BLACK, tile, 1)  # 1px outline
        # Screen coordinates are (x, y) = (col, row), hence the swapped indices.
        self.screen.blit(exit_image, (self.exit_pos[1] * TILE_SIZE, self.exit_pos[0] * TILE_SIZE))
        self.screen.blit(start_image, (self._start_player_pos[1] * TILE_SIZE, self._start_player_pos[0] * TILE_SIZE))
        self.screen.blit(player_image, (self.player_pos[1] * TILE_SIZE, self.player_pos[0] * TILE_SIZE))
        self.screen.blit(monster_image, (self.monster.pos[1] * TILE_SIZE, self.monster.pos[0] * TILE_SIZE))

    def get_possible_actions(self, pos):
        """List the action codes that lead from ``pos`` to a walkable cell."""
        row, col = pos
        actions = []
        if row > 0 and self.maze[row - 1][col] == 0:
            actions.append(0)  # up
        if row < ROWS - 1 and self.maze[row + 1][col] == 0:
            actions.append(1)  # down
        if col > 0 and self.maze[row][col - 1] == 0:
            actions.append(2)  # left
        if col < COLS - 1 and self.maze[row][col + 1] == 0:
            actions.append(3)  # right
        return actions

    def close(self):
        """Shut down pygame."""
        pygame.quit()

    def get_all_states(self):
        """Return every walkable ``(row, col)`` cell of the grid."""
        return [(row, col) for row in range(ROWS) for col in range(COLS) if self.maze[row][col] == 0]
class ApproximateQLearningAgent:
def __init__(self, alpha, epsilon, discount, num_features, get_legal_actions, min_epsilon = 0.05, epsilon_decay = 0.995):
self.alpha = alpha
self.epsilon = epsilon
self.discount = discount
self.weights = np.random.uniform(-0.1, 0.1, (4, num_features))
self.min_epsilon = min_epsilon
self.epsilon_decay = epsilon_decay
self.get_legal_actions = get_legal_actions
def get_features(self, state):
player_pos, monster_pos = state
player_row, player_col = player_pos
exit_row, exit_col = (ROWS - 2, COLS - 1)
monster_row, monster_col = monster_pos
bias = 1.0
exit_dist = (abs(player_row - exit_row) + abs(player_col - exit_col)) / (ROWS + COLS - 2)
monster_dist = (abs(player_row - monster_row) + abs(player_col - monster_col)) / (ROWS + COLS - 2)
available_moves = len(self.get_legal_actions(state)) / 4.0
collision_risk = 1.0 if abs(player_row - monster_row) + abs(player_col - monster_col)
Подробнее здесь: [url]https://stackoverflow.com/questions/79365327/problem-with-linear-q-function-approximation-for-a-maze-runner-game[/url]
Ответить
1 сообщение
• Страница 1 из 1
Перейти
- Кемерово-IT
- ↳ Javascript
- ↳ C#
- ↳ JAVA
- ↳ Elasticsearch aggregation
- ↳ Python
- ↳ Php
- ↳ Android
- ↳ Html
- ↳ Jquery
- ↳ C++
- ↳ IOS
- ↳ CSS
- ↳ Excel
- ↳ Linux
- ↳ Apache
- ↳ MySql
- Детский мир
- Для души
- ↳ Музыкальные инструменты даром
- ↳ Печатная продукция даром
- Внешняя красота и здоровье
- ↳ Одежда и обувь для взрослых даром
- ↳ Товары для здоровья
- ↳ Физкультура и спорт
- Техника - даром!
- ↳ Автомобилистам
- ↳ Компьютерная техника
- ↳ Плиты: газовые и электрические
- ↳ Холодильники
- ↳ Стиральные машины
- ↳ Телевизоры
- ↳ Телефоны, смартфоны, планшеты
- ↳ Швейные машинки
- ↳ Прочая электроника и техника
- ↳ Фототехника
- Ремонт и интерьер
- ↳ Стройматериалы, инструмент
- ↳ Мебель и предметы интерьера даром
- ↳ Сантехника
- Другие темы
- ↳ Разное даром
- ↳ Давай меняться!
- ↳ Отдам\возьму за копеечку
- ↳ Работа и подработка в Кемерове
- ↳ Давай с тобой поговорим...
Мобильная версия