Я включил MCVE из своего кода ниже:
Код: Выделить всё
main.cc
Код: Выделить всё
#include "node.hpp"
#include
#include
#include
#include
#include
#include
#define DEBUG 0
void print_board(uint16_t ai_board, uint16_t enemy_board, char ai, char player) {
char board[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' '};
for (int i = 0; i < 9; i++) {
if (ai_board & (1 0)
return;
for(int pos = 0; pos < 9; pos++) {
bool p1 = this->ai_board & (1 enemy_board & (1 arena, !this->is_ai_turn);
n.parent = this;
n.ai_board = this->ai_board;
n.enemy_board = this->enemy_board;
if(this->is_ai_turn)
n.ai_board |= 1 push_back(n);
assert(this->child_count < 9);
this->children[this->child_count++] = &this->arena->back();
}
}
}
void Node::SimulateAndBackpropagate() {
int eval = this->GetWinner();
uint16_t saved_ai_board = this->ai_board;
uint16_t saved_enemy_board = this->enemy_board;
bool saved_turn = this->is_ai_turn;
std::random_device rd;
std::mt19937 gen(rd());
assert((saved_ai_board & saved_enemy_board) == 0);
while (!eval) {
std::vector available_moves;
for (int pos = 0; pos < 9; pos++) {
bool p1 = this->ai_board & (1 enemy_board & (1 is_ai_turn) {
this->ai_board |= 1 enemy_board |= 1 is_ai_turn = !this->is_ai_turn;
eval = this->GetWinner();
}
this->ai_board = saved_ai_board;
this->enemy_board = saved_enemy_board;
this->is_ai_turn = saved_turn;
auto curr_node = this;
while (curr_node != nullptr) {
curr_node->visit_count++;
curr_node->eval += curr_node->is_ai_turn ? -eval : eval;
curr_node = curr_node->parent;
}
}
Node *Node::CalculateBestMove(size_t iter_count) {
std::random_device rd;
std::mt19937 gen(rd());
for (size_t i = 0; i < iter_count; i++) {
Node *leaf = this->FindBestLeafNode();
leaf->CreateChildren();
if (leaf->child_count > 0) {
std::uniform_int_distribution dis(0, leaf->child_count - 1);
leaf = leaf->children[dis(gen)];
}
leaf->SimulateAndBackpropagate();
}
int64_t best_eval = INT64_MIN;
Node *best_node = nullptr;
for (int i = 0; i < this->child_count; i++) {
if (this->children[i]->eval > best_eval) {
best_eval = this->children[i]->eval;
best_node = this->children[i];
}
}
assert(best_node != nullptr);
return best_node;
}
int Node::GetWinner() {
for (const uint16_t mask :
{0b000000111, 0b000111000, 0b111000000, 0b001001001, 0b010010010,
0b100100100, 0b100010001, 0b001010100}) {
if ((this->ai_board & mask) == mask) {
return 1;
} else if ((this->enemy_board & mask) == mask) {
return -1;
}
}
return 0;
}
double Node::GetUcbScore() {
auto parent = this->parent != nullptr ? this->parent : this;
if (this->visit_count == 0)
return DBL_MAX;
constexpr double c = 1.4;
double exploitation = (double)this->eval / this->visit_count;
double exploration = c * sqrt(log(parent->visit_count) / this->visit_count);
return exploration + exploitation;
}
Вот пример, когда ИИ (X) делает ход, игнорирующий выигрышный ход противника:
Код: Выделить всё
A B C
| X | 1
---|---|---
| | 2
---|---|---
O | | O 3
Я провел много работ по отладке и не смог найти ошибку, поэтому считаю, что моя реализация MCTS неверна.
Подробнее здесь: https://stackoverflow.com/questions/787 ... ementation