Custom Transformer-based chatbot model does not generate valid responses


Post by Anonymous »

I am currently working on building my own chatbot with a Transformer-based model for a personal project. Even though I have tried different hyperparameters, adjusted the number of epochs, and increased the dataset size, I keep running into a problem where the model fails to generate valid responses from my datasets.
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, random_split
import pandas as pd
import math
from transformers import BertTokenizer

# Define the tokenizer
tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

# Load the conversational data from a CSV file
def load_conversational_data(file_path):
    data = pd.read_csv(file_path)
    print("Loaded data:")
    print(data.head())
    return data['input'].tolist(), data['response'].tolist()

class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=50):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)

class MultiHeadAttention(nn.Module):
    def __init__(self, d_model, num_heads, dropout=0.1):
        super(MultiHeadAttention, self).__init__()
        assert d_model % num_heads == 0
        self.d_head = d_model // num_heads
        self.num_heads = num_heads
        self.linear_q = nn.Linear(d_model, d_model)
        self.linear_k = nn.Linear(d_model, d_model)
        self.linear_v = nn.Linear(d_model, d_model)
        self.linear_out = nn.Linear(d_model, d_model)
        self.dropout = nn.Dropout(dropout)

    def forward(self, query, key, value, mask=None):
        batch_size = query.size(0)
        seq_length = query.size(1)
        q = self.linear_q(query)
        k = self.linear_k(key)
        v = self.linear_v(value)
        q = q.view(batch_size, seq_length, self.num_heads, self.d_head).transpose(1, 2)
        k = k.view(batch_size, seq_length, self.num_heads, self.d_head).transpose(1, 2)
        v = v.view(batch_size, seq_length, self.num_heads, self.d_head).transpose(1, 2)
        scores = torch.matmul(q, k.transpose(2, 3)) / math.sqrt(self.d_head)

        if mask is not None:
            mask = mask.unsqueeze(1).unsqueeze(1)
            scores = scores.masked_fill(mask == 0, float('-inf'))

        attn_weights = torch.softmax(scores, dim=-1)
        attn_weights = self.dropout(attn_weights)
        context = torch.matmul(attn_weights, v)
        context = context.transpose(1, 2).contiguous().view(batch_size, seq_length, self.num_heads * self.d_head)
        output = self.linear_out(context)
        return output

class FeedForward(nn.Module):
    def __init__(self, d_model, d_ff, dropout=0.1):
        super(FeedForward, self).__init__()
        self.linear1 = nn.Linear(d_model, d_ff)
        self.dropout = nn.Dropout(dropout)
        self.linear2 = nn.Linear(d_ff, d_model)

    def forward(self, x):
        x = nn.functional.relu(self.linear1(x))
        x = self.dropout(x)
        x = self.linear2(x)
        return x

class TransformerEncoderLayer(nn.Module):
    def __init__(self, d_model, num_heads, d_ff, dropout=0.1):
        super(TransformerEncoderLayer, self).__init__()
        self.self_attn = MultiHeadAttention(d_model, num_heads, dropout)
        self.ffn = FeedForward(d_model, d_ff, dropout)
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.dropout1 = nn.Dropout(dropout)
        self.dropout2 = nn.Dropout(dropout)

    def forward(self, src, src_mask=None):
        src2 = self.norm1(src)
        src2 = self.self_attn(src2, src2, src2, src_mask)
        src = src + self.dropout1(src2)

        src2 = self.norm2(src)
        src2 = self.ffn(src2)
        src = src + self.dropout2(src2)
        return src

class TransformerEncoder(nn.Module):
    def __init__(self, num_layers, d_model, num_heads, d_ff, dropout=0.1):
        super(TransformerEncoder, self).__init__()
        self.layers = nn.ModuleList(
            [TransformerEncoderLayer(d_model, num_heads, d_ff, dropout) for _ in range(num_layers)]
        )

    def forward(self, src, src_mask=None):
        output = src
        for layer in self.layers:
            output = layer(output, src_mask)
        return output

class CustomTransformerModel(nn.Module):
    def __init__(self, vocab_size, d_model=128, num_heads=4, num_layers=6, d_ff=256, dropout=0.1):
        super(CustomTransformerModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.pos_encoder = PositionalEncoding(d_model, dropout)
        self.transformer_encoder = TransformerEncoder(num_layers, d_model, num_heads, d_ff, dropout)
        self.fc = nn.Linear(d_model, vocab_size)
        self.init_weights()

    def init_weights(self):
        for p in self.parameters():
            if p.dim() > 1:
                nn.init.xavier_uniform_(p)

    def forward(self, src, src_mask=None):
        x = self.embedding(src)
        x = self.pos_encoder(x)
        x = self.transformer_encoder(x, src_mask)
        x = self.fc(x)
        return x

# Custom Dataset class
class ConversationDataset(Dataset):
    def __init__(self, inputs, responses, tokenizer, max_length=50):
        self.inputs = inputs
        self.responses = responses
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_text = self.inputs[idx]
        response_text = self.responses[idx]
        input_tokens = self.tokenizer.encode(input_text, add_special_tokens=True, max_length=self.max_length, truncation=True, padding='max_length')
        response_tokens = self.tokenizer.encode(response_text, add_special_tokens=True, max_length=self.max_length, truncation=True, padding='max_length')
        return torch.tensor(input_tokens), torch.tensor(response_tokens)

# Load and preprocess your real-world dataset here
inputs, responses = load_conversational_data('conversations.csv')
dataset = ConversationDataset(inputs, responses, tokenizer)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# Data loaders
batch_size = 14
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

# Instantiate the model
vocab_size = tokenizer.vocab_size
model = CustomTransformerModel(vocab_size)

# Loss and optimizer
loss_fn = nn.CrossEntropyLoss(ignore_index=tokenizer.pad_token_id)
optimizer = optim.AdamW(model.parameters(), lr=0.001)

# Function to generate responses
def generate_response(model, tokenizer, input_text, max_length=50):
    model.eval()
    try:
        # Tokenize the input text
        tokens = tokenizer.encode(input_text, add_special_tokens=True, max_length=max_length, truncation=True, padding='max_length')
        input_data = torch.tensor([tokens])
        input_mask = (input_data != tokenizer.pad_token_id).long()

        # Log the tokenized input
        print(f"Tokenized input: {tokens}")

        with torch.no_grad():
            # Forward pass through the model
            output = model(input_data)

        # Check for NaNs in the output
        if torch.sum(torch.isnan(output)) > 0:
            print("Error: Model output contains NaNs.")
            return "I'm sorry, there was an error in generating the response."

        # Get the predicted tokens
        output_tokens = torch.argmax(output, dim=-1).squeeze().tolist()
        print(f"Output tokens: {output_tokens}")

        # Decode the tokens into a string
        response = tokenizer.decode(output_tokens, skip_special_tokens=True)
        print(f"Generated response: {response}")

        if response.strip() == "":
            print("Error: Generated response is empty.")
            return "I'm sorry, I couldn't generate a valid response."

        return response

    except Exception as e:
        print(f"Exception during response generation: {str(e)}")
        return "I'm sorry, an unexpected error occurred."

# Training and validation loop
num_epochs = 10
for epoch in range(num_epochs):
    # Training
    model.train()
    train_loss = 0.0
    for input_data, target in train_loader:
        optimizer.zero_grad()
        input_mask = (input_data != tokenizer.pad_token_id).long()

        output = model(input_data, input_mask)
        loss = loss_fn(output.view(-1, vocab_size), target.view(-1))
        loss.backward()

        optimizer.step()

        train_loss += loss.item()

    avg_train_loss = train_loss / len(train_loader)

    # Validation
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for input_data, target in val_loader:
            input_mask = (input_data != tokenizer.pad_token_id).long()

            output = model(input_data, input_mask)
            loss = loss_fn(output.view(-1, vocab_size), target.view(-1))
            val_loss += loss.item()

    avg_val_loss = val_loss / len(val_loader)

    print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}')

    # Log a sample response from the model
    sample_input = "Hello, how are you?"
    sample_response = generate_response(model, tokenizer, sample_input)
    print(f"Sample input: {sample_input}\nSample response: {sample_response}")

# Test the model interactively
while True:
    test_input = input("you: ")
    if test_input.lower() == 'exit':
        break
    response = generate_response(model, tokenizer, test_input)
    print(f"Input: {test_input}\nResponse: {response}")


I have experimented with various hyperparameters such as the learning rate, batch size, and number of training epochs. I expected these adjustments to improve the quality of the responses generated by my Transformer-based model. However, despite enlarging the dataset and tuning these parameters, the model keeps producing either repetitive tokens or completely empty responses during both training and inference.
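One thing I suspect is that generate_response does a single forward pass over the padded input and argmaxes every position at once, rather than generating token by token. For reference, here is a minimal sketch of the greedy autoregressive loop I understand generation normally requires (hypothetical code written against the model above, not what my script does; it assumes a next-token training objective, which my training loop does not use, and treats [SEP] as an end-of-sequence marker):

# Hypothetical greedy decoding sketch (not part of my script above).
# Assumes the model was trained to predict the next token; mine is
# trained to map each input position directly to a response position.
def greedy_decode(model, tokenizer, input_text, max_new_tokens=40):
    model.eval()
    tokens = tokenizer.encode(input_text, add_special_tokens=True, truncation=True, max_length=50)
    generated = list(tokens)
    with torch.no_grad():
        for _ in range(max_new_tokens):
            input_ids = torch.tensor([generated])
            logits = model(input_ids)                      # (1, seq_len, vocab_size)
            next_token = int(torch.argmax(logits[0, -1]))  # prediction at the last position
            if next_token == tokenizer.sep_token_id:       # use [SEP] as end-of-sequence
                break
            generated.append(next_token)
    return tokenizer.decode(generated[len(tokens):], skip_special_tokens=True)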
Here is the output:
python ai.py
Loaded data:
input response
0 Hello, how are you? I'm good, thank you!
1 What are you up to today? Just relaxing at home. How about you?
2 Have you seen any good movies lately? Yes, I watched a great thriller last night.
3 Can you recommend any good books? Sure! Have you read 'To Kill a Mockingbird'?
4 Tell me about your hobbies. I enjoy painting and hiking.
Epoch [1/10], Train Loss: 9.8999, Val Loss: 8.5966
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Epoch [2/10], Train Loss: 6.4885, Val Loss: 8.5841
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Epoch [3/10], Train Loss: 5.1883, Val Loss: 10.1303
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012]
Generated response: ..................................................
Sample input: Hello, how are you?
Sample response: ..................................................
Epoch [4/10], Train Loss: 4.8297, Val Loss: 10.9283
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Epoch [5/10], Train Loss: 4.8646, Val Loss: 11.6196
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Epoch [6/10], Train Loss: 4.8166, Val Loss: 11.6514
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012]
Generated response: ..................................................
Sample input: Hello, how are you?
Sample response: ..................................................
Epoch [7/10], Train Loss: 4.7412, Val Loss: 11.3170
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012, 1012]
Generated response: ..................................................
Sample input: Hello, how are you?
Sample response: ..................................................
Epoch [8/10], Train Loss: 4.7109, Val Loss: 10.7475
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102, 102]
Generated response:
Error: Generated response is empty.
Sample input: Hello, how are you?
Sample response: I'm sorry, I couldn't generate a valid response.
Epoch [9/10], Train Loss: 4.6831, Val Loss: 10.1369
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Epoch [10/10], Train Loss: 4.6578, Val Loss: 9.8423
Tokenized input: [101, 7592, 1010, 2129, 2024, 2017, 1029, 102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Output tokens: [1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045, 1045]
Generated response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
Sample input: Hello, how are you?
Sample response: i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i i
you:
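
In the bert-base-uncased vocabulary, token 1045 decodes to "i" and 1012 to "." (and 102 is [SEP], which is why epoch 8 decoded to an empty string), so the model seems to collapse onto the most frequent response tokens. A quick sketch of how that could be checked against the training data (hypothetical helper, not in the script above):

from collections import Counter

def most_common_response_tokens(responses, tokenizer, top_k=5):
    # Count how often each token id appears across all training responses
    counts = Counter()
    for text in responses:
        counts.update(tokenizer.encode(text, add_special_tokens=False))
    return [(tokenizer.decode([tok]), n) for tok, n in counts.most_common(top_k)]

print(most_common_response_tokens(responses, tokenizer))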


More details here: https://stackoverflow.com/questions/786 ... -responses