Я пытаюсь создать генеративного чат-бота в рамках курса Codecademy по NLP. Я выполнил все инструкции курса, но получаю только повторяющиеся ответы вроде «Твой дедушка» или «Какие дедушки».
Ответы, основанные на поиске (retrieval), работают нормально; проблемы вызывает только генеративная часть.
Это та же проблема, что описана здесь на старых форумах Codecademy: https://discuss.codecademy.com/t/so-how ... ain/556731
Ниже я привожу часть своего кода (для обучения, тестирования и построения модели).
Я пробовал использовать тот же код, но на гораздо большем наборе данных и с 1000 эпохами обучения, но результаты те же.
Кто-нибудь знает, что я могу сделать?
Спасибо
from preprocessing import num_encoder_tokens, num_decoder_tokens, decoder_target_data, encoder_input_data, decoder_input_data, decoder_target_data, max_encoder_seq_length, max_decoder_seq_length
from tensorflow import keras
# Add Dense to the imported layers
from keras.layers import Input, LSTM, Dense, Masking
from keras.models import Model
import os
# os.environ['KMP_DUPLICATE_LIB_OK']='True'
# Training script for an LSTM encoder-decoder (seq2seq) model.
# It builds the model from the arrays and vocabulary sizes imported from
# `preprocessing`, trains it, and saves it to disk.
#
# NOTE(review): everything below runs at module level, so training and saving
# happen whenever this code is executed. A later file in this paste does
# `from training_model import ...`; if this script is that module, the import
# would re-run the whole training — confirm.
# NOTE(review): `Masking` is imported but not used here, so any zero-padded
# timesteps in the input arrays are fed to the LSTMs as-is — presumably the
# data is padded to max_*_seq_length; confirm in preprocessing.

# Number of units passed to both LSTM layers.
dimensionality = 256
# Mini-batch size and number of passes over the training data.
batch_size = 50
epochs = 1000
# Encoder: one-hot token vectors per timestep (variable length).
encoder_inputs = Input(shape=(None, num_encoder_tokens))
# return_state=True so the final hidden and cell states are returned too.
encoder_lstm = LSTM(dimensionality, return_state=True)
encoder_outputs, state_hidden, state_cell = encoder_lstm(encoder_inputs)
# Only the final states are used below; encoder_outputs is not used again.
encoder_states = [state_hidden, state_cell]
# Decoder: one-hot target tokens per timestep.
decoder_inputs = Input(shape=(None, num_decoder_tokens))
# return_sequences=True gives one output per timestep for the Dense layer.
decoder_lstm = LSTM(dimensionality, return_sequences=True, return_state=True)
# The decoder starts from the encoder's final states; its own returned states
# are not used in this script.
decoder_outputs, decoder_state_hidden, decoder_state_cell = decoder_lstm(decoder_inputs, initial_state=encoder_states)
# Softmax over the decoder vocabulary at every timestep.
decoder_dense = Dense(num_decoder_tokens, activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)
# Training model: (encoder input, decoder input) -> per-timestep token
# distribution.
# NOTE(review): later code indexes `training_model.layers[2]`, so the order in
# which the layers are created above matters.
training_model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
# Compile with RMSprop and categorical cross-entropy; accuracy is reported.
training_model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])
# print("Training the model:\n")
# Train, holding out 20% of the samples for validation.
training_model.fit([encoder_input_data, decoder_input_data], decoder_target_data, batch_size = batch_size, epochs = epochs, validation_split = 0.2)
# Save the trained model in HDF5 format.
# NOTE(review): absolute, machine-specific path.
training_model.save('/Users/gavinreid/Code/0_nlp/5_twitter_generative/training_model.h5')
from preprocessing import input_features_dict, target_features_dict, reverse_input_features_dict, reverse_target_features_dict, max_decoder_seq_length, input_docs, target_docs, input_tokens, target_tokens, max_encoder_seq_length
from training_model import decoder_inputs, decoder_lstm, decoder_dense, encoder_input_data, num_decoder_tokens, num_encoder_tokens
from tensorflow import keras
from keras.layers import Input, LSTM, Dense
from keras.models import Model, load_model
import numpy as np
import re
# Build the inference-time encoder and decoder from the SAVED training model.
#
# Every layer used below is taken from the loaded model, so the encoder and
# the decoder are guaranteed to share the weights stored in the .h5 file.
# The names `decoder_inputs`, `decoder_lstm` and `decoder_dense` are rebound
# here on purpose: they replace the objects imported from the training module,
# which belong to a different in-memory model.
training_model = load_model('/Users/gavinreid/Code/0_nlp/5_twitter_generative/training_model.h5')

# NOTE(review): the indices assume the layer order
# [encoder input, decoder input, encoder LSTM, decoder LSTM, dense];
# verify with training_model.summary() if the architecture changes.
encoder_inputs = training_model.input[0]
encoder_outputs, state_h_enc, state_c_enc = training_model.layers[2].output
encoder_states = [state_h_enc, state_c_enc]

# Encoder model: input sequence -> [final hidden state, final cell state].
encoder_model = Model(encoder_inputs, encoder_states)

decoder_inputs = training_model.input[1]
decoder_lstm = training_model.layers[3]
decoder_dense = training_model.layers[4]

# Read the state size from the trained layer instead of hard-coding it, so it
# cannot drift from the value used during training.
latent_dim = decoder_lstm.units

# At inference time the decoder states are fed in explicitly, one step at a
# time, instead of coming from the encoder inside the same graph.
decoder_state_input_hidden = Input(shape=(latent_dim,))
decoder_state_input_cell = Input(shape=(latent_dim,))
decoder_states_inputs = [decoder_state_input_hidden, decoder_state_input_cell]

decoder_outputs, state_hidden, state_cell = decoder_lstm(
    decoder_inputs, initial_state=decoder_states_inputs)
decoder_states = [state_hidden, state_cell]
decoder_outputs = decoder_dense(decoder_outputs)

# Decoder model: (previous token, previous states) ->
# (token distribution, new states).
decoder_model = Model(
    [decoder_inputs] + decoder_states_inputs,
    [decoder_outputs] + decoder_states)
def decode_sequence(test_input):
    """Greedily decode an output sentence for one encoded input sequence.

    Args:
        test_input: Input passed unchanged to ``encoder_model.predict``
            (a batch of size 1 is assumed by the indexing below).

    Returns:
        The generated tokens joined into one string, each token preceded by
        a single space. The end marker, when produced, is included as the
        last token.
    """
    # NOTE(review): in the pasted source both markers appear as empty
    # strings, which the tokeniser can never produce; they look like
    # angle-bracket tokens stripped by HTML extraction. Confirm the exact
    # spelling against the preprocessing module.
    start_token = '<START>'
    end_token = '<END>'

    # Encode the input as state vectors.
    states_value = encoder_model.predict(test_input)

    # One-step target sequence holding only the start token.
    target_seq = np.zeros((1, 1, num_decoder_tokens))
    target_seq[0, 0, target_features_dict[start_token]] = 1.

    decoded_tokens = []
    while True:
        # Run the decoder for one step: token probabilities plus new states.
        output_tokens, hidden_state, cell_state = decoder_model.predict(
            [target_seq] + states_value)

        # Greedy choice: the most probable token at the last timestep.
        sampled_token_index = np.argmax(output_tokens[0, -1, :])
        sampled_token = reverse_target_features_dict[sampled_token_index]
        decoded_tokens.append(sampled_token)

        # Stop on the end marker or when the token budget is used up.
        # The limit is measured in tokens, so count tokens rather than the
        # characters of the joined string.
        if (sampled_token == end_token
                or len(decoded_tokens) >= max_decoder_seq_length):
            break

        # Feed the sampled token and the new states into the next step.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, sampled_token_index] = 1.
        states_value = [hidden_state, cell_state]

    return ''.join(' ' + token for token in decoded_tokens)
import numpy as np
import re
from test_model import encoder_model, decoder_model, num_decoder_tokens, num_encoder_tokens, input_features_dict, target_features_dict, reverse_target_features_dict, max_decoder_seq_length, max_encoder_seq_length
class ChatBot:
    """Console chatbot that answers with a seq2seq encoder/decoder pair.

    The class relies on module-level names imported from ``test_model``
    (``encoder_model``, ``decoder_model``, the feature dictionaries and the
    maximum sequence lengths); they are looked up when the methods run.
    """

    negative_responses = ("no", "nope", "nah", "naw", "not a chance", "sorry")
    exit_commands = ("quit", "pause", "exit", "goodbye", "bye", "later", "stop")

    # NOTE(review): in the pasted source both markers appear as empty
    # strings, which the tokeniser below can never produce; they look like
    # angle-bracket tokens stripped by HTML extraction. Confirm the exact
    # spelling against the preprocessing module.
    start_token = "<START>"
    end_token = "<END>"

    def start_chat(self):
        """Greet the user and start chatting unless they decline."""
        user_response = input("Hi, I'm a WeatherBot trained on dialog from Twitter. Wanna talk about the weather?\n")
        if user_response in self.negative_responses:
            print("Ok, I hope it rains on you!")
            return
        self.chat(user_response)

    def make_exit(self, reply):
        """Return True (after saying goodbye) if *reply* contains an exit command."""
        for exit_command in self.exit_commands:
            if exit_command in reply:
                print("Ok, have a great day!")
                return True
        return False

    def chat(self, reply):
        """Keep answering until the user's reply contains an exit command."""
        while not self.make_exit(reply):
            reply = input(self.generate_response(reply))

    def string_to_matrix(self, user_input):
        """One-hot encode *user_input* into a (1, timesteps, features) array.

        Tokens missing from ``input_features_dict`` leave their timestep
        all-zero. Tokens beyond ``max_encoder_seq_length`` are dropped so a
        long message cannot index past the end of the matrix.
        """
        tokens = re.findall(r"[\w']+|[^\s\w]", user_input)
        user_input_matrix = np.zeros(
            (1, max_encoder_seq_length, num_encoder_tokens), dtype='float32')
        for timestep, token in enumerate(tokens[:max_encoder_seq_length]):
            if token in input_features_dict:
                user_input_matrix[0, timestep, input_features_dict[token]] = 1.
        return user_input_matrix

    def generate_response(self, user_input):
        """Greedily decode a reply to *user_input* and return it as a string."""
        input_matrix = self.string_to_matrix(user_input)
        states_value = encoder_model.predict(input_matrix)

        # One-step target sequence holding only the start token.
        target_seq = np.zeros((1, 1, num_decoder_tokens))
        target_seq[0, 0, target_features_dict[self.start_token]] = 1.

        response_tokens = []
        while True:
            output_tokens, hidden_state, cell_state = decoder_model.predict(
                [target_seq] + states_value)

            # Greedy choice: the most probable token at the last timestep.
            sampled_token_index = np.argmax(output_tokens[0, -1, :])
            sampled_token = reverse_target_features_dict[sampled_token_index]
            response_tokens.append(sampled_token)

            # The limit is measured in tokens, so count tokens rather than
            # the characters of the joined string.
            if (sampled_token == self.end_token
                    or len(response_tokens) >= max_decoder_seq_length):
                break

            # Feed the sampled token and the new states into the next step.
            target_seq = np.zeros((1, 1, num_decoder_tokens))
            target_seq[0, 0, sampled_token_index] = 1.
            states_value = [hidden_state, cell_state]

        chatbot_response = "".join(" " + token for token in response_tokens)
        # Strip the sequence markers so they never reach the user.
        chatbot_response = chatbot_response.replace(
            self.start_token, "").replace(self.end_token, "")
        return chatbot_response
# Script entry point: create the bot and start the interactive console chat.
# NOTE(review): these statements run at module level, so importing this file
# also starts the blocking input() loop.
bot = ChatBot()
bot.start_chat()
Подробнее здесь: https://stackoverflow.com/questions/792 ... tbot-issue
Проблема с генеративным чат-ботом НЛП ⇐ Python
-
- Похожие темы
- Ответы
- Просмотры
- Последнее сообщение
-
-
Проблема с чат-ботом на основе трансформатора, генерирующим бессвязные ответы
Anonymous » » в форуме Python - 0 Ответы
- 25 Просмотры
-
Последнее сообщение Anonymous
-