Traceback (most recent call last):
File "nllbtrain.py", line 273, in
print(trainer.train())
File "/home//.conda/envs/dict/lib/python3.8/site-packages/transformers/trainer.py", line 1645, in train
return inner_training_loop(
File "/home//.conda/envs/dict/lib/python3.8/site-packages/transformers/trainer.py", line 1907, in _inner_training_loop
for step, inputs in enumerate(epoch_iterator):
File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 631, in __next__
data = self._next_data()
File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data
data = self._dataset_fetcher.fetch(index) # may raise StopIteration
File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
data = self.dataset.__getitems__(possibly_batched_index)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2814, in __getitems__
batch = self.__getitem__(keys)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2810, in __getitem__
return self._getitem(key)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2794, in _getitem
pa_subtable = query_table(self._data, key, indices=self._indices)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 583, in query_table
_check_valid_index_key(key, size)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 536, in _check_valid_index_key
_check_valid_index_key(int(max(key)), size=size)
File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 526, in _check_valid_index_key
raise IndexError(f"Invalid key: {key} is out of bounds for size {size}")
IndexError: Invalid key: 39463 is out of bounds for size 0
0%|
Я пытаюсь выполнить машинный перевод с хинди на санскрит, используя модель NLLB. Но я продолжаю получать сообщение об ошибке:
IndexError: Неверный ключ: 39463 выходит за пределы размера 0.
[list] [*]Ошибка возникает при обучении предварительно обученной модели NLLB `facebook/nllb-200-1.3B` [*]Входные данные ~ 40 тысяч предложений на хинди. Та же ошибка возникает, когда я пытаюсь тренироваться с использованием образца данных. [/list] Подробное сообщение об ошибке: [code]Traceback (most recent call last): File "nllbtrain.py", line 273, in print(trainer.train()) File "/home//.conda/envs/dict/lib/python3.8/site-packages/transformers/trainer.py", line 1645, in train return inner_training_loop( File "/home//.conda/envs/dict/lib/python3.8/site-packages/transformers/trainer.py", line 1907, in _inner_training_loop for step, inputs in enumerate(epoch_iterator): File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 631, in __next__ data = self._next_data() File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/dataloader.py", line 675, in _next_data data = self._dataset_fetcher.fetch(index) # may raise StopIteration File "/home//.conda/envs/dict/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch data = self.dataset.__getitems__(possibly_batched_index) File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2814, in __getitems__ batch = self.__getitem__(keys) File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2810, in __getitem__ return self._getitem(key) File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/arrow_dataset.py", line 2794, in _getitem pa_subtable = query_table(self._data, key, indices=self._indices) File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 583, in query_table _check_valid_index_key(key, size) File "/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 536, in _check_valid_index_key _check_valid_index_key(int(max(key)), size=size) File 
"/home//.conda/envs/dict/lib/python3.8/site-packages/datasets/formatting/formatting.py", line 526, in _check_valid_index_key raise IndexError(f"Invalid key: {key} is out of bounds for size {size}") IndexError: Invalid key: 39463 is out of bounds for size 0 0%| [/code] Код предварительной обработки данных: [code]def preprocess_function(examples): inputs = [example + ' ' + f' ' for example in examples[source_lang]] targets = [f' ' + example + ' ' for example in examples[target_lang]]
Я изучаю учебник Pytorch SEQ2SEQ:
У меня есть вопрос о Decoder.
class DecoderRNN(nn.Module):
def __init__(self, hidden_size, output_size):
super(DecoderRNN, self).__init__()
self.embedding = nn.Embedding(output_size, hidden_size)
self.gru =...
Я изучаю учебник Pytorch SEQ2SEQ:
У меня есть вопрос о Decoder.
class DecoderRNN(nn.Module):
def __init__(self, hidden_size, output_size):
super(DecoderRNN, self).__init__()
self.embedding = nn.Embedding(output_size, hidden_size)
self.gru =...
Я изучаю учебник Pytorch SEQ2SEQ:
У меня есть вопрос о Decoder.
class DecoderRNN(nn.Module):
def __init__(self, hidden_size, output_size):
super(DecoderRNN, self).__init__()
self.embedding = nn.Embedding(output_size, hidden_size)
self.gru =...