An error occurs when fine-tuning with peft (Python)


Post by Anonymous »

I have been trying to do fine-tuning with peft. Below is my code:

Code:

from peft import LoraConfig, TaskType, get_peft_model
from transformers import AutoModelForSeq2SeqLM
from transformers import AutoTokenizer, DataCollatorWithPadding
from transformers import TrainingArguments
from transformers import Trainer
from datasets import load_dataset

peft_config = LoraConfig(task_type=TaskType.SEQ_2_SEQ_LM, inference_mode=False, r=2, lora_alpha=16, lora_dropout=0.01)
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-large")
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

dataset = load_dataset('imdb')
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="./mt0-large-lora",
    learning_rate=1e-3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
I get the following error:

Code:

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[2], line 49
27 training_args = TrainingArguments(
28     output_dir="./mt0-large-lora",
29     learning_rate=1e-3,
(...)
36     load_best_model_at_end=True,
37 )
39 trainer = Trainer(
40     model=model,
41     args=training_args,
(...)
46     compute_metrics=compute_metrics,
47 )
---> 49 trainer.train()

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:1539, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1534     self.model_wrapped = self.model
1536 inner_training_loop = find_executable_batch_size(
1537     self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1538 )
-> 1539 return inner_training_loop(
1540     args=args,
1541     resume_from_checkpoint=resume_from_checkpoint,
1542     trial=trial,
1543     ignore_keys_for_eval=ignore_keys_for_eval,
1544 )

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:1809, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1806     self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1808 with self.accelerator.accumulate(model):
-> 1809     tr_loss_step = self.training_step(model, inputs)
1811 if (
1812     args.logging_nan_inf_filter
1813     and not is_torch_tpu_available()
1814     and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1815 ):
1816     # if loss is nan or inf simply add the average of previous logged losses
1817     tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:2654, in Trainer.training_step(self, model, inputs)
2651     return loss_mb.reduce_mean().detach().to(self.args.device)
2653 with self.compute_loss_context_manager():
-> 2654     loss = self.compute_loss(model, inputs)
2656 if self.args.n_gpu > 1:
2657     loss = loss.mean()  # mean() to average on multi-gpu parallel training

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:2679, in Trainer.compute_loss(self, model, inputs, return_outputs)
2677 else:
2678     labels = None
->  2679 outputs = model(**inputs)
2680 # Save past state if it exists
2681 # TODO: this needs to be fixed and made cleaner later.
2682 if self.args.past_index >= 0:

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745         or _global_backward_pre_hooks or _global_backward_hooks
1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/peft/peft_model.py:1080, in PeftModelForSeq2SeqLM.forward(self, input_ids, attention_mask, inputs_embeds, decoder_input_ids, decoder_attention_mask, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
1078 peft_config = self.active_peft_config
1079 if not isinstance(peft_config, PromptLearningConfig):
-> 1080     return self.base_model(
1081         input_ids=input_ids,
1082         attention_mask=attention_mask,
1083         inputs_embeds=inputs_embeds,
1084         decoder_input_ids=decoder_input_ids,
1085         decoder_attention_mask=decoder_attention_mask,
1086         decoder_inputs_embeds=decoder_inputs_embeds,
1087         labels=labels,
1088         output_attentions=output_attentions,
1089         output_hidden_states=output_hidden_states,
1090         return_dict=return_dict,
1091         **kwargs,
1092     )
1094 batch_size = input_ids.shape[0]
1095 if decoder_attention_mask is not None:
1096     # concat prompt attention mask

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745         or _global_backward_pre_hooks or _global_backward_hooks
1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/models/mt5/modeling_mt5.py:1741, in MT5ForConditionalGeneration.forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1738         decoder_attention_mask = decoder_attention_mask.to(self.decoder.first_device)
1740 # Decode
->  1741 decoder_outputs = self.decoder(
1742     input_ids=decoder_input_ids,
1743     attention_mask=decoder_attention_mask,
1744     inputs_embeds=decoder_inputs_embeds,
1745     past_key_values=past_key_values,
1746     encoder_hidden_states=hidden_states,
1747     encoder_attention_mask=attention_mask,
1748     head_mask=decoder_head_mask,
1749     cross_attn_head_mask=cross_attn_head_mask,
1750     use_cache=use_cache,
1751     output_attentions=output_attentions,
1752     output_hidden_states=output_hidden_states,
1753     return_dict=return_dict,
1754 )
1756 sequence_output = decoder_outputs[0]
1758 # Set device for model parallelism

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
1735 else:
-> 1736     return self._call_impl(*args, **kwargs)

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745         or _global_backward_pre_hooks or _global_backward_hooks
1746         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747     return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()

File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/models/mt5/modeling_mt5.py:966, in MT5Stack.forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, cross_attn_head_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
963         raise ValueError("You have to initialize the model with valid token embeddings")
964     inputs_embeds = self.embed_tokens(input_ids)
--> 966 batch_size, seq_length = input_shape
968 # required mask seq length can be calculated via length of past
969 mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length

ValueError: not enough values to unpack (expected 2, got 1)
Any idea what is going wrong here? I am running this on my GeForce RTX 4090, a GPU with 24 GB of memory, for testing.
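
In case it matters: the imdb labels are plain integers (0/1) and I only tokenize examples['text'], so the labels tensor that reaches the model is one-dimensional; my guess is that MT5 then derives decoder_input_ids from those 1-D labels, which would explain the failing batch_size, seq_length = input_shape unpack. Below is a rough sketch of the preprocessing I could try instead, turning each label into a short text target and tokenizing everything with the mt0 tokenizer rather than bert-base-uncased (the "negative"/"positive" wording, the variable names and the switch to DataCollatorForSeq2Seq are my own assumptions, not something I have verified):

Code:

from datasets import load_dataset
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

# Use the mt0 tokenizer itself instead of bert-base-uncased
tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")
dataset = load_dataset("imdb")

# Map the integer class label to a short text target, so the model
# receives a 2-D (batch, seq_len) labels tensor instead of a 1-D one
label_text = {0: "negative", 1: "positive"}

def preprocess(examples):
    model_inputs = tokenizer(examples["text"], max_length=512, truncation=True)
    targets = [label_text[label] for label in examples["label"]]
    labels = tokenizer(text_target=targets, max_length=4, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

# Only map the labelled splits (the 'unsupervised' split has label == -1)
train_ds = dataset["train"].map(preprocess, batched=True, remove_columns=["text", "label"])
test_ds = dataset["test"].map(preprocess, batched=True, remove_columns=["text", "label"])

# DataCollatorForSeq2Seq pads the inputs and pads the labels with -100,
# so padded label positions are ignored by the loss
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer)

With something like this the collator should hand the model a proper (batch, seq_len) labels tensor, but I have not confirmed that this is really the root cause.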

More details here: https://stackoverflow.com/questions/792 ... using-peft