Code: Select all
from datasets import load_dataset
from peft import LoraConfig, TaskType, get_peft_model
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorWithPadding,
    Trainer,
    TrainingArguments,
)

# Wrap mt0-large with a LoRA adapter
peft_config = LoraConfig(
    task_type=TaskType.SEQ_2_SEQ_LM,
    inference_mode=False,
    r=2,
    lora_alpha=16,
    lora_dropout=0.01,
)
model = AutoModelForSeq2SeqLM.from_pretrained("bigscience/mt0-large")
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()

# Load and tokenize the IMDB dataset
dataset = load_dataset("imdb")
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = dataset.map(tokenize_function, batched=True)

def compute_metrics(eval_pred):
    # NOTE: `metric` is used here but never defined anywhere in this snippet
    predictions, labels = eval_pred
    predictions = predictions[:, 0]
    return metric.compute(predictions=predictions, references=labels)

training_args = TrainingArguments(
    output_dir="./mt0-large-lora",
    learning_rate=1e-3,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    num_train_epochs=2,
    weight_decay=0.01,
    evaluation_strategy="epoch",
    save_strategy="epoch",
    load_best_model_at_end=True,
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_datasets["train"],
    eval_dataset=tokenized_datasets["test"],
    tokenizer=tokenizer,
    data_collator=data_collator,
    compute_metrics=compute_metrics,
)

trainer.train()
Code: Select all
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[2], line 49
27 training_args = TrainingArguments(
28 output_dir="./mt0-large-lora",
29 learning_rate=1e-3,
(...)
36 load_best_model_at_end=True,
37 )
39 trainer = Trainer(
40 model=model,
41 args=training_args,
(...)
46 compute_metrics=compute_metrics,
47 )
---> 49 trainer.train()
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:1539, in Trainer.train(self, resume_from_checkpoint, trial, ignore_keys_for_eval, **kwargs)
1534 self.model_wrapped = self.model
1536 inner_training_loop = find_executable_batch_size(
1537 self._inner_training_loop, self._train_batch_size, args.auto_find_batch_size
1538 )
-> 1539 return inner_training_loop(
1540 args=args,
1541 resume_from_checkpoint=resume_from_checkpoint,
1542 trial=trial,
1543 ignore_keys_for_eval=ignore_keys_for_eval,
1544 )
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:1809, in Trainer._inner_training_loop(self, batch_size, args, resume_from_checkpoint, trial, ignore_keys_for_eval)
1806 self.control = self.callback_handler.on_step_begin(args, self.state, self.control)
1808 with self.accelerator.accumulate(model):
-> 1809 tr_loss_step = self.training_step(model, inputs)
1811 if (
1812 args.logging_nan_inf_filter
1813 and not is_torch_tpu_available()
1814 and (torch.isnan(tr_loss_step) or torch.isinf(tr_loss_step))
1815 ):
1816 # if loss is nan or inf simply add the average of previous logged losses
1817 tr_loss += tr_loss / (1 + self.state.global_step - self._globalstep_last_logged)
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:2654, in Trainer.training_step(self, model, inputs)
2651 return loss_mb.reduce_mean().detach().to(self.args.device)
2653 with self.compute_loss_context_manager():
-> 2654 loss = self.compute_loss(model, inputs)
2656 if self.args.n_gpu > 1:
2657 loss = loss.mean() # mean() to average on multi-gpu parallel training
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/trainer.py:2679, in Trainer.compute_loss(self, model, inputs, return_outputs)
2677 else:
2678 labels = None
-> 2679 outputs = model(**inputs)
2680 # Save past state if it exists
2681 # TODO: this needs to be fixed and made cleaner later.
2682 if self.args.past_index >= 0:
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1735 else:
-> 1736 return self._call_impl(*args, **kwargs)
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745 or _global_backward_pre_hooks or _global_backward_hooks
1746 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747 return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/peft/peft_model.py:1080, in PeftModelForSeq2SeqLM.forward(self, input_ids, attention_mask, inputs_embeds, decoder_input_ids, decoder_attention_mask, decoder_inputs_embeds, labels, output_attentions, output_hidden_states, return_dict, **kwargs)
1078 peft_config = self.active_peft_config
1079 if not isinstance(peft_config, PromptLearningConfig):
-> 1080 return self.base_model(
1081 input_ids=input_ids,
1082 attention_mask=attention_mask,
1083 inputs_embeds=inputs_embeds,
1084 decoder_input_ids=decoder_input_ids,
1085 decoder_attention_mask=decoder_attention_mask,
1086 decoder_inputs_embeds=decoder_inputs_embeds,
1087 labels=labels,
1088 output_attentions=output_attentions,
1089 output_hidden_states=output_hidden_states,
1090 return_dict=return_dict,
1091 **kwargs,
1092 )
1094 batch_size = input_ids.shape[0]
1095 if decoder_attention_mask is not None:
1096 # concat prompt attention mask
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1735 else:
-> 1736 return self._call_impl(*args, **kwargs)
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745 or _global_backward_pre_hooks or _global_backward_hooks
1746 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747 return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/models/mt5/modeling_mt5.py:1741, in MT5ForConditionalGeneration.forward(self, input_ids, attention_mask, decoder_input_ids, decoder_attention_mask, head_mask, decoder_head_mask, cross_attn_head_mask, encoder_outputs, past_key_values, inputs_embeds, decoder_inputs_embeds, labels, use_cache, output_attentions, output_hidden_states, return_dict)
1738 decoder_attention_mask = decoder_attention_mask.to(self.decoder.first_device)
1740 # Decode
-> 1741 decoder_outputs = self.decoder(
1742 input_ids=decoder_input_ids,
1743 attention_mask=decoder_attention_mask,
1744 inputs_embeds=decoder_inputs_embeds,
1745 past_key_values=past_key_values,
1746 encoder_hidden_states=hidden_states,
1747 encoder_attention_mask=attention_mask,
1748 head_mask=decoder_head_mask,
1749 cross_attn_head_mask=cross_attn_head_mask,
1750 use_cache=use_cache,
1751 output_attentions=output_attentions,
1752 output_hidden_states=output_hidden_states,
1753 return_dict=return_dict,
1754 )
1756 sequence_output = decoder_outputs[0]
1758 # Set device for model parallelism
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1736, in Module._wrapped_call_impl(self, *args, **kwargs)
1734 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1735 else:
-> 1736 return self._call_impl(*args, **kwargs)
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/torch/nn/modules/module.py:1747, in Module._call_impl(self, *args, **kwargs)
1742 # If we don't have any hooks, we want to skip the rest of the logic in
1743 # this function, and just call forward.
1744 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1745 or _global_backward_pre_hooks or _global_backward_hooks
1746 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1747 return forward_call(*args, **kwargs)
1749 result = None
1750 called_always_called_hooks = set()
File ~/venvs/fine_tuning_llama_peft/lib/python3.11/site-packages/transformers/models/mt5/modeling_mt5.py:966, in MT5Stack.forward(self, input_ids, attention_mask, encoder_hidden_states, encoder_attention_mask, inputs_embeds, head_mask, cross_attn_head_mask, past_key_values, use_cache, output_attentions, output_hidden_states, return_dict)
963 raise ValueError("You have to initialize the model with valid token embeddings")
964 inputs_embeds = self.embed_tokens(input_ids)
--> 966 batch_size, seq_length = input_shape
968 # required mask seq length can be calculated via length of past
969 mask_seq_length = past_key_values[0][0].shape[2] + seq_length if past_key_values is not None else seq_length
ValueError: not enough values to unpack (expected 2, got 1)
More details here: https://stackoverflow.com/questions/792 ... using-peft
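One likely reading of the traceback: the failure happens in MT5Stack.forward when it tries to unpack `batch_size, seq_length = input_shape`, i.e. the decoder receives a 1-D tensor. IMDB's `label` column holds a single class id (0/1) per example; the collator passes it to the model as `labels`, and the seq2seq model builds `decoder_input_ids` from it, so the decoder gets shape `(batch_size,)` instead of `(batch_size, seq_len)`. For a Seq2SeqLM, `labels` must themselves be sequences of token ids. A second mismatch is the tokenizer: `bert-base-uncased` does not match `bigscience/mt0-large`. Below is a minimal preprocessing sketch, assuming the task is recast as text-to-text sentiment generation; the `id2text` mapping, the `preprocess_function` name, and the max lengths are illustrative assumptions, not taken from the original post.

Code: Select all
from transformers import AutoTokenizer, DataCollatorForSeq2Seq

# Use the tokenizer that matches the model being fine-tuned
tokenizer = AutoTokenizer.from_pretrained("bigscience/mt0-large")

# Hypothetical text targets for the two IMDB classes (0 = negative, 1 = positive)
id2text = {0: "negative", 1: "positive"}

def preprocess_function(examples):
    # Encode the reviews as encoder inputs
    model_inputs = tokenizer(
        examples["text"],
        max_length=512,  # assumed limit, adjust to taste
        truncation=True,
    )
    # Encode the class names as decoder targets, so "labels" becomes
    # a sequence of token ids instead of a single integer
    targets = [id2text[label] for label in examples["label"]]
    labels = tokenizer(text_target=targets, max_length=4, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized_datasets = dataset.map(
    preprocess_function,
    batched=True,
    remove_columns=dataset["train"].column_names,  # drop the raw "label"/"text" columns
)

# Pads inputs and labels together for encoder-decoder models
data_collator = DataCollatorForSeq2Seq(tokenizer=tokenizer, model=model)

With sequence labels like these, the posted compute_metrics would also need rethinking (decoding generated token ids rather than indexing logits with `predictions[:, 0]`), and `metric` still has to be defined somewhere, e.g. loaded via the evaluate library.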