Traceback (most recent call last):
  File "/run/determined/workdir/main.py", line 247, in <module>
    main(training_args, det_callback, hparams)
  File "/run/determined/workdir/main.py", line 216, in main
    trainer.train()
  File "/run/determined/pythonuserbase/lib/python3.10/site-packages/trl/trainer/ppo_trainer.py", line 570, in train
    self._save_checkpoint(model, trial=None)
  File "/run/determined/pythonuserbase/lib/python3.10/site-packages/transformers/trainer.py", line 3097, in _save_checkpoint
    self.save_model(output_dir, _internal_call=True)
  File "/run/determined/pythonuserbase/lib/python3.10/site-packages/trl/trainer/ppo_trainer.py", line 260, in save_model
    self.model = self.model.policy  # save only the policy
  File "/run/determined/pythonuserbase/lib/python3.10/site-packages/torch/nn/modules/module.py", line 1931, in __getattr__
    raise AttributeError(
AttributeError: 'DistributedDataParallel' object has no attribute 'policy'
The error occurs while training with PPOTrainer. Using this version or v2 (PPOv2Trainer) raises the same error.
My code is adapted from the trl example here: https://github.com/huggingface/trl/blob/main/examples/scripts/ppo/ppo.py
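For context, here is a minimal, self-contained sketch of why DistributedDataParallel hides the attribute (a toy module and a single-process gloo group, not the actual TRL classes): the DDP wrapper keeps the wrapped model under .module, so anything defined on the inner model, like policy, is not reachable on the wrapper itself.

import os
import torch.nn as nn
import torch.distributed as dist

os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
dist.init_process_group("gloo", rank=0, world_size=1)  # single process, CPU is enough

class ToyPolicyValueWrapper(nn.Module):
    """Toy stand-in for the policy/value wrapper the trainer builds internally."""
    def __init__(self):
        super().__init__()
        self.policy = nn.Linear(4, 4)  # plays the role of the policy model

ddp = nn.parallel.DistributedDataParallel(ToyPolicyValueWrapper())
print(hasattr(ddp, "policy"))         # False -> AttributeError, as in the traceback
print(hasattr(ddp.module, "policy"))  # True: the inner model sits under .module

dist.destroy_process_group()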
from accelerate import PartialState
from transformers import (
    AutoModelForCausalLM,
    AutoModelForSequenceClassification,
    AutoTokenizer,
)
from trl import PPOTrainer
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE

# load_jsonl_dataset is our own helper that returns a datasets.Dataset from a JSONL file.

def main(training_args, det_callback, model_config):
    ################
    # Model & Tokenizer
    ################
    tokenizer = AutoTokenizer.from_pretrained(
        model_config["model_name_or_path"],
        padding_side="left",
        trust_remote_code=model_config["trust_remote_code"],
        # attn_implementation="flash_attention_2"
    )
    tokenizer.add_special_tokens({"pad_token": "[PAD]"})
    if tokenizer.chat_template is None:
        tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

    value_model = AutoModelForSequenceClassification.from_pretrained(
        training_args.reward_model_path,
        trust_remote_code=model_config["trust_remote_code"],
        num_labels=1,
        # attn_implementation="flash_attention_2"
    )
    reward_model = AutoModelForSequenceClassification.from_pretrained(
        training_args.reward_model_path,
        trust_remote_code=model_config["trust_remote_code"],
        num_labels=1,
        # attn_implementation="flash_attention_2"
    )
    ref_policy = AutoModelForCausalLM.from_pretrained(
        training_args.sft_model_path,
        trust_remote_code=model_config["trust_remote_code"],
        # attn_implementation="flash_attention_2"
    )
    policy = AutoModelForCausalLM.from_pretrained(
        training_args.sft_model_path,
        trust_remote_code=model_config["trust_remote_code"],
        # attn_implementation="flash_attention_2"
    )

    ################
    # Dataset
    ################
    dataset = load_jsonl_dataset(model_config["dataset_name"], split=model_config["dataset_train_split"])
    eval_samples = 100
    dataset = dataset.filter(
        # I know, I know, I should count tokens instead, but this is fine for now
        lambda example: sum(len(example[col]) for col in ["system", "prompt", "chosen", "rejected"]) < 4096
    )
    train_dataset = dataset.select(range(len(dataset) - eval_samples))
    eval_dataset = dataset.select(range(len(dataset) - eval_samples, len(dataset)))
    dataset_text_field = "prompt"

    def prepare_dataset(dataset, tokenizer):
        """Pre-tokenize the dataset before training; only collate during training."""
        def tokenize(element):
            outputs = tokenizer(
                element[dataset_text_field],
                padding=False,
            )
            return {"input_ids": outputs["input_ids"]}
        return dataset.map(
            tokenize,
            batched=True,
            remove_columns=dataset.column_names,
            num_proc=training_args.dataset_num_proc,
        )

    # Compute that only on the main process for faster data processing.
    # See: https://github.com/huggingface/trl/pull/1255
    with PartialState().local_main_process_first():
        train_dataset = prepare_dataset(train_dataset, tokenizer)
        eval_dataset = prepare_dataset(eval_dataset, tokenizer)

    ################
    # Training
    ################
    trainer = PPOTrainer(  # same error with PPOv2Trainer
        config=training_args,
        processing_class=tokenizer,
        policy=policy,
        ref_policy=ref_policy,
        reward_model=reward_model,
        value_model=value_model,
        train_dataset=train_dataset,
        eval_dataset=eval_dataset,
    )
    trainer.train()
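(Side note on the length filter above, since the comment admits it should count tokens: here is a quick token-based sketch, assuming the same column names; this is not what the script currently does.)

def under_token_budget(example, max_tokens=4096):
    # Same idea as the lambda above, but measured in tokens rather than characters.
    text = "".join(example[col] for col in ("system", "prompt", "chosen", "rejected"))
    return len(tokenizer(text)["input_ids"]) < max_tokens

dataset = dataset.filter(under_token_budget)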
I am trying this function with a Llama-based model.
I have no idea what the problem could be...
Thanks in advance for any advice you can give me!
EDIT: I added more information here: https://github.com/huggingface/trl/issues/2375
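One workaround I am experimenting with (untested, and not an official TRL API; it relies on the accelerate Accelerator the trainer already holds as self.accelerator): unwrap the DDP wrapper before the parent save_model reaches for .policy.

from trl import PPOTrainer

class UnwrappingPPOTrainer(PPOTrainer):
    def save_model(self, output_dir=None, _internal_call=False):
        # trl's save_model does `self.model = self.model.policy`, which fails while
        # self.model is still wrapped in DistributedDataParallel; unwrap_model
        # returns the underlying module so the attribute lookup succeeds.
        self.model = self.accelerator.unwrap_model(self.model)
        super().save_model(output_dir, _internal_call)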