Я создал модель NER, используя BERT для обнаружения медицинских объектов, и она отлично работает. Я пытаюсь добавить слой CRF поверх моей модели BERT, чтобы улучшить ее производительность, но получаю ошибку, которую не могу устранить.
Вот ошибка:
ValueError Traceback (most recent call last)
in ()
83
84 # Start training
---> 85 trainer.train()
7 frames
/usr/local/lib/python3.10/dist-packages/torchcrf/__init__.py in _validate(self, emissions, tags, mask)
165 no_empty_seq_bf = self.batch_first and mask[:, 0].all()
166 if not no_empty_seq and not no_empty_seq_bf:
--> 167 raise ValueError('mask of the first timestep must all be on')
168
169 def _compute_score(
ValueError: mask of the first timestep must all be on
from transformers import TrainingArguments, Trainer
from torchcrf import CRF
import torch.nn as nn
from transformers import DataCollatorForTokenClassification
from transformers import AutoTokenizer, BertTokenizerFast
def tokenize_and_align_labels(examples):
tokenized_inputs = tokenizer(examples["tokens"], truncation=True, is_split_into_words=True)
labels = []
for i, label in enumerate(examples[f"ner_tags"]):
word_ids = tokenized_inputs.word_ids(batch_index=i)
previous_word_idx = None
label_ids = []
for word_idx in word_ids:
# Special tokens have a word id that is None. We set the label to -100 so they are automatically
# ignored in the loss function
if word_idx is None:
label_ids.append(-100)
# We set the label for the first token of each word.
elif word_idx != previous_word_idx:
label_ids.append(label[word_idx])
# For the other tokens in a word, we set the label to either the current label or -100, depending on
# the label_all_tokens flag.
else:
label_ids.append(label[word_idx] if label_all_tokens else -100)
previous_word_idx = word_idx
labels.append(label_ids)
tokenized_inputs["labels"] = labels
return tokenized_inputs
label_all_tokens = False
tokenizer = BertTokenizerFast.from_pretrained('bert-base-cased')
tokenized_data = my_dataset_dict.map(tokenize_and_align_labels, batched=True)
data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)
class BERT_CRF_Model(nn.Module):
def __init__(self, bert_model, num_labels):
super(BERT_CRF_Model, self).__init__()
self.bert = bert_model
self.dropout = nn.Dropout(0.1)
self.classifier = nn.Linear(self.bert.config.hidden_size, num_labels)
self.crf = CRF(num_labels, batch_first=True)
def forward(self, input_ids, attention_mask, labels=None):
outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask)
sequence_output = self.dropout(outputs[0]) # Last hidden state
emissions = self.classifier(sequence_output)
if labels is not None:
# CRF loss
loss = -self.crf(emissions, labels, mask=attention_mask.bool(), reduction='mean')
return loss
else:
# CRF decoding (prediction)
prediction = self.crf.decode(emissions, mask=attention_mask.bool())
return emissions # Make sure to return emissions here
class CustomTrainer(Trainer):
def __init__(self, *args, crf_layer=None, **kwargs):
super().__init__(*args, **kwargs)
self.crf_layer = crf_layer
def compute_loss(self, model, inputs, return_outputs=False):
labels = inputs.pop("labels") # Extraire les labels
emissions = model(**inputs) # Obtenir les émissions du modèle
emissions = torch.stack(emissions) if isinstance(emissions, list) else emissions
# Vérifiez le masque d'attention
mask = inputs["attention_mask"].bool()
if mask.size(0) == 0 or mask[:, 0].sum() == 0:
raise ValueError("Le masque du premier pas de temps doit être activé")
# Calculer la perte CRF
loss = -self.crf_layer(emissions, labels, mask=mask)
return (loss, inputs) if return_outputs else loss
# Load BERT model
from transformers import BertModel
bert_model = BertModel.from_pretrained("bert-base-cased")
model = BERT_CRF_Model(bert_model, num_labels=len(unique_labels))
crf_layer = CRF(num_tags=len(unique_labels))
training_args = TrainingArguments(
output_dir="my_awesome_ner_model",
evaluation_strategy="epoch",
learning_rate=2e-5,
per_device_train_batch_size=12,
per_device_eval_batch_size=12,
num_train_epochs=1,
weight_decay=0.01,
push_to_hub=True,
)
trainer = CustomTrainer(
model=model,
args=training_args,
train_dataset=tokenized_data["train"],
eval_dataset=tokenized_data["val"],
tokenizer=tokenizer,
data_collator=data_collator,
crf_layer=crf_layer # Pass the CRF layer
)
trainer.train()
Я не совсем понимаю, почему именно я получаю эту ошибку, буду благодарен за любую помощь!
Я создал модель NER, используя BERT для обнаружения медицинских объектов, и она отлично работает. Я пытаюсь добавить слой CRF поверх моей модели BERT, чтобы улучшить ее производительность, но получаю ошибку, которую не могу устранить. Вот ошибка: [code]ValueError Traceback (most recent call last) in () 83 84 # Start training ---> 85 trainer.train()
7 frames /usr/local/lib/python3.10/dist-packages/torchcrf/__init__.py in _validate(self, emissions, tags, mask) 165 no_empty_seq_bf = self.batch_first and mask[:, 0].all() 166 if not no_empty_seq and not no_empty_seq_bf: --> 167 raise ValueError('mask of the first timestep must all be on') 168 169 def _compute_score(
ValueError: mask of the first timestep must all be on [/code] Что касается моего кода, то вот он: [code]from transformers import TrainingArguments, Trainer from torchcrf import CRF import torch.nn as nn from transformers import DataCollatorForTokenClassification from transformers import AutoTokenizer, BertTokenizerFast
labels = [] for i, label in enumerate(examples[f"ner_tags"]):
word_ids = tokenized_inputs.word_ids(batch_index=i) previous_word_idx = None label_ids = [] for word_idx in word_ids: # Special tokens have a word id that is None. We set the label to -100 so they are automatically # ignored in the loss function if word_idx is None: label_ids.append(-100) # We set the label for the first token of each word. elif word_idx != previous_word_idx: label_ids.append(label[word_idx]) # For the other tokens in a word, we set the label to either the current label or -100, depending on # the label_all_tokens flag. else: label_ids.append(label[word_idx] if label_all_tokens else -100) previous_word_idx = word_idx
def forward(self, input_ids, attention_mask, labels=None): outputs = self.bert(input_ids=input_ids, attention_mask=attention_mask) sequence_output = self.dropout(outputs[0]) # Last hidden state emissions = self.classifier(sequence_output)
if labels is not None: # CRF loss loss = -self.crf(emissions, labels, mask=attention_mask.bool(), reduction='mean') return loss else: # CRF decoding (prediction) prediction = self.crf.decode(emissions, mask=attention_mask.bool()) return emissions # Make sure to return emissions here
def compute_loss(self, model, inputs, return_outputs=False): labels = inputs.pop("labels") # Extraire les labels emissions = model(**inputs) # Obtenir les émissions du modèle
emissions = torch.stack(emissions) if isinstance(emissions, list) else emissions
# Vérifiez le masque d'attention mask = inputs["attention_mask"].bool() if mask.size(0) == 0 or mask[:, 0].sum() == 0: raise ValueError("Le masque du premier pas de temps doit être activé")
# Calculer la perte CRF loss = -self.crf_layer(emissions, labels, mask=mask)
return (loss, inputs) if return_outputs else loss
# Load BERT model from transformers import BertModel
bert_model = BertModel.from_pretrained("bert-base-cased") model = BERT_CRF_Model(bert_model, num_labels=len(unique_labels))