Я создаю бенчмарк для оценки языковой модели. Во-первых, я сгенерировал набор данных, с помощью которого собираюсь формировать запросы к языковой модели. Затем я попытался оценить какую-нибудь небольшую языковую модель, чтобы получить минимальное работающее сквозное (end-to-end) приложение. Я следовал инструкциям из документации библиотеки Hugging Face Lighteval, но при попытке запустить код оценки продолжаю получать ошибку. Я использую Python 3.11; пробовал также 3.10 — происходит то же самое.
error:
py .\run_evaluation.py
Traceback (most recent call last):
File "C:\Users\Mahmoud\Desktop\codings\benchmark-eval-llm\run_evaluation.py", line 48, in <module>
main()
File "C:\Users\Mahmoud\Desktop\codings\benchmark-eval-llm\run_evaluation.py", line 21, in main
pipeline_params = PipelineParameters(
^^^^^^^^^^^^^^^^^^^^^
TypeError: PipelineParameters.__init__() got an unexpected keyword argument 'custom_task_directory'
Пользовательская задача должна заключаться в идентификации синонимов.
Ниже приведены весь код и структура проекта на данный момент.
benchmark-eval-llm
[*]evaluation/ ---- custom_csi_task.py
[*]csi_benchmark_advanced.jsonl
[*]run_evaluation.py
evaluation/custom_csi_task.py
import numpy as np
from lighteval.tasks.lighteval_task import LightevalTask
from lighteval.tasks.requests import Doc
class CustomCSI(LightevalTask):
    """Multiple-choice task scored by picking the highest-scoring choice.

    Each document is read as a mapping with the keys ``"question"``,
    ``"choices"`` and ``"answer"``. One ``"loglikelihood"`` request is built
    per choice, and accuracy is 1 when the arg-max of the returned results
    lands on the choice equal to ``"answer"``.

    NOTE(review): the hooks below (``doc_to_text``, ``construct_requests``
    with ``request_type``/``args``, subscripting ``doc``) look like a
    harness-style task interface -- confirm against the installed lighteval
    version that ``LightevalTask`` and ``Request`` support this usage.
    """

    def doc_to_text(self, doc: Doc) -> str:
        """Return the prompt text, i.e. the ``"question"`` entry of ``doc``."""
        question = doc["question"]
        return question

    def doc_to_target(self, doc: Doc) -> int:
        """Return the position of ``doc["answer"]`` inside ``doc["choices"]``.

        Raises:
            ValueError: if the answer is not one of the choices
                (propagated from ``list.index``).
        """
        options = doc["choices"]
        return options.index(doc["answer"])

    def construct_requests(self, doc: Doc, ctx: str) -> list:
        """Build one ``"loglikelihood"`` request per choice.

        Every request pairs the context ``ctx`` with the choice prefixed by a
        single space.
        """
        from lighteval.tasks.requests import Request

        requests = []
        for option in doc["choices"]:
            continuation = " " + option
            requests.append(
                Request(request_type="loglikelihood", args=(ctx, continuation))
            )
        return requests

    def process_results(self, doc: Doc, results: list) -> dict:
        """Return ``{"acc": 1}`` if the best-scoring choice is the gold one, else ``{"acc": 0}``."""
        predicted = np.argmax(results)
        expected = self.doc_to_target(doc)
        if predicted == expected:
            score = 1
        else:
            score = 0
        return {"acc": score}
# Callable entry point used to register the task.
def custom_csi():
    """Create and return a new ``CustomCSI`` task instance."""
    task = CustomCSI()
    return task
run_evaluation.py
import lighteval
from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import ParallelismManager, Pipeline, PipelineParameters
from lighteval.utils.imports import is_accelerate_available
# Module-level accelerator: None when accelerate is not installed, otherwise
# an Accelerator whose process-group timeout is raised to 3000 seconds.
if not is_accelerate_available():
    accelerator = None
else:
    from datetime import timedelta

    from accelerate import Accelerator, InitProcessGroupKwargs

    accelerator = Accelerator(
        kwargs_handlers=[
            InitProcessGroupKwargs(timeout=timedelta(seconds=3000)),
        ],
    )
def main():
    """Run a lighteval evaluation with a vLLM-backed model and report results.

    Builds the evaluation tracker, the pipeline parameters and the model
    configuration, then evaluates the selected task, saves the results
    under ``./results`` and prints them.
    """
    evaluation_tracker = EvaluationTracker(
        output_dir="./results",
        save_details=True,
    )
    pipeline_params = PipelineParameters(
        launcher_type=ParallelismManager.ACCELERATE,
        # The keyword is ``custom_tasks_directory`` (plural "tasks"). The
        # misspelt ``custom_task_directory`` is what raised
        # "TypeError: ... unexpected keyword argument".
        # NOTE(review): confirm that 'evaluation' resolves to the module/file
        # that actually exposes the custom task definitions.
        custom_tasks_directory='evaluation',
    )
    model_config = VLLMModelConfig(
        model_name="HuggingFaceH4/zephyr-7b-beta",
        dtype="float16",
        use_chat_template=True,
    )
    # NOTE(review): this still selects the stock MMLU task; replace it with
    # the custom task's identifier once that task is registered.
    task = "helm|mmlu|5|1"
    pipeline = Pipeline(
        # The task selection was previously commented out, leaving the
        # pipeline without anything to evaluate.
        tasks=task,
        pipeline_parameters=pipeline_params,
        evaluation_tracker=evaluation_tracker,
        model_config=model_config,
    )
    pipeline.evaluate()
    pipeline.save_and_push_results()
    pipeline.show_results()


if __name__ == "__main__":
    main()
< /code>
csi_benchmark_advanced.jsonl
{"task_id": "CSI-EN-DE-001", "task_type": "cross_lingual_synonym_identification", "source_word": "house", "source_lang": "EN", "target_lang": "DE", "question": "Which word has the same meaning as 'house' in German?", "choices": ["Torbau", "häusliches Arbeitszimmer", "Haus", "Dachboden"], "answer": "Haus"}
{"task_id": "CSI-EN-FR-002", "task_type": "cross_lingual_synonym_identification", "source_word": "house", "source_lang": "EN", "target_lang": "FR", "question": "Which word has the same meaning as 'house' in French?", "choices": ["porche", "galetas", "maison", "bibliothèque"], "answer": "maison"}
{"task_id": "CSI-EN-DE-003", "task_type": "cross_lingual_synonym_identification", "source_word": "water", "source_lang": "EN", "target_lang": "DE", "question": "Which word has the same meaning as 'water' in German?", "choices": ["Flaschenwasser", "Eiswasser", "Trinkwasser", "Sodawasser"], "answer": "Trinkwasser"}
{"task_id": "CSI-EN-FR-004", "task_type": "cross_lingual_synonym_identification", "source_word": "water", "source_lang": "EN", "target_lang": "FR", "question": "Which word has the same meaning as 'water' in French?", "choices": ["eau potable", "eau gazeuse", "eau en bouteille", "l'eau glacée"], "answer": "eau potable"}
{"task_id": "CSI-EN-DE-005", "task_type": "cross_lingual_synonym_identification", "source_word": "sun", "source_lang": "EN", "target_lang": "DE", "question": "Which word has the same meaning as 'sun' in German?", "choices": ["ruhetag", "Sonntag", "Eins", "Weekend"], "answer": "Sonntag"}
{"task_id": "CSI-EN-FR-006", "task_type": "cross_lingual_synonym_identification", "source_word": "sun", "source_lang": "EN", "target_lang": "FR", "question": "Which word has the same meaning as 'sun' in French?", "choices": ["fin de semaine", "un", "jour de repos", "dimanche"], "answer": "dimanche"}
{"task_id": "CSI-EN-DE-007", "task_type": "cross_lingual_synonym_identification", "source_word": "tree", "source_lang": "EN", "target_lang": "DE", "question": "Which word has the same meaning as 'tree' in German?", "choices": ["Kernholz", "Wald", "Splintholz", "Baum"], "answer": "Baum"}
{"task_id": "CSI-EN-FR-008", "task_type": "cross_lingual_synonym_identification", "source_word": "tree", "source_lang": "EN", "target_lang": "FR", "question": "Which word has the same meaning as 'tree' in French?", "choices": ["aubier", "bois", "arbre", "duramen"], "answer": "arbre"}
{"task_id": "CSI-EN-FR-009", "task_type": "cross_lingual_synonym_identification", "source_word": "eat", "source_lang": "EN", "target_lang": "FR", "question": "Which word has the same meaning as 'eat' in French?", "choices": ["essuyer", "consommer", "petit", "occuper"], "answer": "consommer"}
Подробнее здесь: https://stackoverflow.com/questions/796 ... -lighteval
Оценка языковой модели с пользовательской задачей - Hugging Face Lighteval ⇐ Python
-
- Похожие темы
- Ответы
- Просмотры
- Последнее сообщение
-
-
Использование квантованной версии языковой модели Phi-3-Mini в Jupyter Notebook
Anonymous » » в форуме Python - 0 Ответы
- 35 Просмотры
-
Последнее сообщение Anonymous
-