Azure AI Studio – сценарий выполняется неопределенно долго, без ошибок и выходных данных.Python

Программы на Python
Ответить
Anonymous
 Azure AI Studio – сценарий выполняется неопределенно долго, без ошибок и выходных данных.

Сообщение Anonymous »

Я следую этому руководству, чтобы начать работу с Azure AI Studio. Сценарий create_search_index.py успешно выполнился и создал индекс. Но сценарии get_product_documents.py и chat_with_products.py не выдают никаких ошибок или вывода при запуске, они просто продолжают работать бесконечно.
Есть идеи, что может происходить? И что я могу попробовать, чтобы решить эту проблему?

Вот скрипты по ссылке:
### config.py

# ruff: noqa: ANN201, ANN001

import os
import sys
import pathlib
import logging
from azure.identity import DefaultAzureCredential
from azure.ai.projects import AIProjectClient
from azure.ai.inference.tracing import AIInferenceInstrumentor

# load environment variables from the .env file
from dotenv import load_dotenv

# Read variables from a local .env file into os.environ (no-op if the file is absent).
load_dotenv()

# Absolute path to the "./assets" directory next to this file (prompty templates, CSV data).
ASSET_PATH = pathlib.Path(__file__).parent.resolve() / "assets"

# Configure a root "app" logger that prints INFO-level logs to stdout.
# NOTE(review): this runs at import time, so every import of config.py shares this handler;
# importing the module under two different names would attach duplicate handlers.
logger = logging.getLogger("app")
logger.setLevel(logging.INFO)
logger.addHandler(logging.StreamHandler(stream=sys.stdout))

# Returns a module-specific logger, inheriting from the root app logger
def get_logger(module_name):
    """Return a module-specific child logger named "app.<module_name>".

    Child loggers propagate to the root "app" logger configured above, so they
    inherit its stdout handler and INFO level without extra setup.
    """
    # FIX(review): body indentation was lost in the paste (SyntaxError); restored.
    return logging.getLogger(f"app.{module_name}")

# Enable instrumentation and logging of telemetry to the project
def enable_telemetry(log_to_project: bool = False):
    """Enable OpenTelemetry instrumentation of azure-ai-inference calls.

    :param log_to_project: when True, also wire up Azure Monitor so traces are
        exported to the Application Insights resource attached to the project.
    """
    # FIX(review): block indentation was lost in the paste (SyntaxError); restored.
    AIInferenceInstrumentor().instrument()

    # enable logging message contents (prompts/completions) in traces
    os.environ["AZURE_TRACING_GEN_AI_CONTENT_RECORDING_ENABLED"] = "true"

    if not log_to_project:
        return

    # Imported lazily so azure-monitor-opentelemetry is only needed when exporting.
    from azure.monitor.opentelemetry import configure_azure_monitor

    project = AIProjectClient.from_connection_string(
        conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
    )
    # Deep link to the project's tracing page in the AI Studio portal.
    # FIX(review): the original line was truncated by the forum ("/subs ...");
    # reconstructed from the project.scope keys used in the surviving fragment.
    tracing_link = (
        "https://ai.azure.com/tracing?wsid="
        f"/subscriptions/{project.scope['subscription_id']}"
        f"/resourceGroups/{project.scope['resource_group_name']}"
        "/providers/Microsoft.MachineLearningServices"
        f"/workspaces/{project.scope['project_name']}"
    )
    application_insights_connection_string = project.telemetry.get_connection_string()
    if not application_insights_connection_string:
        logger.warning(
            "No application insights configured, telemetry will not be logged to project. Add application insights at:"
        )
        logger.warning(tracing_link)

        return

    configure_azure_monitor(connection_string=application_insights_connection_string)
    logger.info("Enabled telemetry logging to project, view traces at:")
    logger.info(tracing_link)


### create_search_index.py

import os
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import ConnectionType
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from azure.search.documents.indexes import SearchIndexClient
from config import get_logger

# Module-level logger for this script (child of the "app" logger configured in config.py).
logger = get_logger(__name__)

# NOTE(review): the tutorial's connection-string based client is left here commented
# out; this script builds the client from individual environment variables instead.
# project = AIProjectClient.from_connection_string(
# conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
# )

# Required configuration, read from the environment (populated via config.py's
# load_dotenv()). A missing variable raises KeyError here, before any network call.
API_ENDPOINT = os.environ["API_ENDPOINT"]
API_KEY = os.environ["API_KEY"]  # NOTE(review): read but never used below — confirm whether it is needed
SUBSCRIPTION_ID = os.environ["SUBSCRIPTION_ID"]
RESOURCE_GROUP_NAME = os.environ["RESOURCE_GROUP_NAME"]
PROJECT_NAME = os.environ["PROJECT_NAME"]

# Project client for the Azure AI Studio project; DefaultAzureCredential walks the
# standard credential chain (env vars, managed identity, Azure CLI login, ...).
project = AIProjectClient(
endpoint=API_ENDPOINT,
credential=DefaultAzureCredential(),
subscription_id=SUBSCRIPTION_ID,
resource_group_name=RESOURCE_GROUP_NAME,
project_name=PROJECT_NAME
)

# Client used to generate vector embeddings for the documents being indexed.
embeddings = project.inference.get_embeddings_client()

# Default Azure AI Search connection of the project, including its API key.
search_connection = project.connections.get_default(
connection_type=ConnectionType.AZURE_AI_SEARCH, include_credentials=True
)

# Index-management client (create/delete indexes), authenticated with the search key.
index_client = SearchIndexClient(
endpoint=search_connection.endpoint_url, credential=AzureKeyCredential(key=search_connection.key)
)

### Define a search index
import pandas as pd
from azure.search.documents.indexes.models import (
SemanticSearch,
SearchField,
SimpleField,
SearchableField,
SearchFieldDataType,
SemanticConfiguration,
SemanticPrioritizedFields,
SemanticField,
VectorSearch,
HnswAlgorithmConfiguration,
VectorSearchAlgorithmKind,
HnswParameters,
VectorSearchAlgorithmMetric,
ExhaustiveKnnAlgorithmConfiguration,
ExhaustiveKnnParameters,
VectorSearchProfile,
SearchIndex,
)

def create_index_definition(index_name: str, model: str) -> SearchIndex:
    """Build a SearchIndex definition with keyword, semantic and vector search enabled.

    :param index_name: name for the new Azure AI Search index.
    :param model: embedding model name; determines the vector dimensionality.
    :returns: a SearchIndex object ready to pass to SearchIndexClient.create_index.
    """
    # FIX(review): body indentation was lost in the paste (SyntaxError); restored.
    # Embedding dimensionality depends on the embedding model in use.
    dimensions = 1536  # text-embedding-ada-002
    if model == "text-embedding-3-large":
        dimensions = 3072

    # The fields we want to index. The "embedding" field is a vector field that will
    # be used for vector search.
    fields = [
        SimpleField(name="id", type=SearchFieldDataType.String, key=True),
        SearchableField(name="content", type=SearchFieldDataType.String),
        SimpleField(name="filepath", type=SearchFieldDataType.String),
        SearchableField(name="title", type=SearchFieldDataType.String),
        SimpleField(name="url", type=SearchFieldDataType.String),
        SearchField(
            name="contentVector",
            type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
            searchable=True,
            # Size of the vector created by the selected embedding model.
            vector_search_dimensions=dimensions,
            vector_search_profile_name="myHnswProfile",
        ),
    ]

    # The "content" field should be prioritized for semantic ranking.
    semantic_config = SemanticConfiguration(
        name="default",
        prioritized_fields=SemanticPrioritizedFields(
            title_field=SemanticField(field_name="title"),
            keywords_fields=[],
            content_fields=[SemanticField(field_name="content")],
        ),
    )

    # Two vector algorithms, both cosine: approximate HNSW (used by the default
    # profile) and exhaustive KNN for exact-recall queries.
    vector_search = VectorSearch(
        algorithms=[
            HnswAlgorithmConfiguration(
                name="myHnsw",
                kind=VectorSearchAlgorithmKind.HNSW,
                parameters=HnswParameters(
                    m=4,
                    ef_construction=1000,
                    ef_search=1000,
                    metric=VectorSearchAlgorithmMetric.COSINE,
                ),
            ),
            ExhaustiveKnnAlgorithmConfiguration(
                name="myExhaustiveKnn",
                kind=VectorSearchAlgorithmKind.EXHAUSTIVE_KNN,
                parameters=ExhaustiveKnnParameters(metric=VectorSearchAlgorithmMetric.COSINE),
            ),
        ],
        profiles=[
            VectorSearchProfile(
                name="myHnswProfile",
                algorithm_configuration_name="myHnsw",
            ),
            VectorSearchProfile(
                name="myExhaustiveKnnProfile",
                algorithm_configuration_name="myExhaustiveKnn",
            ),
        ],
    )

    # Create the semantic settings with the configuration
    semantic_search = SemanticSearch(configurations=[semantic_config])

    # Create the search index definition
    return SearchIndex(
        name=index_name,
        fields=fields,
        semantic_search=semantic_search,
        vector_search=vector_search,
    )

### add a csv file to the index
# define a function for indexing a csv file, that adds each row as a document
# and generates vector embeddings for the specified content_column
def create_docs_from_csv(path: str, content_column: str, model: str) -> list[dict]:
    """Read a CSV of products and build one search document per row.

    Each document carries the row's text plus a vector embedding of it (generated
    via the module-level `embeddings` client) in the "contentVector" field.

    Assumes the CSV has "id", "name" and `content_column` columns — confirm
    against assets/products.csv.

    :param path: path to the CSV file.
    :param content_column: name of the column whose text is embedded.
    :param model: embedding model passed to the embeddings client.
    :returns: list of document dicts ready for SearchClient.upload_documents.
    """
    # FIX(review): body indentation was lost in the paste (SyntaxError); restored.
    # Also renamed local `id` (shadowed the builtin) and fixed the return
    # annotation, which used the builtin `any` instead of a real type.
    products = pd.read_csv(path)
    items = []
    for product in products.to_dict("records"):
        content = product[content_column]
        doc_id = str(product["id"])
        title = product["name"]
        slug = title.lower().replace(" ", "-")  # hoisted: used for both filepath and url
        # One embedding call per row — fine for a tutorial-sized CSV.
        emb = embeddings.embed(input=content, model=model)
        items.append(
            {
                "id": doc_id,
                "content": content,
                "filepath": slug,
                "title": title,
                "url": f"/products/{slug}",
                "contentVector": emb.data[0].embedding,
            }
        )

    return items

def create_index_from_csv(index_name, csv_file):
    """(Re)create the search index `index_name` and fill it from `csv_file`.

    Any existing index with the same name is deleted first, so the function is
    idempotent but destructive: rerunning it replaces the index contents.
    """
    # FIX(review): body indentation was lost in the paste (SyntaxError); restored.
    # If a search index already exists, delete it. FIX(review): the original
    # assigned get_index's result to `index_definition` and never used it.
    try:
        index_client.get_index(index_name)  # raises if the index does not exist
        index_client.delete_index(index_name)
        logger.info(f"🗑️ Found existing index named '{index_name}', and deleted it")
    except Exception:
        # NOTE(review): a broad except also hides auth/network errors here;
        # catching azure.core.exceptions.ResourceNotFoundError only would be safer.
        pass

    # create an empty search index
    index_definition = create_index_definition(index_name, model=os.environ["EMBEDDINGS_MODEL"])
    index_client.create_index(index_definition)

    # create documents from the products.csv file, generating vector embeddings for the "description" column
    docs = create_docs_from_csv(path=csv_file, content_column="description", model=os.environ["EMBEDDINGS_MODEL"])

    # Add the documents to the index using a data-plane Azure AI Search client
    search_client = SearchClient(
        endpoint=search_connection.endpoint_url,
        index_name=index_name,
        credential=AzureKeyCredential(key=search_connection.key),
    )

    search_client.upload_documents(docs)
    logger.info(f"➕ Uploaded {len(docs)} documents to '{index_name}' index")

### CLI entry point: build the index from a CSV file
if __name__ == "__main__":
    import argparse

    # FIX(review): block indentation was lost in the paste (SyntaxError); restored.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--index-name",
        type=str,
        help="index name to use when creating the AI Search index",
        default=os.environ["AISEARCH_INDEX_NAME"],
    )
    parser.add_argument(
        "--csv-file", type=str, help="path to data for creating search index", default="assets/products.csv"
    )
    args = parser.parse_args()
    index_name = args.index_name
    csv_file = args.csv_file

    create_index_from_csv(index_name, csv_file)


### get_product_documents.py

import os
from pathlib import Path
from opentelemetry import trace
from azure.ai.projects import AIProjectClient
from azure.ai.projects.models import ConnectionType
from azure.identity import DefaultAzureCredential
from azure.core.credentials import AzureKeyCredential
from azure.search.documents import SearchClient
from config import ASSET_PATH, get_logger
from azure.ai.inference.prompts import PromptTemplate
from azure.search.documents.models import VectorizedQuery

# Module-level logger and OpenTelemetry tracer for this script.
logger = get_logger(__name__)
tracer = trace.get_tracer(__name__)

# NOTE(review): the tutorial's connection-string based client is left here commented
# out; this script builds the client from individual environment variables instead.
# project = AIProjectClient.from_connection_string(
# conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
# )

# Required configuration from the environment (a missing variable raises KeyError here).
API_ENDPOINT = os.environ["API_ENDPOINT"]
API_KEY = os.environ["API_KEY"]  # NOTE(review): read but never used below — confirm whether it is needed
SUBSCRIPTION_ID = os.environ["SUBSCRIPTION_ID"]
RESOURCE_GROUP_NAME = os.environ["RESOURCE_GROUP_NAME"]
PROJECT_NAME = os.environ["PROJECT_NAME"]

# Project client for the Azure AI Studio project.
project = AIProjectClient(
endpoint=API_ENDPOINT,
credential=DefaultAzureCredential(),
subscription_id=SUBSCRIPTION_ID,
resource_group_name=RESOURCE_GROUP_NAME,
project_name=PROJECT_NAME
)

# Inference clients: chat completions (intent mapping) and embeddings (query vectors).
chat = project.inference.get_chat_completions_client()
embeddings = project.inference.get_embeddings_client()

# use the project client to get the default search connection
search_connection = project.connections.get_default(
connection_type=ConnectionType.AZURE_AI_SEARCH, include_credentials=True
)

# Data-plane search client for querying the existing index.
# FIX(review): the original comment (copied from create_search_index.py) wrongly said
# this client creates/deletes indexes — it only runs queries against one index.
search_client = SearchClient(
index_name=os.environ["AISEARCH_INDEX_NAME"],
endpoint=search_connection.endpoint_url,
credential=AzureKeyCredential(key=search_connection.key),
)

@tracer.start_as_current_span(name="get_product_documents")
def get_product_documents(messages: list, context: dict = None) -> list[dict]:
    """Retrieve product documents relevant to a chat conversation.

    Uses the chat model to distill `messages` into a single search query, embeds
    that query, then runs a hybrid (keyword + vector) search against the index.

    FIX(review): indentation was lost in the paste (SyntaxError); restored. The
    return annotation also said `dict` although the function returns a list of
    document dicts — corrected to `list[dict]`.

    :param messages: chat history as [{"role": ..., "content": ...}, ...].
    :param context: optional mutable dict; "thoughts" and "grounding_data"
        entries are appended to it as a side effect. May carry
        {"overrides": {"top": N}} to change the result count.
    :returns: up to `top` documents with id/content/filepath/title/url keys.
    """
    if context is None:
        context = {}

    overrides = context.get("overrides", {})
    top = overrides.get("top", 5)  # number of nearest neighbors to retrieve

    # generate a search query from the chat messages
    intent_prompty = PromptTemplate.from_prompty(Path(ASSET_PATH) / "intent_mapping.prompty")

    intent_mapping_response = chat.complete(
        model=os.environ["INTENT_MAPPING_MODEL"],
        messages=intent_prompty.create_messages(conversation=messages),
        **intent_prompty.parameters,
    )

    search_query = intent_mapping_response.choices[0].message.content
    logger.debug(f"🧠 Intent mapping: {search_query}")

    # generate a vector representation of the search query
    embedding = embeddings.embed(model=os.environ["EMBEDDINGS_MODEL"], input=search_query)
    search_vector = embedding.data[0].embedding

    # hybrid search: full-text on the query string plus vector similarity on its embedding
    vector_query = VectorizedQuery(vector=search_vector, k_nearest_neighbors=top, fields="contentVector")

    search_results = search_client.search(
        search_text=search_query, vector_queries=[vector_query], select=["id", "content", "filepath", "title", "url"]
    )

    documents = [
        {
            "id": result["id"],
            "content": result["content"],
            "filepath": result["filepath"],
            "title": result["title"],
            "url": result["url"],
        }
        for result in search_results
    ]

    # record the generated query and the retrieved documents on the caller's
    # context so they can be surfaced to the caller (setdefault replaces the
    # original "if key not in" checks — same behavior, one lookup)
    context.setdefault("thoughts", []).append(
        {
            "title": "Generated search query",
            "description": search_query,
        }
    )
    context.setdefault("grounding_data", []).append(documents)

    logger.debug(f"📄 {len(documents)} documents retrieved: {documents}")
    return documents

if __name__ == "__main__":
    import logging
    import argparse

    # FIX(review): block indentation was lost in the paste (SyntaxError); restored.
    # set logging level to debug when running this module directly
    logger.setLevel(logging.DEBUG)

    # load command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--query",
        type=str,
        help="Query to use to search product",
        default="I need a new tent for 4 people, what would you recommend?",
    )

    args = parser.parse_args()
    query = args.query

    result = get_product_documents(messages=[{"role": "user", "content": query}])


### chat_with_products.py

import os
from pathlib import Path
from opentelemetry import trace
from azure.ai.projects import AIProjectClient
from azure.identity import DefaultAzureCredential
from config import ASSET_PATH, get_logger, enable_telemetry
from get_product_documents import get_product_documents
from azure.ai.inference.prompts import PromptTemplate

# Module-level logger and OpenTelemetry tracer for this script.
logger = get_logger(__name__)
tracer = trace.get_tracer(__name__)

# NOTE(review): the tutorial's connection-string based client is left here commented
# out; this script builds the client from individual environment variables instead.
# project = AIProjectClient.from_connection_string(
# conn_str=os.environ["AIPROJECT_CONNECTION_STRING"], credential=DefaultAzureCredential()
# )

# Required configuration from the environment (a missing variable raises KeyError here).
API_ENDPOINT = os.environ["API_ENDPOINT"]
API_KEY = os.environ["API_KEY"]  # NOTE(review): read but never used below — confirm whether it is needed
SUBSCRIPTION_ID = os.environ["SUBSCRIPTION_ID"]
RESOURCE_GROUP_NAME = os.environ["RESOURCE_GROUP_NAME"]
PROJECT_NAME = os.environ["PROJECT_NAME"]

# Project client for the Azure AI Studio project.
project = AIProjectClient(
endpoint=API_ENDPOINT,
credential=DefaultAzureCredential(),
subscription_id=SUBSCRIPTION_ID,
resource_group_name=RESOURCE_GROUP_NAME,
project_name=PROJECT_NAME
)

# Chat completions client used for the grounded chat call.
chat = project.inference.get_chat_completions_client()

@tracer.start_as_current_span(name="chat_with_products")
def chat_with_products(messages: list, context: dict = None) -> dict:
    """Answer a conversation with a chat completion grounded in product search results.

    Retrieves matching product documents, renders them into the grounded-chat
    system prompt, and calls the chat model with that prompt prepended.

    :param messages: chat history as [{"role": ..., "content": ...}, ...].
    :param context: optional mutable dict, passed through to
        get_product_documents (which appends retrieval details to it).
    :returns: chat-protocol-shaped dict {"message": ..., "context": ...}.
    """
    # FIX(review): body indentation was lost in the paste (SyntaxError); restored.
    if context is None:
        context = {}

    documents = get_product_documents(messages, context)

    # do a grounded chat call using the search results
    grounded_chat_prompt = PromptTemplate.from_prompty(Path(ASSET_PATH) / "grounded_chat.prompty")

    system_message = grounded_chat_prompt.create_messages(documents=documents, context=context)
    response = chat.complete(
        model=os.environ["CHAT_MODEL"],
        messages=system_message + messages,
        **grounded_chat_prompt.parameters,
    )
    logger.info(f"💬 Response: {response.choices[0].message}")

    # Return a chat protocol compliant response
    return {"message": response.choices[0].message, "context": context}

if __name__ == "__main__":
    import argparse

    # FIX(review): block indentation was lost in the paste (SyntaxError); restored.
    # load command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--query",
        type=str,
        help="Query to use to search product",
        default="I need a new tent for 4 people, what would you recommend?",
    )
    parser.add_argument(
        "--enable-telemetry",
        action="store_true",
        help="Enable sending telemetry back to the project",
    )
    args = parser.parse_args()
    if args.enable_telemetry:
        enable_telemetry(True)

    # run chat with products
    response = chat_with_products(messages=[{"role": "user", "content": args.query}])


Подробнее здесь: https://stackoverflow.com/questions/792 ... or-outputs
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Python»