Ниже приведён фрагмент кода на Python, который запускается в блокноте Azure Databricks: он создаёт индекс в Azure Cognitive Search (ACS) и загружает в него документы.
Код: Выделить всё
from azure.search.documents.indexes import SearchIndexClient
from azure.search.documents.indexes.models import (
SimpleField,
SearchFieldDataType,
SearchableField,
SearchField,
VectorSearch,
HnswAlgorithmConfiguration,
VectorSearchProfile,
SemanticConfiguration,
SemanticPrioritizedFields,
SemanticField,
SemanticSearch,
SearchIndex,
AzureOpenAIVectorizer,
AzureOpenAIVectorizerParameters
)
# Create an Azure Cognitive Search index with a key field, a content field
# and one vector field backed by an HNSW profile.

# AzureKeyCredential is provided by azure-core (a dependency of
# azure-search-documents). It is imported here because nothing earlier in
# this snippet brings the name into scope.
from azure.core.credentials import AzureKeyCredential

# Azure Cognitive Search setup
service_endpoint = "https://yourserviceendpoint.search.windows.net"
# NOTE(review): placeholder value; load the real admin key from a secret
# store instead of hard-coding it in the notebook.
admin_key = "ABC"
index_name = "courses-index"

# The SDK clients expect the key wrapped in an AzureKeyCredential.
credential = AzureKeyCredential(admin_key)
index_client = SearchIndexClient(endpoint=service_endpoint, credential=credential)

# Index schema: "id" is the document key, "content" is stored as a plain
# (non-searchable) string, "embedding" is the vector field.
fields = [
    SimpleField(name="id", type=SearchFieldDataType.String, key=True),
    SimpleField(name="content", type=SearchFieldDataType.String),
    SearchField(
        name="embedding",
        type=SearchFieldDataType.Collection(SearchFieldDataType.Single),
        searchable=True,
        # Must match the length of the embedding vectors stored in this field.
        vector_search_dimensions=384,
        # Must name a profile declared in the VectorSearch config below.
        vector_search_profile_name="myHnswProfile",
    ),
]

# Vector search configuration: one HNSW algorithm and one profile that
# points at it by name.
vector_search = VectorSearch(
    algorithms=[
        HnswAlgorithmConfiguration(name="myHnsw"),
    ],
    profiles=[
        VectorSearchProfile(
            name="myHnswProfile",
            algorithm_configuration_name="myHnsw",
        ),
    ],
)

# Assemble the index definition and send the creation request.
index = SearchIndex(
    name=index_name,
    fields=fields,
    vector_search=vector_search,
)
index_client.create_index(index)
print(f"Index '{index_name}' created successfully.")
Код: Выделить всё
from azure.search.documents import SearchClient

# Client bound to the target index; reuses the endpoint, index name and
# credential set up for index creation.
search_client = SearchClient(endpoint=service_endpoint, index_name=index_name, credential=credential)

# Build one search document per DataFrame row. The row label becomes the
# string key; the vector is read from the "embeddings" column as-is.
documents = [
    {
        "id": str(row_label),
        "content": record["content"],
        "embedding": record["embeddings"],  # expected to be a list of floats
    }
    for row_label, record in courses_pd.iterrows()
]

# Push the whole batch to the index in a single call.
search_client.upload_documents(documents=documents)
print(f"Uploaded {len(documents)} documents to Azure Cognitive Search.")
Код: Выделить всё
# Run a pure vector (k-nearest-neighbour) query against the "embedding" field.

# VectorQuery is only the abstract base class: it has no `vector` or `k`
# attributes and does not set the request's `kind`, which is what produces
# "The vector query's 'kind' parameter is not set". VectorizedQuery is the
# concrete class for a query that supplies a precomputed embedding.
from azure.search.documents.models import VectorizedQuery

# Generate embedding for the query.
# NOTE(review): `model` is defined outside this snippet; presumably it is the
# same embedding model that produced the stored vectors. Confirm.
query = "machine learning"
query_embedding = model.encode(query).tolist()  # convert to a list of floats

vector_query = VectorizedQuery(
    vector=query_embedding,
    k_nearest_neighbors=3,  # number of nearest neighbours to return
    fields="embedding",  # name of the vector field to search
)

# Perform the search; no search text is passed, so only the vector query runs.
results = search_client.search(
    search_text=None,
    vector_queries=[vector_query],
    select=["id", "content"],
)

# Print the results
for result in results:
    print(f"ID: {result['id']}, Content: {result['content']}")
Код: Выделить всё
vector is not a known attribute of class and will be ignored
k is not a known attribute of class and will be ignored
HttpResponseError: (InvalidRequestParameter) The vector query's 'kind' parameter is not set.
Документы загружаются и создается индекс, как я вижу на портале.

Должно быть, я делаю что-то не так — либо в настройке поискового индекса, либо в самом запросе к нему. Документация и примеры кода на GitHub ничего не проясняют по этому поводу, поэтому обращаюсь за помощью к сообществу: я в этом новичок.
Подробнее здесь: https://stackoverflow.com/questions/793 ... x-creation