Я создал чат-бот с базой знаний в формате PDF, обновил данные PDF в Astradb.
Он успешно выполняется, но я интегрировал модель Mistral 7B LLM в свой код Python и получаю результат успешно, но время, необходимое для получения ответа, превышает 1 минуту. Как я могу сократить время?
Я попробовал приведенный ниже код получения ответа на запрос:
class EnhancedChatbot:
def __init__(self, collection):
self.llm = global_llm
self.collection = collection
self.prompt_template = PromptTemplate(
template="""
Based on the following context, please answer the question.
Context: {context}
Question: {question}
Answer:""",
input_variables=["context", "question"]
)
def get_relevant_documents(self, query: str) -> List[Document]:
try:
embeddings_model = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-mpnet-base-v2",
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': False}
)
query_embedding = np.array(embeddings_model.embed_query(query))
results = self.collection.find()
if not results or not isinstance(results, dict) or 'data' not in results:
print("No documents found in collection")
return []
documents = results['data'].get('documents', [])
if not documents:
print("No documents in collection data")
return []
docs_with_similarity = []
for doc in documents:
try:
if not isinstance(doc, dict):
continue
content = doc.get('content')
embedding = doc.get('embedding')
if not content or not embedding:
continue
embedding = np.array(embedding)
if embedding.shape[0] != 768:
print(f"Skipping document with invalid embedding shape: {embedding.shape}")
continue
similarity = euclidean_distances([query_embedding], [embedding])[0][0]
docs_with_similarity.append(
Document(
page_content=content,
metadata={'similarity': similarity, **doc.get('metadata', {})}
)
)
except Exception as doc_error:
print(f"Error processing document: {str(doc_error)}")
continue
docs_with_similarity.sort(key=lambda x: x.metadata['similarity'], reverse=True)
return docs_with_similarity[:6]
except Exception as e:
print(f"Error in get_relevant_documents: {str(e)}")
return []
def answer_query(self, query: str) -> str:
try:
if not query or not isinstance(query, str):
return self.format_response("Invalid query provided", [], query)
relevant_docs = self.get_relevant_documents(query)
if not relevant_docs:
return self.format_response(
"No relevant information found in the database.",
[],
query
)
text_splitter = TokenTextSplitter(
chunk_size=200,
chunk_overlap=20
)
texts = text_splitter.split_documents(relevant_docs)
max_chunks = 3
texts = texts[:max_chunks]
llm_chain = LLMChain(
llm=self.llm,
prompt=self.prompt_template
)
chain = StuffDocumentsChain(
llm_chain=llm_chain,
document_variable_name="context"
)
context = "\n".join([doc.page_content for doc in texts])
max_context_chars = 800
if len(context) > max_context_chars:
context = context[:max_context_chars] + "..."
response = chain({
"question": query,
"input_documents": [Document(page_content=context)]
})
response_text = response.get('output_text', 'No response generated')
print(f"Response text: {response_text}")
return self.format_response(
response_text,
[doc.metadata.get('source', 'Unknown') for doc in relevant_docs],
query
)
except Exception as e:
print(f"Error in answer_query: {str(e)}")
return self.format_response(
f"An error occurred while processing your query: {str(e)}",
[],
query
)
def format_response(self, message: str, sources: List[str], query: str) -> str:
try:
sources_text = "\n".join([f"{source}" for source in sources]) if sources else "No relevant sources found"
print(f"source response: {sources_text}")
return f"""
{message}
Query: {query}
{message if sources else f"Unable to provide detailed information due to: {message}"}
{sources_text}
"""
except Exception as e:
print(f"Error formatting response: {str(e)}")
return f"""
Error formatting response
An error occurred while formatting the response
No sources available
"""
def train_on_example(self, example: Dict[str, str]):
"""Add a training example to improve responses"""
try:
if not isinstance(example, dict) or 'question' not in example or 'answer' not in example:
raise ValueError("Invalid training example format")
formatted_prompt = self.prompt_template.format(
context="",
question=example['question']
)
training_example = {
'prompt': formatted_prompt,
'response': example['answer'],
'metadata': {
'timestamp': str(time.time()),
'type': 'training_example'
}
}
if self.collection:
self.collection.insert_one(training_example)
print(f"Training example added successfully for question: {example['question']}")
else:
raise ValueError("Collection not initialized")
except Exception as e:
print(f"Error adding training example: {str(e)}")
def initialize_application(model_path: str):
"""Initialize the application with the LLM"""
try:
initialize_llm(model_path, model_type='mistral')
print("Application initialized successfully")
except Exception as e:
print(f"Failed to initialize application: {str(e)}")
raise
def answer_query(query: str):
if chatbot_collection is None:
print("Collection not initialized")
return None
try:
chatbot = EnhancedChatbot(collection=chatbot_collection)
return chatbot.answer_query(query)
except Exception as e:
print(f"Error processing query: {str(e)}")
return None
@app.route('/query', methods=\['POST'\])
async def query():
try:
authenticated = is_authenticated()
print(f"authenticated: {authenticated}", flush=True)
if not authenticated:
if not check_query_limit():
print("Query limit exceeded", flush=True)
return jsonify({
"error": "Query limit exceeded. Please login to continue.",
"status": "error",
"requires_login": True
}), 403
session['query_count'] = session.get('query_count', 0) + 1
data = request.json # This parses the JSON automatically
if not data:
return jsonify({"error": "No data provided", "status": "error"}), 400
question = data.get('query')
session_id = data.get('session_id')
if not question:
return jsonify({"error": "No question provided", "status": "error"}), 400
user_id = None
if authenticated:
auth_header = request.headers.get('Authorization')
token = auth_header.split(' ')[1]
user_id = decode_jwt_token(token)
if not session_id:
user = db.users.find_one(
{
"_id": ObjectId(user_id),
"queries.status": "active"
},
{"queries": {"$elemMatch": {"status": "active"}}}
)
if user and 'queries' in user and user['queries']:
session_id = user['queries'][0]['session_id']
else:
# Create new session if no active session exists
new_session = {
"session_id": str(uuid.uuid4()),
"created_at": datetime.utcnow(),
"status": "active",
"queries": []
}
result = db.users.update_one(
{"_id": ObjectId(user_id)},
{
"$push": {
"queries": new_session
}
}
)
if result.modified_count == 0:
return jsonify({
"error": "Failed to create session",
"status": "error"
}), 500
session_id = new_session["session_id"]
response = answer_query(question)
if not response:
return jsonify({
"error": "Failed to generate response",
"status": "error"
}), 500
if authenticated and user_id and session_id:
try:
# Check if the session is active
user = db.users.find_one({
"_id": ObjectId(user_id),
"queries": {
"$elemMatch": {
"session_id": session_id,
"status": "active"
}
}
})
if not user:
return jsonify({
"error": "Session not found or has ended",
"status": "error"
}), 400
query_data = {
"query": question,
"response": response,
"timestamp": datetime.utcnow()
}
result = db.users.update_one(
{
"_id": ObjectId(user_id),
"queries.session_id": session_id
},
{
"$push": {
"queries.$.queries": query_data
}
}
)
if not result.modified_count:
logger.error(f"Failed to update session {session_id}")
except Exception as e:
logger.error(f"Error storing query in session: {str(e)}")
remaining_queries = 2 - session.get('query_count', 0) if not authenticated else None
return jsonify({
"answer": response,
"status": "success",
"remaining_queries": remaining_queries,
"authenticated": authenticated,
"session_id": session_id
}), 200
except Exception as e:
logger.error(f"Query endpoint error: {str(e)}")
return jsonify({
"error": f"An error occurred: {str(e)}",
"status": "error"
}), 500`your text`
if __name__ == '__main__':
connect_to_astra_db()
initialize_application('./models/models/capybarahermes-2.5-mistral-7b.Q4_K_M.gguf')
app.run(host='0.0.0.0', port=5000)
Подробнее здесь: https://stackoverflow.com/questions/792 ... -llm-model
Получение позднего ответа на модель Llama mistral 7b LLM ⇐ Python
Программы на Python
-
Anonymous
1731938279
Anonymous
Я создал чат-бот с базой знаний в формате PDF, обновил данные PDF в Astradb.
Он успешно выполняется, но я интегрировал модель Mistral 7B LLM в свой код Python и получаю результат успешно, но время, необходимое для получения ответа, превышает 1 минуту. Как я могу сократить время?
Я попробовал приведенный ниже код получения ответа на запрос:
class EnhancedChatbot:
def __init__(self, collection):
self.llm = global_llm
self.collection = collection
self.prompt_template = PromptTemplate(
template="""
Based on the following context, please answer the question.
Context: {context}
Question: {question}
Answer:""",
input_variables=["context", "question"]
)
def get_relevant_documents(self, query: str) -> List[Document]:
try:
embeddings_model = HuggingFaceEmbeddings(
model_name="sentence-transformers/all-mpnet-base-v2",
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': False}
)
query_embedding = np.array(embeddings_model.embed_query(query))
results = self.collection.find()
if not results or not isinstance(results, dict) or 'data' not in results:
print("No documents found in collection")
return []
documents = results['data'].get('documents', [])
if not documents:
print("No documents in collection data")
return []
docs_with_similarity = []
for doc in documents:
try:
if not isinstance(doc, dict):
continue
content = doc.get('content')
embedding = doc.get('embedding')
if not content or not embedding:
continue
embedding = np.array(embedding)
if embedding.shape[0] != 768:
print(f"Skipping document with invalid embedding shape: {embedding.shape}")
continue
similarity = euclidean_distances([query_embedding], [embedding])[0][0]
docs_with_similarity.append(
Document(
page_content=content,
metadata={'similarity': similarity, **doc.get('metadata', {})}
)
)
except Exception as doc_error:
print(f"Error processing document: {str(doc_error)}")
continue
docs_with_similarity.sort(key=lambda x: x.metadata['similarity'], reverse=True)
return docs_with_similarity[:6]
except Exception as e:
print(f"Error in get_relevant_documents: {str(e)}")
return []
def answer_query(self, query: str) -> str:
try:
if not query or not isinstance(query, str):
return self.format_response("Invalid query provided", [], query)
relevant_docs = self.get_relevant_documents(query)
if not relevant_docs:
return self.format_response(
"No relevant information found in the database.",
[],
query
)
text_splitter = TokenTextSplitter(
chunk_size=200,
chunk_overlap=20
)
texts = text_splitter.split_documents(relevant_docs)
max_chunks = 3
texts = texts[:max_chunks]
llm_chain = LLMChain(
llm=self.llm,
prompt=self.prompt_template
)
chain = StuffDocumentsChain(
llm_chain=llm_chain,
document_variable_name="context"
)
context = "\n".join([doc.page_content for doc in texts])
max_context_chars = 800
if len(context) > max_context_chars:
context = context[:max_context_chars] + "..."
response = chain({
"question": query,
"input_documents": [Document(page_content=context)]
})
response_text = response.get('output_text', 'No response generated')
print(f"Response text: {response_text}")
return self.format_response(
response_text,
[doc.metadata.get('source', 'Unknown') for doc in relevant_docs],
query
)
except Exception as e:
print(f"Error in answer_query: {str(e)}")
return self.format_response(
f"An error occurred while processing your query: {str(e)}",
[],
query
)
def format_response(self, message: str, sources: List[str], query: str) -> str:
try:
sources_text = "\n".join([f"{source}" for source in sources]) if sources else "No relevant sources found"
print(f"source response: {sources_text}")
return f"""
{message}
Query: {query}
{message if sources else f"Unable to provide detailed information due to: {message}"}
{sources_text}
"""
except Exception as e:
print(f"Error formatting response: {str(e)}")
return f"""
Error formatting response
An error occurred while formatting the response
No sources available
"""
def train_on_example(self, example: Dict[str, str]):
"""Add a training example to improve responses"""
try:
if not isinstance(example, dict) or 'question' not in example or 'answer' not in example:
raise ValueError("Invalid training example format")
formatted_prompt = self.prompt_template.format(
context="",
question=example['question']
)
training_example = {
'prompt': formatted_prompt,
'response': example['answer'],
'metadata': {
'timestamp': str(time.time()),
'type': 'training_example'
}
}
if self.collection:
self.collection.insert_one(training_example)
print(f"Training example added successfully for question: {example['question']}")
else:
raise ValueError("Collection not initialized")
except Exception as e:
print(f"Error adding training example: {str(e)}")
def initialize_application(model_path: str):
"""Initialize the application with the LLM"""
try:
initialize_llm(model_path, model_type='mistral')
print("Application initialized successfully")
except Exception as e:
print(f"Failed to initialize application: {str(e)}")
raise
def answer_query(query: str):
if chatbot_collection is None:
print("Collection not initialized")
return None
try:
chatbot = EnhancedChatbot(collection=chatbot_collection)
return chatbot.answer_query(query)
except Exception as e:
print(f"Error processing query: {str(e)}")
return None
@app.route('/query', methods=\['POST'\])
async def query():
try:
authenticated = is_authenticated()
print(f"authenticated: {authenticated}", flush=True)
if not authenticated:
if not check_query_limit():
print("Query limit exceeded", flush=True)
return jsonify({
"error": "Query limit exceeded. Please login to continue.",
"status": "error",
"requires_login": True
}), 403
session['query_count'] = session.get('query_count', 0) + 1
data = request.json # This parses the JSON automatically
if not data:
return jsonify({"error": "No data provided", "status": "error"}), 400
question = data.get('query')
session_id = data.get('session_id')
if not question:
return jsonify({"error": "No question provided", "status": "error"}), 400
user_id = None
if authenticated:
auth_header = request.headers.get('Authorization')
token = auth_header.split(' ')[1]
user_id = decode_jwt_token(token)
if not session_id:
user = db.users.find_one(
{
"_id": ObjectId(user_id),
"queries.status": "active"
},
{"queries": {"$elemMatch": {"status": "active"}}}
)
if user and 'queries' in user and user['queries']:
session_id = user['queries'][0]['session_id']
else:
# Create new session if no active session exists
new_session = {
"session_id": str(uuid.uuid4()),
"created_at": datetime.utcnow(),
"status": "active",
"queries": []
}
result = db.users.update_one(
{"_id": ObjectId(user_id)},
{
"$push": {
"queries": new_session
}
}
)
if result.modified_count == 0:
return jsonify({
"error": "Failed to create session",
"status": "error"
}), 500
session_id = new_session["session_id"]
response = answer_query(question)
if not response:
return jsonify({
"error": "Failed to generate response",
"status": "error"
}), 500
if authenticated and user_id and session_id:
try:
# Check if the session is active
user = db.users.find_one({
"_id": ObjectId(user_id),
"queries": {
"$elemMatch": {
"session_id": session_id,
"status": "active"
}
}
})
if not user:
return jsonify({
"error": "Session not found or has ended",
"status": "error"
}), 400
query_data = {
"query": question,
"response": response,
"timestamp": datetime.utcnow()
}
result = db.users.update_one(
{
"_id": ObjectId(user_id),
"queries.session_id": session_id
},
{
"$push": {
"queries.$.queries": query_data
}
}
)
if not result.modified_count:
logger.error(f"Failed to update session {session_id}")
except Exception as e:
logger.error(f"Error storing query in session: {str(e)}")
remaining_queries = 2 - session.get('query_count', 0) if not authenticated else None
return jsonify({
"answer": response,
"status": "success",
"remaining_queries": remaining_queries,
"authenticated": authenticated,
"session_id": session_id
}), 200
except Exception as e:
logger.error(f"Query endpoint error: {str(e)}")
return jsonify({
"error": f"An error occurred: {str(e)}",
"status": "error"
}), 500`your text`
if __name__ == '__main__':
connect_to_astra_db()
initialize_application('./models/models/capybarahermes-2.5-mistral-7b.Q4_K_M.gguf')
app.run(host='0.0.0.0', port=5000)
Подробнее здесь: [url]https://stackoverflow.com/questions/79200068/getting-late-response-of-llama-mistral-7b-llm-model[/url]
Ответить
1 сообщение
• Страница 1 из 1
Перейти
- Кемерово-IT
- ↳ Javascript
- ↳ C#
- ↳ JAVA
- ↳ Elasticsearch aggregation
- ↳ Python
- ↳ Php
- ↳ Android
- ↳ Html
- ↳ Jquery
- ↳ C++
- ↳ IOS
- ↳ CSS
- ↳ Excel
- ↳ Linux
- ↳ Apache
- ↳ MySql
- Детский мир
- Для души
- ↳ Музыкальные инструменты даром
- ↳ Печатная продукция даром
- Внешняя красота и здоровье
- ↳ Одежда и обувь для взрослых даром
- ↳ Товары для здоровья
- ↳ Физкультура и спорт
- Техника - даром!
- ↳ Автомобилистам
- ↳ Компьютерная техника
- ↳ Плиты: газовые и электрические
- ↳ Холодильники
- ↳ Стиральные машины
- ↳ Телевизоры
- ↳ Телефоны, смартфоны, плашеты
- ↳ Швейные машинки
- ↳ Прочая электроника и техника
- ↳ Фототехника
- Ремонт и интерьер
- ↳ Стройматериалы, инструмент
- ↳ Мебель и предметы интерьера даром
- ↳ Cантехника
- Другие темы
- ↳ Разное даром
- ↳ Давай меняться!
- ↳ Отдам\возьму за копеечку
- ↳ Работа и подработка в Кемерове
- ↳ Давай с тобой поговорим...
Мобильная версия