I developed some machine learning code as a web app with Streamlit, but now that I want to turn it into an API so it can be deployed on a web server behind a different front end, I am struggling to do so. Here is my code:
import streamlit as st
from ultralytics import YOLO
from transformers import Blip2ForConditionalGeneration, AutoProcessor
from peft import PeftModel, PeftConfig
from PIL import Image
import os
import cv2
# Global Constants
PREPROCESS_CKP = "./Salesforce/blip2-opt-2.7b"
PEFT_MODEL_ID = "ManhTien22/blip2-opt-2.7b-Kimono"
MODEL_PATH = "./model/last.pt"
SAVE_DIR = "./uploads"
# Load models once using cache
@st.cache_resource
def load_models():
    try:
        detection_model = YOLO(MODEL_PATH)
    except Exception as ex:
        st.error("Error loading YOLO model.")
        st.error(ex)
        return None, None
    try:
        processor = AutoProcessor.from_pretrained(PREPROCESS_CKP)
        config = PeftConfig.from_pretrained(PEFT_MODEL_ID)
        model = Blip2ForConditionalGeneration.from_pretrained(config.base_model_name_or_path)
        model = PeftModel.from_pretrained(model, PEFT_MODEL_ID)
    except Exception as ex:
        st.error("Error loading BLIP-2 model.")
        st.error(ex)
        return None, None
    return detection_model, (processor, model)
# Detect and crop objects
def detect_and_crop_objects(image_path, model, confidence=0.4):
    result = model.predict(source=image_path, conf=confidence, save=False)
    boxes = result[0].boxes
    labels = result[0].names  # Mapping from class id to label name
    cropped_images = []
    cropped_labels = []
    img = cv2.imread(image_path)
    for box in boxes:
        xyxy = box.xyxy.cpu().numpy()[0]
        xA, yA, xB, yB = map(int, xyxy)
        cropped_image = img[yA:yB, xA:xB]
        cropped_images.append(cropped_image)
        cropped_labels.append(labels[int(box.cls.cpu().numpy()[0])])  # Label for this box
    return cropped_images, cropped_labels, result[0].plot()[:, :, ::-1]  # Annotated image, BGR -> RGB
# Generate caption
def generate_caption(image, processor, model, label=None):
"""
Generate a caption focused on the label using BLIP-2.
Parameters:
- image: PIL image input.
- processor: Processor for BLIP-2.
- model: BLIP-2 model.
- label: Label to focus on.
Returns:
- Caption describing the label.
"""
if label:
prompt = (
"What are the main clothing items (like {label}) visible in this image?"
)
# Input processing
inputs = processor(images=image, text=prompt, return_tensors="pt")
# Caption generation
generated_ids = model.generate(
pixel_values=inputs.pixel_values,
max_length=50, # Caption's max length
num_beams=3, # beam search setting
early_stopping=True,
repetition_penalty=2.0,
)
# Decode the result
caption = processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
return caption
# Streamlit App
st.title("Automated Clothing Descriptions Using Image-To-Text Generation")
# Load models
detection_model, blip_bundle = load_models()
if not detection_model or not blip_bundle:
    st.stop()
blip_processor, blip_model = blip_bundle
# Sidebar: Confidence Selection
confidence = st.sidebar.slider("Set Object Detection Confidence", 25, 100, 40) / 100
# File Uploader
uploaded_file = st.file_uploader("Upload an image", type=["jpg", "jpeg", "png"])
if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_column_width=True)

    # Step 1: Detect Objects
    if st.button("Detect Objects"):
        os.makedirs(SAVE_DIR, exist_ok=True)  # Make sure the upload directory exists
        img_path = os.path.join(SAVE_DIR, uploaded_file.name)
        img.save(img_path)
        cropped_images, cropped_labels, detection_visual = detect_and_crop_objects(img_path, detection_model, confidence)
        st.session_state.detection_visual = detection_visual
        st.session_state.cropped_images = cropped_images
        st.session_state.cropped_labels = cropped_labels

    # Display detected objects if available
    if "detection_visual" in st.session_state:
        st.image(st.session_state.detection_visual, caption="Detected Objects", use_column_width=True)

    # Step 2: Generate Captions
    if "cropped_images" in st.session_state and st.session_state.cropped_images:
        if st.button("Generate Captions"):
            st.subheader("Generated Captions")
            for cropped_img, label in zip(st.session_state.cropped_images, st.session_state.cropped_labels):
                cropped_pil_img = Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
                caption = generate_caption(cropped_pil_img, blip_processor, blip_model, label=label)
                st.image(cropped_pil_img, caption=f"{label.capitalize()}: {caption}", use_column_width=True)
I tried to learn Flask and FastAPI, but I haven't really understood them yet. I attempted the conversion with FastAPI, but I don't know how to structure it, so I would be very grateful for any help. The sketch below is roughly the direction I have in mind.
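Concretely, here is a minimal sketch of what I imagine the FastAPI version should look like, assuming a single POST /caption endpoint that accepts an uploaded image and returns the labels and captions as JSON. The endpoint name, the models dict, and the response shape are my own guesses, and detect_and_crop_objects and generate_caption are meant to be the same functions as in the code above (they do not depend on Streamlit):

import os
import cv2
from contextlib import asynccontextmanager
from fastapi import FastAPI, File, UploadFile
from PIL import Image
from ultralytics import YOLO
from transformers import Blip2ForConditionalGeneration, AutoProcessor
from peft import PeftModel, PeftConfig

PREPROCESS_CKP = "./Salesforce/blip2-opt-2.7b"
PEFT_MODEL_ID = "ManhTien22/blip2-opt-2.7b-Kimono"
MODEL_PATH = "./model/last.pt"
SAVE_DIR = "./uploads"

models = {}

@asynccontextmanager
async def lifespan(app: FastAPI):
    # Load everything once at startup (replaces @st.cache_resource)
    models["yolo"] = YOLO(MODEL_PATH)
    models["processor"] = AutoProcessor.from_pretrained(PREPROCESS_CKP)
    config = PeftConfig.from_pretrained(PEFT_MODEL_ID)
    base = Blip2ForConditionalGeneration.from_pretrained(config.base_model_name_or_path)
    models["blip"] = PeftModel.from_pretrained(base, PEFT_MODEL_ID)
    yield
    models.clear()

app = FastAPI(lifespan=lifespan)

# detect_and_crop_objects and generate_caption are unchanged from the
# Streamlit version above.

@app.post("/caption")
async def caption_image(file: UploadFile = File(...), confidence: float = 0.4):
    # Save the upload to disk so YOLO can read it, mirroring the Streamlit flow
    os.makedirs(SAVE_DIR, exist_ok=True)
    img_path = os.path.join(SAVE_DIR, file.filename)
    with open(img_path, "wb") as f:
        f.write(await file.read())

    cropped_images, cropped_labels, _ = detect_and_crop_objects(img_path, models["yolo"], confidence)

    results = []
    for cropped_img, label in zip(cropped_images, cropped_labels):
        pil_img = Image.fromarray(cv2.cvtColor(cropped_img, cv2.COLOR_BGR2RGB))
        caption = generate_caption(pil_img, models["processor"], models["blip"], label=label)
        results.append({"label": label, "caption": caption})
    return {"detections": results}

If this is on the right track, I would run it with uvicorn main:app --reload (assuming the file is named main.py) and test it with curl -F "file=@test.jpg" "http://localhost:8000/caption?confidence=0.4", with the Streamlit UI replaced by whatever front end consumes the JSON. Is this the right way to do it?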