Модель Yolov5 имеет низкий показатель доверия в Android Studio

Модель Yolov5 имеет низкий показатель доверия в Android Studio ⇐ Android

1 сообщение • Страница 1 из 1

Anonymous

Модель Yolov5 имеет низкий показатель доверия в Android Studio

Цитата

Сообщение Anonymous » 13 сен 2025, 07:43

Модель Yolov5 имеет точность от 0,0 до 0,1 в Android Studio. Сначала я думал, что это проблема модели, но точность моей модели составляет 90% в Python. < /P>
Я попытался реализовать классификатор таким образом: < /p>
Interpreter interpreter;
int tensorWidth = 0;
int tensorHeight = 0;
int numChannel = 0;
int numElements = 0;
float scale = 0f;
int zeroPoint = 0;

List labels;
ImageProcessor imageProcessor;

public final String TAG = "ImageModel";
private static final float INPUT_MEAN = 0f;
private static final float INPUT_STANDARD_DEVIATION = 255f;
private static final float CONFIDENCE_THRESHOLD = 0.1f;
private static final float IOU_THRESHOLD = 0.5f;

private static final DataType INPUT_IMAGE_TYPE = DataType.UINT8;
private static final DataType OUTPUT_IMAGE_TYPE = DataType.UINT8;

public ImageModel(Context context, String modelPath, String labelPath) throws IOException {
labels = new ArrayList();

ByteBuffer model = FileUtil.loadMappedFile(context, modelPath);
Interpreter.Options options = new Interpreter.Options();
options.setNumThreads(4);
interpreter = new Interpreter(model, options);

int[] inputShape = interpreter.getInputTensor(0).shape();
int[] outputShape = interpreter.getOutputTensor(0).shape();

// 320
tensorWidth = inputShape[1];

// 320
tensorHeight = inputShape[2];

// this is 6300
numElements = outputShape[1];

// this is 85
numChannel = outputShape[2];

Tensor outputTensor = interpreter.getOutputTensor(0);
DataType dtype = outputTensor.dataType();
Tensor.QuantizationParams qParams = outputTensor.quantizationParams();
scale = qParams.getScale();
zeroPoint = qParams.getZeroPoint();

// logs
Log.i(TAG, "Output datatype: " + dtype);
Log.i(TAG, "Output scale: " + qParams.getScale() + ", zeroPoint: " + qParams.getZeroPoint());
Tensor.QuantizationParams iparam = interpreter.getInputTensor(0).quantizationParams();
Log.i(TAG, "Input scale: " + iparam.getScale() + ", zerepoint: " + iparam.getZeroPoint());

// get the labels.txt and add them to the array list
try (InputStream inputStream = context.getAssets().open(labelPath)) {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
String line = bufferedReader.readLine();
while (line != null && !line.isEmpty()) {
labels.add(line);
line = bufferedReader.readLine();
}
bufferedReader.close();
}
}

// the interface that will be returned (buffer and the bitmap)
// the returned resized bitmap is just for viewing purposes only
public class ResizeResult {
public ByteBuffer buffer;
public Bitmap resizedBitmap;

public ResizeResult(ByteBuffer buffer, Bitmap resizedBitmap) {
this.buffer = buffer;
this.resizedBitmap = resizedBitmap;
}
}

// resize the image to fit in the interpreter model
public ResizeResult resize(Bitmap bitmap) {
TensorImage tensorImage = new TensorImage(INPUT_IMAGE_TYPE);
tensorImage.load(bitmap);
TensorImage processedImage = imageProcessor.process(tensorImage);

// this is just for displaying resized bitmap
ImageProcessor resizeOnlyProcessor = new ImageProcessor.Builder()
.add(new ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
.build();
TensorImage resizedImage = resizeOnlyProcessor.process(tensorImage);
Bitmap displayBitmap = resizedImage.getBitmap();

return new ResizeResult(processedImage.getBuffer(), displayBitmap);

}

public ClassifyResults classify(Bitmap bitmap) {

int BITMAP_WIDTH = bitmap.getWidth();
int BITMAP_HEIGHT = bitmap.getHeight();

imageProcessor = new ImageProcessor.Builder()
.add(new ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
.add(new NormalizeOp(0, 255))
//THIS IS uint8
.add(new CastOp(INPUT_IMAGE_TYPE))
.build();

ResizeResult elements = resize(bitmap);

TensorBuffer output = TensorBuffer.createFixedSize(
//this is {1, 6300, 85}
new int[]{1, numElements, numChannel},
// this is uint8
OUTPUT_IMAGE_TYPE
);
interpreter.run(elements.buffer, output.getBuffer());

TensorProcessor tensorProcessor = new TensorProcessor.Builder()
.add(new DequantizeOp(zeroPoint, scale))
.build();
output = tensorProcessor.process(output);

// I believe the prediction results would shape like:
// [x, y, w, h, confidence (or objectness), class1, class2, class3..., x_1, y_1, w_1, h_1, confidence_1, class1_1, class2_1, ...]
float[] predictions = output.getFloatArray();

// store here all the detected objects with high confidence
List detectionResults = new ArrayList();

for (int i = 0; i < numElements; i++) {

// offset since the prediction array is flattened
int offset = i * numChannel;
float x = predictions[offset] * BITMAP_WIDTH;
float y = predictions[offset + 1] * BITMAP_HEIGHT;
float w = predictions[offset + 2] * BITMAP_WIDTH;
float h = predictions[offset + 3] * BITMAP_HEIGHT;

// get the regions
int left = (int) Math.max(0, (x - w / 2));
int top = (int) Math.max(0, (y - h / 2));
int right = (int) Math.min(320, (x + w / 2.));
int bottom = (int) Math.min(320, (y + h / 2.));

float[] classScores = Arrays.copyOfRange(predictions, 5 + offset, numChannel + offset);

// find the label of the highest interval
float maxClassScore = -1f;
int labelId = 0;
for (int j = 0; j < classScores.length; j++) {
if (classScores[j] > maxClassScore) {
maxClassScore = classScores[j];
labelId = j;
}
}

float confidence = predictions[offset + 4];
float finalConfidence = confidence;

// check if the confidence is high ( > 0.1) then put it in the results list
if (finalConfidence > CONFIDENCE_THRESHOLD) {
Log.w(TAG, "x: " + x + " y: " + y + " w: " + w + " h: " + h + " label: " + labels.get(labelId) + " confidence: " + confidence);
detectionResults.add(new DetectionResult(labelId, finalConfidence, left, top, right, bottom, labels.get(labelId)));
}
}

return new ClassifyResults(elements.resizedBitmap, detectionResults);
}
< /code>
Обнаруженные этикетки не соответствуют фактическому обнаруженному объекту, и он также имеет чрезвычайно низкие уверенные результаты, которые, я думаю, могут возникнуть проблема с тем, как я реализовал интерпретатор. 1080x2424. Я получаю ошибку Bytebuffer, когда я пытаюсь сделать это, поэтому я только что оставил конфигурацию ввода Processor Image. src = "https://i.sstatic.net/gwzfldzi.png"/>

Подробнее здесь: https://stackoverflow.com/questions/797 ... oid-studio

1757738580

Anonymous

 Модель Yolov5 имеет точность от 0,0 до 0,1 в Android Studio. Сначала я думал, что это проблема модели, но точность моей модели составляет 90% в Python.  < /P>
Я попытался реализовать классификатор таким образом: < /p>
    Interpreter interpreter;
int tensorWidth = 0;
int tensorHeight = 0;
int numChannel = 0;
int numElements = 0;
float scale = 0f;
int zeroPoint = 0;

List labels;
ImageProcessor imageProcessor;

public final String TAG = "ImageModel";
private static final float INPUT_MEAN = 0f;
private static final float INPUT_STANDARD_DEVIATION = 255f;
private static final float CONFIDENCE_THRESHOLD = 0.1f;
private static final float IOU_THRESHOLD = 0.5f;

private static final DataType INPUT_IMAGE_TYPE = DataType.UINT8;
private static final DataType OUTPUT_IMAGE_TYPE = DataType.UINT8;

public ImageModel(Context context, String modelPath, String labelPath) throws IOException {
labels = new ArrayList();

ByteBuffer model = FileUtil.loadMappedFile(context, modelPath);
Interpreter.Options options = new Interpreter.Options();
options.setNumThreads(4);
interpreter = new Interpreter(model, options);

int[] inputShape = interpreter.getInputTensor(0).shape();
int[] outputShape = interpreter.getOutputTensor(0).shape();

// 320
tensorWidth = inputShape[1];

// 320
tensorHeight = inputShape[2];

// this is 6300
numElements = outputShape[1];

// this is 85
numChannel = outputShape[2];

Tensor outputTensor = interpreter.getOutputTensor(0);
DataType dtype = outputTensor.dataType();
Tensor.QuantizationParams qParams = outputTensor.quantizationParams();
scale = qParams.getScale();
zeroPoint = qParams.getZeroPoint();

// logs
Log.i(TAG, "Output datatype: " + dtype);
Log.i(TAG, "Output scale: " + qParams.getScale() + ", zeroPoint: " + qParams.getZeroPoint());
Tensor.QuantizationParams iparam = interpreter.getInputTensor(0).quantizationParams();
Log.i(TAG, "Input scale: " + iparam.getScale() + ", zerepoint: " + iparam.getZeroPoint());

// get the labels.txt and add them to the array list
try (InputStream inputStream = context.getAssets().open(labelPath)) {
BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(inputStream));
String line = bufferedReader.readLine();
while (line != null &&  !line.isEmpty()) {
labels.add(line);
line = bufferedReader.readLine();
}
bufferedReader.close();
}
}

// the interface that will be returned (buffer and the bitmap)
// the returned resized bitmap is just for viewing purposes only
public class ResizeResult {
public ByteBuffer buffer;
public Bitmap resizedBitmap;

public ResizeResult(ByteBuffer buffer, Bitmap resizedBitmap) {
this.buffer = buffer;
this.resizedBitmap = resizedBitmap;
}
}

// resize the image to fit in the interpreter model
public ResizeResult resize(Bitmap bitmap) {
TensorImage tensorImage = new TensorImage(INPUT_IMAGE_TYPE);
tensorImage.load(bitmap);
TensorImage processedImage = imageProcessor.process(tensorImage);

// this is just for displaying resized bitmap
ImageProcessor resizeOnlyProcessor = new ImageProcessor.Builder()
.add(new ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
.build();
TensorImage resizedImage = resizeOnlyProcessor.process(tensorImage);
Bitmap displayBitmap = resizedImage.getBitmap();

return new ResizeResult(processedImage.getBuffer(), displayBitmap);

}

public ClassifyResults classify(Bitmap bitmap) {

int BITMAP_WIDTH = bitmap.getWidth();
int BITMAP_HEIGHT = bitmap.getHeight();

imageProcessor = new ImageProcessor.Builder()
.add(new ResizeOp(320, 320, ResizeOp.ResizeMethod.BILINEAR))
.add(new NormalizeOp(0, 255))
//THIS IS uint8
.add(new CastOp(INPUT_IMAGE_TYPE))
.build();

ResizeResult elements = resize(bitmap);

TensorBuffer output = TensorBuffer.createFixedSize(
//this is {1, 6300, 85}
new int[]{1, numElements, numChannel},
// this is uint8
OUTPUT_IMAGE_TYPE
);
interpreter.run(elements.buffer, output.getBuffer());

TensorProcessor tensorProcessor = new TensorProcessor.Builder()
.add(new DequantizeOp(zeroPoint, scale))
.build();
output = tensorProcessor.process(output);

// I believe the prediction results would shape like:
// [x, y, w, h, confidence (or objectness), class1, class2, class3..., x_1, y_1, w_1, h_1, confidence_1, class1_1, class2_1, ...]
float[] predictions = output.getFloatArray();

// store here all the detected objects with high confidence
List detectionResults = new ArrayList();

for (int i = 0; i < numElements; i++) {

// offset since the prediction array is flattened
int offset = i * numChannel;
float x = predictions[offset] * BITMAP_WIDTH;
float y = predictions[offset + 1] * BITMAP_HEIGHT;
float w = predictions[offset + 2] * BITMAP_WIDTH;
float h = predictions[offset + 3] * BITMAP_HEIGHT;

// get the regions
int left = (int) Math.max(0, (x - w / 2));
int top = (int) Math.max(0, (y - h / 2));
int right = (int) Math.min(320, (x + w / 2.));
int bottom = (int) Math.min(320, (y + h / 2.));

float[] classScores = Arrays.copyOfRange(predictions, 5 + offset, numChannel + offset);

// find the label of the highest interval
float maxClassScore = -1f;
int labelId = 0;
for (int j = 0; j < classScores.length; j++) {
if (classScores[j] > maxClassScore) {
maxClassScore = classScores[j];
labelId = j;
}
}

float confidence = predictions[offset + 4];
float finalConfidence = confidence;

// check if the confidence  is high ( > 0.1) then put it in the results list
if (finalConfidence > CONFIDENCE_THRESHOLD) {
Log.w(TAG, "x: " + x + " y: " + y + " w: " + w + " h: " + h + " label: " + labels.get(labelId) + " confidence: " + confidence);
detectionResults.add(new DetectionResult(labelId, finalConfidence, left, top, right, bottom, labels.get(labelId)));
}
}

return new ClassifyResults(elements.resizedBitmap, detectionResults);
}
< /code>
Обнаруженные этикетки не соответствуют фактическому обнаруженному объекту, и он также имеет чрезвычайно низкие уверенные результаты, которые, я думаю, могут возникнуть проблема с тем, как я реализовал интерпретатор. 1080x2424.  Я получаю ошибку Bytebuffer, когда я пытаюсь сделать это, поэтому я только что оставил конфигурацию ввода Processor Image. src = "https://i.sstatic.net/gwzfldzi.png"/>  

Подробнее здесь: [url]https://stackoverflow.com/questions/79763507/yolov5-model-has-low-confidence-score-in-android-studio[/url]