Значения пикселей различаются в Python и Android Kotlin при вычислении встраиваний MobileNetV3, что приводит к несовпадающим встраиваниям — Android

Форум для тех, кто программирует под Android
Ответить
Anonymous
 Значения пикселей различаются в Python и Android Kotlin при вычислении встраиваний MobileNetV3, что приводит к несовпадающим встраиваниям

Сообщение Anonymous »

У меня возникла проблема со встраиванием изображений MobileNetV3:
Я вычисляю встраивание исходных изображений в Python и сохраняю их в базе данных.
В Android Kotlin, когда пользователь загружает изображение, я также вычисляю его встраивание с помощью MobileNetV3 и пытаюсь сопоставить его с базой данных.
Однако я обнаружил, что для одного и того же изображения значения пикселей, считываемые в Python и в Kotlin, совершенно разные, что приводит к несовпадающим встраиваниям и неверным результатам.
Я пробовал:
BitmapFactory.decodeFile / decodeStream с ARGB_8888
Игнорирование альфа-канала путем рисования на черном фоне
Использование getPixels для извлечения RGB
Но я все еще не могу получить значения пикселей, соответствующие значениям Python np.array(img.convert("RGB")).
Пример моего кода Python:
def generate_vectors(self, image_paths: List[str]) -> List[List[float]]:
    """Compute an L2-normalised embedding for every image path that exists.

    Args:
        image_paths: Paths of the images to embed. A path that does not
            exist is reported with a warning on stdout and skipped.

    Returns:
        One embedding per image that was found, each as a plain list of
        floats, in the order the surviving paths were given.
    """
    vectors = []

    for path in image_paths:
        if not os.path.exists(path):
            print(f"Warning: {path} not found, skipped")
            continue

        # Convert to RGB mode so the model always receives three channels.
        # NOTE(review): for images with an alpha channel this presumably
        # drops alpha without compositing onto a background -- confirm
        # against the Pillow docs if transparent inputs matter.
        img = Image.open(path).convert("RGB")

        input_tensor = self.preprocess(img)

        self.interpreter.set_tensor(
            self.input_details[0]["index"], input_tensor)
        self.interpreter.invoke()
        embedding = self.interpreter.get_tensor(
            self.output_details[0]["index"])

        # Remove singleton axes before normalising.
        embedding = embedding.squeeze()

        # Scale to unit length; an all-zero vector is left unchanged to
        # avoid dividing by zero.
        norm = np.linalg.norm(embedding)
        if norm > 0:
            embedding = embedding / norm

        vectors.append(embedding.tolist())

    return vectors

def preprocess(self, img: Image.Image) -> np.ndarray:
    """Turn an image into the float32 batch array that is fed to the model.

    Args:
        img: Image to convert; it is resized to
            ``(self.width, self.height)`` first.

    Returns:
        The resized pixel data cast to ``float32``, rescaled with
        ``x / 127.5 - 1.0`` and given a leading axis of size 1.
    """
    # No resample filter is passed, so the library default applies.
    # NOTE(review): presumably BICUBIC on current Pillow -- confirm, and
    # keep it identical to whatever pipeline the embeddings are compared
    # against.
    img = img.resize((self.width, self.height))

    img_array = np.array(img).astype(np.float32)

    # Maps 0 -> -1.0 and 255 -> 1.0.
    img_array = (img_array / 127.5) - 1.0

    return np.expand_dims(img_array, axis=0)

Пример моего кода на Kotlin:
// Decode the bundled sample image; `use` closes the asset stream afterwards.
val bitmap = this.assets.open("example.png").use { stream ->
    BitmapFactory.decodeStream(stream)
}
// Build the embedder and compute the embedding of the decoded bitmap.
val mobileNetV3 = MobileNetV3(this)
val embedding = mobileNetV3.encodeImage(bitmap)

import android.content.Context
import android.graphics.Bitmap
import android.graphics.Color
import org.tensorflow.lite.DataType
import org.tensorflow.lite.support.common.ops.NormalizeOp
import org.tensorflow.lite.support.image.ImageProcessor
import org.tensorflow.lite.support.image.TensorImage
import org.tensorflow.lite.support.image.ops.ResizeOp
import java.nio.ByteBuffer
import kotlin.math.sqrt

/**
 * [ImageEmbedder] backed by the generated
 * `MobilenetV3TfliteLarge100224FeatureVectorMetadataV1` model wrapper.
 *
 * A bitmap is resized to 224x224 with bilinear filtering, normalised with
 * mean 127.5 and stddev 127.5, run through the model, and the resulting
 * feature vector is L2-normalised.
 *
 * NOTE(review): the pixel values seen here depend on how the caller decoded
 * the bitmap (e.g. alpha handling) and on the resize filter; confirm both
 * match the pipeline whose embeddings are being compared.
 *
 * @param context passed to the generated model's `newInstance` factory.
 */
class MobileNetV3(context: Context) : ImageEmbedder {

    private val model = MobilenetV3TfliteLarge100224FeatureVectorMetadataV1.newInstance(context)

    private val imageProcessor =
        ImageProcessor.Builder()
            .add(ResizeOp(224, 224, ResizeOp.ResizeMethod.BILINEAR))
            .add(NormalizeOp(127.5f, 127.5f)) // [-1, 1]
            .build()

    /**
     * Computes the L2-normalised feature vector for [bitmap].
     *
     * @param bitmap image to embed; it is loaded into a FLOAT32 tensor image
     *   and passed through [imageProcessor] before inference.
     * @return a fresh array holding the normalised model output.
     */
    override fun encodeImage(bitmap: Bitmap): FloatArray {
        val tensorImage = TensorImage(DataType.FLOAT32)
        tensorImage.load(bitmap)

        val processedImage = imageProcessor.process(tensorImage)

        val outputs = model.process(processedImage)

        // Copy so the in-place normalisation never touches the output buffer's array.
        val vector = outputs.featureAsTensorBuffer.floatArray.copyOf()

        l2Normalize(vector)

        return vector
    }

    /** Closes the underlying model. */
    override fun close() {
        model.close()
    }

    /**
     * Scales [vector] in place to unit Euclidean length.
     *
     * A vector whose norm is zero is left untouched to avoid dividing by zero.
     */
    private fun l2Normalize(vector: FloatArray) {
        var sumOfSquares = 0f
        for (v in vector) {
            sumOfSquares += v * v
        }
        val norm = sqrt(sumOfSquares)
        if (norm > 0f) {
            for (i in vector.indices) {
                // Divide each element; the index was missing in the original.
                vector[i] /= norm
            }
        }
    }

}

Я хочу, чтобы Android Kotlin считывал пиксели изображения точно так же, как Python, иначе встраивания MobileNetV3 не будут совпадать.
Есть ли способ считывать пиксели RGB на Android, как это делает Python, или лучший подход для достижения этой функциональности?

Это мое тестовое изображение
Изображение
Вот сравнение значений пикселей 20x20 в верхнем левом углу, считанных Kotlin и Python:
Пример кода Kotlin
/**
 * Unpacks every pixel of [bitmap] into its red, green and blue components.
 *
 * The top byte of each packed pixel (bits 24-31) is ignored.
 *
 * @param bitmap source image; its full width x height area is read.
 * @return an array indexed as `[y][x][channel]` with channels in the order
 *   R, G, B, each masked to the range 0..255.
 */
fun bitmapToRawRgbArray(bitmap: Bitmap): Array<Array<IntArray>> {
    val width = bitmap.width
    val height = bitmap.height

    // Read the whole image in one call; rows are stored back to back with a stride of `width`.
    val pixels = IntArray(width * height)
    bitmap.getPixels(pixels, 0, width, 0, 0, width, height)

    return Array(height) { y ->
        Array(width) { x ->
            val pixel = pixels[y * width + x]
            // [R,G,B]
            intArrayOf(
                (pixel shr 16) and 0xFF,
                (pixel shr 8) and 0xFF,
                pixel and 0xFF
            )
        }
    }
}

Kotlin RGB
Row 0: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0]
Row 1: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0]
Row 2: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [13,13,13] [15,12,10]
Row 3: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [16,11,11] [16,14,12] [15,13,12] [15,12,12]
Row 4: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [16,16,16] [17,12,12] [14,13,11] [15,13,11] [15,13,11] [15,12,11]
Row 5: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [17,13,13] [16,12,12] [16,11,13] [15,13,11] [15,13,11] [15,13,11] [15,12,11]
Row 6: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [23,23,23] [17,10,14] [15,12,12] [15,12,13] [16,11,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 7: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [13,13,13] [14,11,14] [15,12,13] [15,12,13] [16,12,13] [16,11,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 8: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [20,10,10] [11,14,14] [13,12,14] [15,12,14] [15,12,13] [15,12,13] [16,11,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 9: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [10,10,10] [13,11,11] [12,13,13] [13,12,13] [14,12,13] [15,11,13] [15,12,13] [16,11,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 10: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [13,13,13] [11,14,16] [13,13,13] [13,13,13] [13,12,13] [14,12,13] [15,11,13] [15,11,13] [16,11,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 11: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [23,23,23] [14,14,9] [13,14,12] [13,13,12] [13,13,13] [13,12,13] [14,12,13] [15,12,13] [15,12,13] [16,12,12] [15,12,11] [15,13,11] [15,13,11] [15,12,11]
Row 12: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [14,14,10] [14,14,11] [13,14,11] [13,13,11] [13,13,11] [13,13,11] [14,13,11] [15,13,11] [15,13,11] [16,13,11] [15,13,11] [15,13,11] [15,13,11] [15,12,11]
Row 13: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [0,0,0] [13,13,13] [13,14,11] [13,14,11] [13,13,11] [13,14,11] [13,14,11] [13,13,11] [14,13,11] [15,13,11] [15,13,11] [16,13,11] [15,13,11] [15,13,11] [15,13,11] [15,12,12]
Row 14: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [8,16,8] [12,14,10] [12,14,11] [12,14,12] [13,14,11] [13,13,11] [12,13,11] [12,13,11] [14,13,11] [15,13,11] [15,13,11] [16,13,11] [15,13,11] [15,13,11] [15,13,12] [15,12,11]
Row 15: [0,0,0] [0,0,0] [0,0,0] [0,0,0] [15,12,10] [13,14,11] [13,14,11] [13,14,12] [13,14,11] [13,14,11] [13,14,11] [13,14,11] [14,14,11] [15,14,11] [15,14,11] [16,14,11] [15,14,11] [15,13,12] [15,13,11] [15,12,12]
Row 16: [0,0,0] [0,0,0] [0,0,0] [17,11,11] [16,14,11] [15,14,12] [14,14,12] [14,14,12] [14,14,12] [14,14,12] [14,14,12] [14,13,12] [15,13,12] [15,13,12] [15,13,12] [16,13,12] [15,14,12] [15,13,11] [15,13,12] [15,13,12]
Row 17: [0,0,0] [0,0,0] [0,0,0] [16,14,12] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [16,14,11] [16,14,11] [15,14,11] [15,13,12] [15,14,12] [15,14,12] [15,13,12]
Row 18: [0,0,0] [0,0,0] [14,14,14] [16,13,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,11] [15,14,12] [15,14,12] [15,14,11] [15,14,11] [16,14,11] [15,14,11] [16,14,11] [16,14,12] [16,13,12] [15,14,12] [15,13,12]
Row 19: [0,0,0] [0,0,0] [17,13,4] [16,13,10] [16,14,10] [16,14,10] [16,14,10] [16,14,10] [16,14,11] [16,14,11] [16,14,11] [16,13,12] [16,13,12] [16,13,12] [16,13,12] [16,13,12] [16,14,12] [15,14,12] [16,14,12] [15,14,12]

Пример кода Python:
# Load the image in RGB mode and print its top-left 20x20 pixels so they can
# be compared against another decoder's output.
img = Image.open(path).convert("RGB")
img_array = np.array(img)

print(f"--- Python (Pillow) Top 20x20 Pixels ---")
print(f"Image Size: {img.size}")

# The array is indexed [row, column]; the image must be at least 20x20 or
# the indexing below raises IndexError.
for y in range(20):
    row_str = f"Row {y:2d}: "
    for x in range(20):
        r, g, b = img_array[y, x]
        row_str += f"[{r:3d},{g:3d},{b:3d}] "
    print(row_str)

Python RGB
Row 0: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 18, 13, 15] [ 16, 11, 14] [ 16, 11, 14] [ 16, 12, 12] [ 15, 12, 12] [ 7, 18, 12] [ 15, 14, 11] [ 14, 13, 11] [ 15, 12, 12]
Row 1: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 21, 18, 18] [ 16, 14, 13] [ 16, 14, 13] [ 15, 14, 12] [ 15, 13, 12] [ 16, 13, 13] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 17, 13, 12]
Row 2: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 21, 16, 18] [ 15, 12, 14] [ 15, 12, 13] [ 15, 16, 11] [ 15, 11, 14] [ 16, 11, 13] [ 15, 12, 12] [ 15, 13, 11] [ 15, 13, 9] [ 14, 13, 11] [ 15, 12, 11]
Row 3: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 17, 17, 18] [ 12, 15, 16] [ 9, 12, 14] [ 13, 13, 13] [ 14, 6, 13] [ 23, 0, 5] [ 15, 15, 13] [ 15, 12, 13] [ 16, 12, 12] [ 15, 12, 12] [ 15, 13, 11] [ 15, 13, 12] [ 15, 12, 12]
Row 4: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 19, 15, 18] [ 14, 12, 14] [ 14, 12, 14] [ 15, 12, 14] [ 19, 12, 13] [ 6, 38, 18] [ 15, 13, 13] [ 15, 12, 13] [ 16, 14, 13] [ 16, 11, 11] [ 14, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 5: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 19, 20, 20] [ 12, 15, 13] [ 12, 14, 13] [ 13, 13, 13] [ 19, 6, 14] [ 0, 38, 12] [ 18, 7, 13] [ 14, 11, 13] [ 15, 11, 12] [ 15, 12, 13] [ 16, 11, 12] [ 15, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 6: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 27, 3, 12] [ 12, 13, 14] [ 12, 13, 14] [ 14, 11, 13] [ 13, 10, 13] [ 13, 12, 13] [ 16, 12, 14] [ 15, 12, 13] [ 15, 12, 13] [ 16, 11, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 7: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 23, 20, 25] [ 0, 16, 0] [ 15, 13, 17] [ 12, 14, 14] [ 13, 14, 14] [ 14, 12, 13] [ 14, 14, 13] [ 15, 11, 13] [ 15, 12, 14] [ 15, 12, 13] [ 16, 12, 13] [ 16, 11, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 8: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 16, 15, 18] [ 12, 14, 11] [ 13, 13, 13] [ 13, 13, 13] [ 15, 12, 14] [ 12, 13, 14] [ 13, 12, 14] [ 15, 12, 14] [ 15, 12, 13] [ 15, 12, 13] [ 16, 11, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 9: [ 0, 0, 0] [ 0, 0, 0] [ 20, 20, 18] [ 15, 16, 3] [ 16, 16, 11] [ 14, 14, 2] [ 14, 13, 13] [ 13, 13, 12] [ 13, 14, 8] [ 13, 12, 12] [ 12, 13, 13] [ 13, 12, 13] [ 14, 12, 13] [ 15, 11, 13] [ 15, 12, 13] [ 16, 11, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 10: [ 0, 0, 0] [ 0, 0, 0] [ 0, 0, 0] [ 15, 15, 16] [ 13, 14, 35] [ 11, 14, 30] [ 13, 13, 13] [ 13, 13, 12] [ 12, 14, 15] [ 13, 13, 13] [ 13, 13, 13] [ 13, 12, 13] [ 14, 12, 13] [ 15, 11, 13] [ 15, 11, 13] [ 16, 11, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 11: [ 14, 18, 16] [ 19, 16, 14] [ 12, 16, 13] [ 13, 14, 15] [ 14, 14, 9] [ 13, 14, 8] [ 12, 13, 17] [ 14, 13, 9] [ 13, 14, 12] [ 13, 13, 12] [ 13, 13, 13] [ 13, 12, 13] [ 14, 12, 13] [ 15, 12, 13] [ 15, 12, 13] [ 16, 12, 12] [ 15, 12, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 12: [ 0, 0, 0] [ 13, 15, 13] [ 16, 14, 12] [ 0, 14, 0] [ 13, 14, 12] [ 12, 14, 11] [ 13, 13, 10] [ 13, 13, 11] [ 13, 14, 11] [ 13, 13, 11] [ 13, 13, 11] [ 13, 13, 11] [ 14, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 16, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 11]
Row 13: [ 9, 16, 13] [ 17, 14, 12] [ 49, 8, 10] [ 8, 14, 13] [ 15, 14, 11] [ 13, 14, 12] [ 13, 14, 11] [ 13, 14, 11] [ 13, 13, 11] [ 13, 14, 11] [ 13, 14, 11] [ 13, 13, 11] [ 14, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 16, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 12, 12]
Row 14: [ 12, 14, 12] [ 12, 16, 13] [ 7, 13, 11] [ 13, 13, 11] [ 7, 14, 11] [ 12, 14, 11] [ 12, 14, 11] [ 12, 14, 12] [ 13, 14, 11] [ 13, 13, 11] [ 12, 13, 11] [ 12, 13, 11] [ 14, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 16, 13, 11] [ 15, 13, 11] [ 15, 13, 11] [ 15, 13, 12] [ 15, 12, 11]
Row 15: [ 18, 17, 14] [ 0, 0, 0] [ 17, 16, 14] [ 13, 14, 11] [ 14, 13, 11] [ 13, 14, 11] [ 13, 14, 11] [ 13, 14, 12] [ 13, 14, 11] [ 13, 14, 11] [ 13, 14, 11] [ 13, 14, 11] [ 14, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 16, 14, 11] [ 15, 14, 11] [ 15, 13, 12] [ 15, 13, 11] [ 15, 12, 12]
Row 16: [ 12, 12, 12] [ 20, 17, 15] [ 15, 14, 12] [ 15, 14, 11] [ 15, 14, 12] [ 15, 14, 12] [ 14, 14, 12] [ 14, 14, 12] [ 14, 14, 12] [ 14, 14, 12] [ 14, 14, 12] [ 14, 13, 12] [ 15, 13, 12] [ 15, 13, 12] [ 15, 13, 12] [ 16, 13, 12] [ 15, 14, 12] [ 15, 13, 11] [ 15, 13, 12] [ 15, 13, 12]
Row 17: [ 20, 16, 13] [ 16, 14, 12] [ 14, 14, 11] [ 16, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 16, 14, 11] [ 16, 14, 11] [ 15, 14, 11] [ 15, 13, 12] [ 15, 14, 12] [ 15, 14, 12] [ 15, 13, 12]
Row 18: [ 18, 15, 13] [ 16, 14, 8] [ 15, 13, 11] [ 16, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 11] [ 15, 14, 12] [ 15, 14, 12] [ 15, 14, 11] [ 15, 14, 11] [ 16, 14, 11] [ 15, 14, 11] [ 16, 14, 11] [ 16, 14, 12] [ 16, 13, 12] [ 15, 14, 12] [ 15, 13, 12]
Row 19: [ 16, 14, 7] [ 23, 21, 55] [ 16, 13, 5] [ 16, 14, 10] [ 16, 14, 10] [ 16, 14, 10] [ 16, 14, 10] [ 16, 14, 10] [ 16, 14, 11] [ 16, 14, 11] [ 16, 14, 11] [ 16, 13, 12] [ 16, 13, 12] [ 16, 13, 12] [ 16, 13, 12] [ 16, 13, 12] [ 16, 14, 12] [ 15, 14, 12] [ 16, 14, 12] [ 15, 14, 12]


Подробнее здесь: https://stackoverflow.com/questions/798 ... obilenetv3
Ответить

Быстрый ответ

Изменение регистра текста: 
Смайлики
:) :( :oops: :roll: :wink: :muza: :clever: :sorry: :angel: :read: *x)
Ещё смайлики…
   
К этому ответу прикреплено по крайней мере одно вложение.

Если вы не хотите добавлять вложения, оставьте поля пустыми.

Максимально разрешённый размер вложения: 15 МБ.

Вернуться в «Android»