Сегментация изображения onnx от guggingface дает очень разные результаты при использовании в Ml.net

Сегментация изображения onnx от guggingface дает очень разные результаты при использовании в Ml.net ⇐ C#

1 сообщение • Страница 1 из 1

Гость

Сегментация изображения onnx от guggingface дает очень разные результаты при использовании в Ml.net

Цитата

Сообщение Гость » 04 фев 2025, 12:38

Я пытался получить модель сегментации изображений от Huggingface (RMBG-2.0) для работы для вывода с использованием ML.net. После большого количества проб и ошибок я наконец -то получил код для компиляции и создания вывода, но он сильно отличается от результата, который я получаю от использования демонстрации на Huggingface. < /P>
Код: < /p>

Код: Выделить всё

public static void RemoveGreenBackgroundAI2(string imagePath, string outputfile)
{
string modelPath = Path.Combine( Application.StartupPath,"ONNX","model.onnx");
MLContext mlContext = new MLContext();

var imageData = new ImageInputData
{
Image = MLImage.CreateFromFile (imagePath)
};

var imageDataView = mlContext.Data.LoadFromEnumerable(new[] { imageData });

var pipeline = mlContext.Transforms.ResizeImages(
outputColumnName: "input",
imageWidth: 1024,
imageHeight: 1024,
inputColumnName: nameof(ImageInputData.Image))
.Append(mlContext.Transforms.ExtractPixels(
outputColumnName: "out1",
inputColumnName: "input",
interleavePixelColors: true,
scaleImage: 1f / 255f,
offsetImage: 0,
outputAsFloatArray: true))
.Append(mlContext.Transforms.CustomMapping(
mapAction: (input, output) =>
{
output.pixel_values = new float[input.out1.Length];
for (int i = 0; i < input.out1.Length; i += 3)
{
// R
output.pixel_values[i] = (input.out1[i] - 0.485f) / 0.229f;

//G
output.pixel_values[i + 1] = (input.out1[i + 1] - 0.456f) / 0.224f;

//B
output.pixel_values[i + 2] = (input.out1[i + 2] - 0.406f) / 0.225f;
}
}, contractName: null))
.Append(mlContext.Transforms.ApplyOnnxModel(
modelFile: modelPath,
outputColumnNames: new[] { "alphas" },
inputColumnNames: new[] { "pixel_values" },
shapeDictionary: new Dictionary
{
{ "pixel_values", new[] { 1, 3, 1024, 1024 } }

},
fallbackToCpu:true,
gpuDeviceId:null
));

var model = pipeline.Fit(imageDataView);
var predictionEngine = mlContext.Model.CreatePredictionEngine(model);
var prediction = predictionEngine.Predict(imageData);
ApplyMaskAndSaveImage(imagePath, prediction, outputfile);

}

public static void ApplyMaskAndSaveImage(string originalImagepath, ModelOutput prediction, string outputPath)
{
int width = 1024;
int height = 1024;
float[] outputData = prediction.Output;

Bitmap originalImage = (Bitmap)Bitmap.FromFile(originalImagepath);
int originalWidth = originalImage.Width;
int originalHeight = originalImage.Height;

Bitmap resizedImage = new Bitmap(originalImage, new System.Drawing.Size(width, height));
Bitmap outputImage = new Bitmap(width, height, PixelFormat.Format32bppArgb);

for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
float maskValue = outputData[y * width + x];
float threshold = 0.5f;
byte alpha = maskValue >= threshold ? (byte)255 : (byte)0;
Color pixelColor = resizedImage.GetPixel(x, y);
Color newColor = Color.FromArgb(alpha, pixelColor.R, pixelColor.G, pixelColor.B);
outputImage.SetPixel(x, y, newColor);
}
}
outputImage.Save(outputPath, ImageFormat.Png);
}

public class ModelOutput
{
[ColumnName("alphas")]
[VectorType(1, 1, 1024, 1024)]
public float[] Output { get; set; }
}
public class ImageInputData
{
[ColumnName("Image")]
[ImageType(1024, 1024)]
public MLImage Image { get; set; }
}
public class CustomMappingInput
{
[VectorType(3, 1024, 1024)]
public float[] out1 { get; set; }
}
public class CustomMappingOutput
{
[VectorType(3, 1024, 1024)]
public float[] pixel_values { get; set;  }
}
< /code>
Я знаю, что код далеко не оптимальный (GetPixel()

и setpixel () должны быть заменены среди прочего), и что соотношение сторон моего результата неправильно, потому что я не масштабировал изображение обратно в исходные измерения. Сначала я хотел бы правильно снять фон. Файл доступен в ссылке RMBG-2.0 в начале. Существует также фрагмент кода в Python для использования модели, и поэтому я применяю преобразования технических преобразований к изображению в трубопроводе. < /P>
Входное изображение < /p>
ожидаемый результат
Результат я получаю

Подробнее здесь: https://stackoverflow.com/questions/794 ... s-when-use

1738661937

Гость

 Я пытался получить модель сегментации изображений от Huggingface (RMBG-2.0) для работы для вывода с использованием ML.net. После большого количества проб и ошибок я наконец -то получил код для компиляции и создания вывода, но он сильно отличается от результата, который я получаю от использования демонстрации на Huggingface.  < /P>
Код: < /p>
[code]public static void RemoveGreenBackgroundAI2(string imagePath, string outputfile)
{
string modelPath = Path.Combine( Application.StartupPath,"ONNX","model.onnx");
MLContext mlContext = new MLContext();

var imageData = new ImageInputData
{
Image = MLImage.CreateFromFile (imagePath)
};

var imageDataView = mlContext.Data.LoadFromEnumerable(new[] { imageData });

var pipeline = mlContext.Transforms.ResizeImages(
outputColumnName: "input",
imageWidth: 1024,
imageHeight: 1024,
inputColumnName: nameof(ImageInputData.Image))
.Append(mlContext.Transforms.ExtractPixels(
outputColumnName: "out1",
inputColumnName: "input",
interleavePixelColors: true,
scaleImage: 1f / 255f,
offsetImage: 0,
outputAsFloatArray: true))
.Append(mlContext.Transforms.CustomMapping(
mapAction: (input, output) =>
{
output.pixel_values = new float[input.out1.Length];
for (int i = 0; i < input.out1.Length; i += 3)
{
// R
output.pixel_values[i] = (input.out1[i] - 0.485f) / 0.229f;

//G
output.pixel_values[i + 1] = (input.out1[i + 1] - 0.456f) / 0.224f;

//B
output.pixel_values[i + 2] = (input.out1[i + 2] - 0.406f) / 0.225f;
}
}, contractName: null))
.Append(mlContext.Transforms.ApplyOnnxModel(
modelFile: modelPath,
outputColumnNames: new[] { "alphas" },
inputColumnNames: new[] { "pixel_values" },
shapeDictionary: new Dictionary
{
{ "pixel_values", new[] { 1, 3, 1024, 1024 } }

},
fallbackToCpu:true,
gpuDeviceId:null
));

var model = pipeline.Fit(imageDataView);
var predictionEngine = mlContext.Model.CreatePredictionEngine(model);
var prediction = predictionEngine.Predict(imageData);
ApplyMaskAndSaveImage(imagePath, prediction, outputfile);

}

public static void ApplyMaskAndSaveImage(string originalImagepath, ModelOutput prediction, string outputPath)
{
int width = 1024;
int height = 1024;
float[] outputData = prediction.Output;

Bitmap originalImage = (Bitmap)Bitmap.FromFile(originalImagepath);
int originalWidth = originalImage.Width;
int originalHeight = originalImage.Height;

Bitmap resizedImage = new Bitmap(originalImage, new System.Drawing.Size(width, height));
Bitmap outputImage = new Bitmap(width, height, PixelFormat.Format32bppArgb);

for (int y = 0; y < height; y++)
{
for (int x = 0; x < width; x++)
{
float maskValue = outputData[y * width + x];
float threshold = 0.5f;
byte alpha = maskValue >= threshold ? (byte)255 : (byte)0;
Color pixelColor = resizedImage.GetPixel(x, y);
Color newColor = Color.FromArgb(alpha, pixelColor.R, pixelColor.G, pixelColor.B);
outputImage.SetPixel(x, y, newColor);
}
}
outputImage.Save(outputPath, ImageFormat.Png);
}

public class ModelOutput
{
[ColumnName("alphas")]
[VectorType(1, 1, 1024, 1024)]
public float[] Output { get; set; }
}
public class ImageInputData
{
[ColumnName("Image")]
[ImageType(1024, 1024)]
public MLImage Image { get; set; }
}
public class CustomMappingInput
{
[VectorType(3, 1024, 1024)]
public float[] out1 { get; set; }
}
public class CustomMappingOutput
{
[VectorType(3, 1024, 1024)]
public float[] pixel_values { get; set;  }
}
< /code>
Я знаю, что код далеко не оптимальный (GetPixel()[/code] и setpixel ()  должны быть заменены среди прочего), и что соотношение сторон моего результата неправильно, потому что я не масштабировал изображение обратно в исходные измерения. Сначала я хотел бы правильно снять фон. Файл доступен в ссылке RMBG-2.0 в начале. Существует также фрагмент кода в Python для использования модели, и поэтому я применяю преобразования технических преобразований к изображению в трубопроводе. < /P>
Входное изображение < /p>
ожидаемый результат 
Результат я получаю  

Подробнее здесь: [url]https://stackoverflow.com/questions/79411192/image-segmentation-onnx-from-huggingface-produces-very-diferent-results-when-use[/url]