В любом случае, проблема кроется в этой области (LogFileProcessor.cs).
Если у кого-нибудь есть идеи о том, как освободить память, которую поглощает EF, я был бы очень признателен (прежде чем я вернусь и буду использовать ADO.Net Core (у которого нет такой же проблемы - разберитесь)
Код: Выделить всё
public partial class LogFileProcessor(LogDbContext dbContext, ILogger logger)
{
private readonly LogDbContext _dbContext = dbContext;
private readonly ILogger _logger = logger;
private List _logEntries = new List();
private string[] _lines = [];
public async Task ProcessLogFileAsync(string filePath)
{
bool result = false;
if (!File.Exists(filePath))
{
_logger.LogError("File not found: {filePath}", filePath);
return result;
}
string fileName = Path.GetFileName(filePath);
if (await LogAlreadyProcessedAsync(fileName))
{
_logger.LogInformation("Log file already processed: {fileName}", fileName);
return result;
}
string fileNameNoExt = Path.GetFileNameWithoutExtension(filePath);
DateTime fileDate = File.GetLastWriteTime(filePath);
string fileHash = ProgramBase.ComputeSha256Hash(filePath);
int logFileId = ExtractLogFileId(fileNameNoExt);
string fileType = ExtractFileType(fileNameNoExt);
using var transaction = await _dbContext.Database.BeginTransactionAsync();
try
{
var parsedLog = new ParsedLog
{
FileName = fileName,
LogType = fileType,
LogFileId = logFileId,
DateParsed = DateTime.UtcNow,
FileDate = fileDate,
FileHash = fileHash
};
await _dbContext.ParsedLogs.AddAsync(parsedLog);
await _dbContext.SaveChangesAsync();
int parsedLogId = parsedLog.Id; //retrieve new Id (identity) from ParsedLogs table
//_lines = await File.ReadLinesAsync(filePath).ToArray(); //not really needed, but if user sets log file size really large, this is better for resources
_lines = await File.ReadAllLinesAsync(filePath);
int lineNum = 0;
foreach (var line in _lines)
{
var entry = ParseLine(line, parsedLogId, lineNum);
if (entry != null)
{
_logEntries.Add(entry);
}
else
{
throw new Exception($"Unable to parse or convert line {lineNum}");
}
lineNum += 1;
}
await _dbContext.LogEntries.AddRangeAsync(_logEntries);
await _dbContext.SaveChangesAsync();
await transaction.CommitAsync();
_logger.LogInformation("Log file: {fileName} processed and data committed to the database.", fileName);
await transaction.DisposeAsync();
result = true;
}
catch (Exception ex)
{
await transaction.RollbackAsync();
_logger.LogError("Error processing log file: {fileName} {ex.Message}", fileName, ex.Message);
await transaction.DisposeAsync();
result = false;
}
finally
{
_logEntries.Clear();
_lines = [];
// Force garbage collection - naturally, this doesn't work, UGH!
GC.Collect();
GC.WaitForPendingFinalizers();
GC.Collect();
}
return result;
}
private async Task LogAlreadyProcessedAsync(string fileName)
{
return await _dbContext.ParsedLogs.AsNoTracking().AnyAsync(l => l.FileName == fileName);
}
private static string ExtractFileType(string fileNameNoExt)
{
var match = FileTypeRegex().Match(fileNameNoExt);
return match.Success ? match.Groups[1].Value : "unknown";
}
private static int ExtractLogFileId(string fileNameNoExt)
{
var match = FileIdRegex().Match(fileNameNoExt);
return match.Success ? int.Parse(match.Groups[1].Value) : 0;
}
private static LogEntry? ParseLine(string line, int parsedLogId, int lineNum)
{
var parts = line.Split("->", StringSplitOptions.TrimEntries);
if (parts.Length < 2) return null;
var dateTimePart = parts[0].Trim();
string ipPart = string.Empty;
string statusAndRestPart;
// Check if the IP address is present
if (parts.Length == 3)
{
ipPart = parts[1].Trim();
statusAndRestPart = parts[2].Trim();
}
else
{
// Assume the IP address is missing and adjust accordingly
statusAndRestPart = parts[1].Trim();
}
var statusPart = statusAndRestPart.Split(':', StringSplitOptions.TrimEntries)[0];
var actionDetailsPart = ActionDetailsRegex().Match(statusAndRestPart);
string action = actionDetailsPart.Groups[1].Value.Trim();
string details = actionDetailsPart.Groups.Count > 2 ? actionDetailsPart.Groups[2].Value.Trim() : string.Empty;
return new LogEntry
{
ParsedLogId = parsedLogId,
LineNum = lineNum,
EntryDate = DateTime.ParseExact(dateTimePart, "ddd, dd MMM yyyy HH:mm:ss", CultureInfo.InvariantCulture),
IPaddress = ipPart,
Status = statusPart,
Action = action,
Details = details
};
}
// generates all regexes at compile time
[GeneratedRegex(@"^(.*?)_\d+$")]
private static partial Regex FileTypeRegex();
[GeneratedRegex(@"_([0-9]+)$")]
private static partial Regex FileIdRegex();
[GeneratedRegex(@"Action=\[(.*?)\](?:, Details=\[(.*?)\])?", RegexOptions.Compiled)]
private static partial Regex ActionDetailsRegex();
}
Код: Выделить всё
namespace LogParserApp;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using Microsoft.Extensions.Configuration;
using System;
using System.IO;
using Microsoft.EntityFrameworkCore;
using System.Text.RegularExpressions;
using Microsoft.Extensions.Logging;
internal partial class Program : ProgramBase
{
public static async Task Main(string[] args)
{
var settings = ParseArguments(args);
if (!settings.TryGetValue("filetype", out List? value) || value.Count == 0)
{
Console.WriteLine("Please specify at least one filetype using '-filetype \"smtp, pop3\"'.");
return;
}
var host = CreateHostBuilder(args).Build();
// Access the configuration and the LogFileProcessor service
var config = host.Services.GetRequiredService();
string? folderPath = settings.TryGetValue("folderpath", out List? value1) && value1.Count > 0 ? value1[0]
: config["LogFileSettings:FolderPath"];
string? archivePath = settings.TryGetValue("archivepath", out List? value2) && value2.Count > 0 ? value2[0]
: config["LogFileSettings:ArchivePath"];
var logFileProcessor = host.Services.GetRequiredService();
string postProcess = settings.TryGetValue("postprocess", out List? value3) && value3.Count > 0 ? value3[0].ToLower() : "keep";
foreach (var fileType in value)
{
var logFiles = Directory.GetFiles(folderPath ?? "C:\\logs", $"{fileType}_*.txt")
.Select(file => new
{
FileName = file,
OrderKey = int.Parse(OrderKeyRegex().Match(Path.GetFileName(file)).Groups[1].Value)
})
.OrderBy(f => f.OrderKey)
.Select(f => f.FileName);
//long memOffset = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
foreach (var file in logFiles)
{
// EnsureAvailableMemory(); //to keep program from crashing, no joy
//long startMem = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
Console.WriteLine($"Processing file: {file}");
var processSuccess = (await logFileProcessor.ProcessLogFileAsync(file));
if (processSuccess)
{
switch (postProcess)
{
case "archive":
string targetPath = Path.Combine(archivePath ?? "C:\\logs\\archive", Path.GetFileName(file));
File.Move(file, targetPath);
Console.WriteLine($"Archived file to: {targetPath}");
break;
case "delete":
File.Delete(file);
Console.WriteLine($"Deleted file: {file}");
break;
case "keep":
// Nothing to do, may add something later to keep, but rename, or what-have-you
break;
}
}
else
{
Console.WriteLine($"Processing failed for file: {file}, skipping post-processing steps.");
}
GC.Collect(0, GCCollectionMode.Forced);
//long endMem = GC.GetTotalMemory(forceFullCollection: true); //for tracking memory
//Console.WriteLine($"Memory Utilized: {(endMem - startMem) / 1048576M:N2} MB"); //for tracking memory
//Console.WriteLine($"Running Memory: {(endMem - memOffset) / 1048576M:N2} MB"); //for tracking memory
}
}
await host.RunAsync();
}
static IHostBuilder CreateHostBuilder(string[] args) =>
Host.CreateDefaultBuilder(args)
.ConfigureAppConfiguration((hostingContext, config) =>
{
config.SetBasePath(Directory.GetCurrentDirectory());
config.AddJsonFile("appsettings.json", optional: false, reloadOnChange: true);
})
.ConfigureServices((hostContext, services) =>
{
services.AddDbContext(options =>
options.UseSqlServer(hostContext.Configuration.GetConnectionString("DefaultConnection")));
services.AddScoped();
services.AddLogging();
services.AddSingleton(hostContext.Configuration);
})
.ConfigureLogging(logging => {
logging.ClearProviders();
logging.AddConsole();
logging.AddFilter("Microsoft.EntityFrameworkCore.Database.Command", LogLevel.Warning);
});
// generates a regex at compile time
[GeneratedRegex(@"^.*?_(\d+)\.txt$")]
private static partial Regex OrderKeyRegex();
// this doesn't help - garbage collection never actually occurs, so it stays at 1GB & tries again indefinitely
public static void EnsureAvailableMemory()
{
const long maxAllowedMemory = 1_073_741_824; // Set threshold to 1 GB
while (true)
{
long memoryUsed = GC.GetTotalMemory(false);
Console.WriteLine($"Memory used: {memoryUsed} bytes");
if (memoryUsed < maxAllowedMemory)
{
break;
}
Console.WriteLine("Memory usage is too high, forcing garbage collection.");
GC.Collect();
GC.WaitForPendingFinalizers();
Console.WriteLine("Garbage collection complete, pausing for a few seconds...");
Thread.Sleep(5000); // Wait 5 seconds before checking again
}
}
}
Сущности (вероятно, не помогут , но вот они)
Код: Выделить всё
public class ParsedLog
{
public int Id { get; set; }
public string FileName { get; set; } = string.Empty;
public string LogType { get; set; } = string.Empty;
public int LogFileId { get; set; }
public DateTime DateParsed { get; set; }
public DateTime FileDate { get; set; }
public string? FileHash { get; set; } // SHA-256 hash of the file
}
public class LogEntry
{
public long Id { get; set; }
public int ParsedLogId { get; set; }
public int LineNum { get; set; }
public DateTime EntryDate { get; set; }
public string IPaddress { get; set; } = string.Empty;
public string Status { get; set; } = string.Empty;
public string Action { get; set; } = string.Empty;
public string Details { get; set; } = string.Empty;
}
public class LogDbContext(DbContextOptions options) : DbContext(options)
{
public DbSet LogEntries { get; set; }
public DbSet
ParsedLogs { get; set; }
}
Вот некоторые результаты, демонстрирующие увеличение объема памяти до 1 ГБ по 1–2 МБ за раз.
Код: Выделить всё
PS D:\Projects\LogParserApp> dotnet run -filetype "smtp" -postprocess "archive"
Processing file: D:\EmailLogs\smtp_0.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_0.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_0.txt
Memory Utilized: 12.27 MB
Running Memory: 12.49 MB
Processing file: D:\EmailLogs\smtp_1.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_1.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_1.txt
Memory Utilized: 2.78 MB
Running Memory: 15.27 MB
Processing file: D:\EmailLogs\smtp_2.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_2.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_2.txt
Memory Utilized: 2.48 MB
Running Memory: 17.74 MB
Processing file: D:\EmailLogs\smtp_3.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_3.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_3.txt
Memory Utilized: 3.28 MB
Running Memory: 21.03 MB
Processing file: D:\EmailLogs\smtp_4.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_4.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_4.txt
Memory Utilized: 2.28 MB
Running Memory: 23.31 MB
Processing file: D:\EmailLogs\smtp_5.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_5.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_5.txt
Memory Utilized: 2.55 MB
Running Memory: 25.86 MB
...
...
...
Processing file: D:\EmailLogs\smtp_370.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_370.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_370.txt
Memory Utilized: 2.36 MB
Running Memory: 999.33 MB
Processing file: D:\EmailLogs\smtp_371.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_371.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_371.txt
Memory Utilized: 2.59 MB
Running Memory: 1,001.92 MB
Processing file: D:\EmailLogs\smtp_372.txt
info: LogParserApp.LogFileProcessor[0]
Log file: smtp_372.txt processed and data committed to the database.
Archived file to: D:\EmailLogs\ArchivedLogs\smtp_372.txt
Memory Utilized: 2.24 MB
Running Memory: 1,004.16 MB

Подробнее здесь: https://stackoverflow.com/questions/784 ... ion-8-c-co