Многое уже работает хорошо, за исключением исключения, которое я получаю при запуске на CudaAccelerator, предоставляемом ILGPU; оно исчезает, как только я тестирую код на CPUAccelerator.
Код: Выделить всё
convertedLayout.View[r, c] = rValue; // 0)
{
newDiscoveries = true;
discoveredDeadlocks = newQueue;
totalMultiPassDeadlocks += newQueue.Count;
UpdateGpuOrderProgress(i, DiscoveryPhase.MultiPass, 0, newQueue.Count);
passNumber++;
_logger.Information("D-Deadlocks: Order {Order} pass {Pass} found {NewDeadlocks} additional deadlocks",
i, passNumber - 1, newQueue.Count);
}
else
{
newDiscoveries = false;
}
}
// Mark multi-pass as completed
UpdateGpuOrderProgress(i, DiscoveryPhase.MultiPass, true);
var allDeadlocksForOrder = new List();
// Collect all deadlocks from consolidatedDeadlocks queue
while (consolidatedDeadlocks.TryDequeue(out var deadlock))
{
allDeadlocksForOrder.Add(deadlock);
}
// Add any remaining deadlocks from discoveredDeadlocks
while (discoveredDeadlocks.TryDequeue(out var deadlock))
{
allDeadlocksForOrder.Add(deadlock);
}
// Store in permanent collection with proper ordering
_deadlocks[i] = allDeadlocksForOrder
.DistinctBy(ps => ps.ToString()) // Remove duplicates based on full string representation
.OrderBy(ps => ps.ToDiamondString()) // Order by diamond string representation
.ToArray();
orderStopwatch.Stop();
var deadlockCount = _deadlocks[i].Length;
_logger.Information("D-Deadlocks: Completed order {Order} in {ElapsedTime:0.000}s. Found {DeadlockCount:N0} deadlocks total (First Pass: {FirstPass}, Multi-pass: {MultiPass})",
i, orderStopwatch.Elapsed.TotalSeconds, deadlockCount,
Math.Max(0, deadlockCount - totalMultiPassDeadlocks), totalMultiPassDeadlocks);
}
await loggingTimer.DisposeAsync();
_basicAlgoCompleted = true;
IsCompleted = true;
IsGenerating = false;
fullRunChrono.Stop();
var totalDeadlocks = _deadlocks.Values.Sum(arr => arr.Length);
var elapsedTime = fullRunChrono.Elapsed.TotalSeconds;
_logger.Information("D-Deadlocks: General map discovery completed. Total deadlocks found: {Total:N0} in {ElapsedTime:0.000} seconds", totalDeadlocks, elapsedTime);
LogAllDeadlocks(_deadlocks);
return (finished: true, deadlocks: GetAllPotentialTiles(Deadlocks));
}
private Task ConversionWorkAsync(Accelerator accelerator, int[] convertedAdmissibleTiles, int[][] chunks, int conversionRatio, int j, TileContent[,] layout)
{
// EARLY EXIT: Prevent invalid kernel launch for empty chunk
if (chunks[j] == null || chunks[j].Length == 0)
{
_logger.Warning($"ConversionWorkAsync: Skipping empty chunk at index {j}.");
return Task.FromResult(accelerator.Allocate1D(0));
}
// ADDITIONAL GUARD: Check accelerator validity and group size
if (accelerator == null || accelerator.IsDisposed)
{
_logger.Error($"ConversionWorkAsync: Accelerator is null or disposed at chunk index {j}. Skipping kernel launch.");
return Task.FromResult(accelerator.Allocate1D(0));
}
if (accelerator.MaxNumThreadsPerGroup == 0)
{
_logger.Error($"ConversionWorkAsync: Accelerator reports MaxNumThreadsPerGroup == 0 at chunk index {j}. Skipping kernel launch.");
return Task.FromResult(accelerator.Allocate1D(0));
}
// Log buffer sizes
_logger.Debug($"ConversionWorkAsync: chunk[{j}].Length={chunks[j].Length}, conversionRatio={conversionRatio}, layout=({layout.GetLength(0)},{layout.GetLength(1)})");
MemoryBuffer1D chunkOnDevice = accelerator.Allocate1D(chunks[j]);
Index2D dims = new Index2D(layout.GetLength(0), layout.GetLength(1));
var requiredMemory = dims.Size * sizeof(ushort);
if (requiredMemory > accelerator.MemorySize / 10)
{
_logger.Warning("ConversionWork: Large memory allocation requested: {RequiredMB}MB", requiredMemory / (1024 * 1024));
}
MemoryBuffer2D convertedLayout;
try
{
convertedLayout = accelerator.Allocate2D(
dims,
extent => extent.Y,
(extent, leadingDimension) => Stride2D.DenseY.FromExtent(extent)
);
}
catch (Exception ex)
{
_logger.Error(ex, "Failed to allocate GPU memory for layout conversion. Dims: {Dims}", dims);
chunkOnDevice.Dispose();
return Task.FromResult(accelerator.Allocate1D(0));
}
int errorR = -1, errorC = -1;
try
{
for (int r = 0; r < dims.X; r++)
{
for (int c = 0; c < dims.Y; c++)
{
errorR = r;
errorC = c;
if (r >= 0 && r < layout.GetLength(0) && c >= 0 && c < layout.GetLength(1))
{
var rValue = (ushort)layout[r, c];
convertedLayout.View[r, c] = rValue; // = data.Length) break;
int content = data[i + shift];
if (combDataOffset + shift < combData.Length)
combData[combDataOffset + shift] = content;
int contentCol = content % 256;
int contentRow = (content - contentCol) / 256;
int up = ((contentRow - 1) * 256) + contentCol;
if (IsAdmissible(up, admissibleTiles) && localGenerated < multFactor)
{
if (manPositionsOffset + localGenerated < manPositions.Length)
manPositions[manPositionsOffset + localGenerated] = up;
localGenerated++;
}
int down = ((contentRow + 1) * 256) + contentCol;
if (IsAdmissible(down, admissibleTiles) && localGenerated < multFactor)
{
if (manPositionsOffset + localGenerated < manPositions.Length)
manPositions[manPositionsOffset + localGenerated] = down;
localGenerated++;
}
int right = content + 1;
if (IsAdmissible(right, admissibleTiles) && localGenerated < multFactor)
{
if (manPositionsOffset + localGenerated < manPositions.Length)
manPositions[manPositionsOffset + localGenerated] = right;
localGenerated++;
}
int left = content - 1;
if (IsAdmissible(left, admissibleTiles) && localGenerated < multFactor)
{
if (manPositionsOffset + localGenerated < manPositions.Length)
manPositions[manPositionsOffset + localGenerated] = left;
localGenerated++;
}
}
// Defensive: check subview lengths
if (manPositionsOffset + localGenerated > manPositions.Length) return;
if (combDataOffset + order > combData.Length) return;
if (cleanupOffset + localGenerated > cleanup.Length) return;
var threadManPositions = manPositions.SubView(manPositionsOffset, localGenerated);
var threadCombData = combData.SubView(combDataOffset, order);
var threadCleanup = cleanup.SubView(cleanupOffset, localGenerated);
CleanupEquivalentManPositionsGpu(layout, threadCombData, threadManPositions, threadCleanup, visited, queue);
int pStateLen = (1 + order);
int targetIndex = i * (multFactor * pStateLen);
for (int j = 0; j < localGenerated; j++)
{
if (targetIndex + (j * pStateLen) >= output.Length) break;
if (j >= threadCleanup.Length) break;
output[targetIndex + (j * pStateLen)] = threadCleanup[j];
for (int k = 1; k = output.Length) break;
if ((k - 1) >= threadCombData.Length) break;
output[targetIndex + (j * pStateLen) + k] = threadCombData[k - 1];
}
}
}
< /code>
Для полноты привожу также обёртку для ускорителей, предоставляемых ILGPU:
/// <summary>
/// Contains all information related to GPU usage: the ILGPU context plus the
/// CUDA, OpenCL and CPU accelerators created from it.
/// </summary>
public class CudaWrapper : IDisposable
{
    /// <summary>
    /// The preferred OpenCL accelerator, or <c>null</c> when none was selected.
    /// </summary>
    public CLAccelerator? ClAccelerator { get; set; }

    /// <summary>
    /// Memory size reported by the selected OpenCL accelerator.
    /// </summary>
    public long ClMaxMemory { get; set; }

    /// <summary>
    /// Maximum thread count reported by the selected OpenCL accelerator.
    /// </summary>
    public int ClMaxThreads { get; set; }

    /// <summary>
    /// Gets or sets the context of this instance.
    /// </summary>
    /// <value>
    /// The context.
    /// </value>
    public Context? Context { get; set; }

    /// <summary>
    /// A fallback device in case Cuda may fail.
    /// </summary>
    /// <value>
    /// The cpu accelerator.
    /// </value>
    public CPUAccelerator? CpuAccelerator { get; set; }

    /// <summary>
    /// The GPU.
    /// </summary>
    /// <value>
    /// The gpu accelerator.
    /// </value>
    public CudaAccelerator? GpuAccelerator { get; set; }

    /// <summary>
    /// Memory size reported by the selected CUDA accelerator.
    /// </summary>
    public long GpuMaxMemory { get; set; }

    /// <summary>
    /// Maximum thread count reported by the selected CUDA accelerator.
    /// </summary>
    public int GpuMaxThreads { get; set; }

    /// <summary>
    /// Creates the wrapper; no device is touched until <see cref="Initialize"/> is called.
    /// </summary>
    /// <param name="logger">Logger used for device discovery and failure reporting.</param>
    public CudaWrapper(ILogger logger)
    {
        // NOTE(review): the generic argument was lost in the original paste;
        // restored on the assumption that this is Serilog's ForContext<TSource>().
        _logger = logger.ForContext<CudaWrapper>();
    }

    /// <summary>
    /// Disposes the accelerators and the context owned by this instance.
    /// </summary>
    public void Dispose()
    {
        // Do not change this code. Put cleanup code in 'Dispose(bool disposing)' method
        Dispose(disposing: true);
        GC.SuppressFinalize(this);
    }

    /// <summary>
    /// Creates the context and selects one CUDA accelerator, one OpenCL accelerator
    /// and the CPU fallback accelerator. Any exception is logged as a warning and
    /// swallowed, leaving whatever was initialized so far in place.
    /// </summary>
    /// <returns>An already completed task.</returns>
    public Task Initialize()
    {
        try
        {
            Context = Context.Create(builder => builder.AllAccelerators());

            var devices = Context.GetCudaDevices();
            if (devices.Count == 0)
            {
                _logger.Warning("No Cuda capable GPU found in the system. Failing over to CPU model accelerator");
            }
            else
            {
                // Header is logged once for the whole list rather than once per device.
                _logger.Information("Listing GPUs found:");
            }

            GpuMaxThreads = 0;
            GpuMaxMemory = 0;
            for (var index = 0; index < devices.Count; index++)
            {
                var device = devices[index];
                device.PrintInformation(new TextWriterLogger(_logger));
                var gpuAccelerator = device.CreateCudaAccelerator(Context);
                var mem = gpuAccelerator.MemorySize;
                var th = gpuAccelerator.MaxNumThreads;
                if (mem > GpuMaxMemory || th > GpuMaxThreads)
                {
                    // Release the accelerator being superseded so it does not leak.
                    GpuAccelerator?.Dispose();
                    GpuMaxThreads = th;
                    GpuMaxMemory = mem;
                    GpuAccelerator = gpuAccelerator;
                    _logger.Information("Set {Device} as preferred Cuda device", GpuAccelerator.Name);
                }
                else
                {
                    // Candidate was only created to query its capabilities.
                    gpuAccelerator.Dispose();
                }
            }

            // now set the intel graphic card
            var clDevices = Context.GetCLDevices();
            if (clDevices.Count > 0)
            {
                _logger.Information("Listing OpenCl GPUs found:");
            }

            ClMaxThreads = 0;
            ClMaxMemory = 0;
            for (var index = 0; index < clDevices.Count; index++)
            {
                var device = clDevices[index];
                device.PrintInformation(new TextWriterLogger(_logger));
                var clAccelerator = device.CreateCLAccelerator(Context);
                var mem = clAccelerator.MemorySize;
                var th = clAccelerator.MaxNumThreads;

                // Compare against the OpenCL maxima. The original compared against the
                // CUDA maxima, so an OpenCL device was selected only if it beat the
                // best CUDA device.
                if (mem > ClMaxMemory || th > ClMaxThreads)
                {
                    ClAccelerator?.Dispose();
                    ClMaxThreads = th;
                    ClMaxMemory = mem;
                    ClAccelerator = clAccelerator;
                    _logger.Information("Set {Device} as preferred OpenCL device", ClAccelerator.Name);
                }
                else
                {
                    clAccelerator.Dispose();
                }
            }

            //setting the CPU fallback
            CpuAccelerator = Context.GetCPUDevice(0).CreateCPUAccelerator(Context);
            _logger.Information("Set {Device} as preferred fallback device", CpuAccelerator.Name);
            CpuAccelerator.PrintInformation();
        }
        catch (Exception ex)
        {
            _logger.Warning(ex, "GPU: Failed to initialize GPU acceleration, trying CPU acceleration");
        }

        return Task.CompletedTask;
    }

    /// <summary>
    /// Releases the owned accelerators and context exactly once.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="Dispose()"/>; managed objects are
    /// disposed only in that case.
    /// </param>
    private void Dispose(bool disposing)
    {
        if (!_disposedValue)
        {
            if (disposing)
            {
                // Accelerators are disposed before the context they were created from.
                CpuAccelerator?.Dispose();
                GpuAccelerator?.Dispose();
                ClAccelerator?.Dispose();
                // dispose managed state (managed objects)
                Context?.Dispose();
            }

            // free unmanaged resources (unmanaged objects) and override finalizer set large
            // fields to null
            _disposedValue = true;
        }
    }

    private readonly ILogger _logger;
    private bool _disposedValue;
}
< /code>
И, наконец, само исключение, упомянутое выше:
Fatal error. System.AccessViolationException: Attempted to read or write protected memory. This is often an indication that other memory is corrupt.
at SokoLib.DomainKnowledge.DynamicDeadlocks.ConversionWorkAsync(ILGPU.Runtime.Accelerator, Int32[], Int32[][], Int32, Int32, SokoLib.Core.TileContent[,])
at SokoLib.DomainKnowledge.DynamicDeadlocks+d__13.MoveNext()
at System.Runtime.CompilerServices.AsyncTaskMethodBuilder`1+AsyncStateMachineBox`1[[System.ValueTuple`2[[System.Boolean, System.Private.CoreLib, Version=9.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e],[System.__Canon, System.Private.CoreLib, Version=9.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e]], System.Private.CoreLib, Version=9.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e],[System.__Canon, System.Private.CoreLib, Version=9.0.0.0, Culture=neutral, PublicKeyToken=7cec85d7bea7798e]].ExecutionContextCallback(System.Object)
[/code]
Я создал переменную rValue только для того, чтобы разделить обращения к двум массивам и выяснить, какое из них вызывает проблему. Похоже, что проблема возникает при присваивании в convertedLayout.
Есть идеи?
Подробнее здесь: https://stackoverflow.com/questions/797 ... violations
Мобильная версия