Код: Выделить всё
// Runs one colour of a red-black sweep over columns [startColumn, endColumn),
// visiting rows 1 .. numY-2. Cells are addressed as i * numY + j. When `red`
// is true only cells with even (i + j) are processed, otherwise only cells
// with odd (i + j). For every FLUID_CELL the four surrounding face velocities
// in u and v are adjusted by a scaled divergence. `dt` is not used in the
// visible body.
//
// NOTE(review): this listing looks damaged by HTML extraction. The `leftType`
// line is not valid C++ as shown, and rightType, topType, bottomType, divideBy
// and divergence are used below without a visible declaration. There is also
// one more closing brace than opening brace. Restore the lost text from the
// original source before compiling.
void solveIncompressibilityRedBlackForColumns(const float dt, const float overRelaxation, const int startColumn, const int endColumn, const bool red) {
int n = this->numY;
for (int i = startColumn; i < endColumn; ++i) {
for (int j = 1; j < numY - 1; ++j) {
// Red pass: skip cells whose (i + j) is odd.
if (red) {
if ((i + j) % 2 != 0) {
continue;
}
}
// Black pass: skip cells whose (i + j) is even.
if (!red) {
if ((i + j) % 2 == 0) {
continue;
}
}
if (this->cellType[i * n + j] == FLUID_CELL) {
// NOTE(review): garbled line; presumably several statements were fused here
// when text between '<' and '>' was stripped. Verify against the original.
float leftType = cellType[(i - 1) * n + j] v[i * n + j];
// Extra term applied only when a rest density has been set and the cell's
// particle density exceeds it: the divergence is reduced by k * excess.
if (this->particleRestDensity > 0.f) {
float k = 10.f; // 3, 10
float compression = this->particleDensity[i * n + j] -this->particleRestDensity;
if (compression > 0.f) {
divergence = divergence - k * compression;
}
}
// Scale the divergence by divideBy and by the over-relaxation factor.
float p = divergence / divideBy;
p *= overRelaxation;
// Apply the correction to the four faces of cell (i, j), each weighted by
// the corresponding neighbour-type factor.
this->u[i * n + j] += leftType * p;
this->u[(i + 1) * n + j] -= rightType * p;
this->v[i * n + j] += topType * p;
this->v[i * n + j + 1] -= bottomType * p;
}
}
}
}
}
Код: Выделить всё
for (int _ = 0; _ < 10; ++_) {
for (int i = 0; i < numThreads; ++i) {
if (i != numThreads - 1) {
thread_pool.addTask([&, this, i]() {
this->solveIncompressibilityRedBlackForRows(sdtoverRelaxation, i * numRowsEachThread + 1, i*numRowsEachThread + numRowsEachThread + 1, true);
});
}
else {
thread_pool.addTask([&, this, i]() {
this->solveIncompressibilityRedBlackForRows(sdtoverRelaxation, i * numRowsEachThread + 1, i*numRowsEachThread + numRowsEachThread + numMissedRows + 1, true);
});
}
}
thread_pool.waitForCompletion();
for (int i = 0; i < numThreads; ++i) {
if (i != numThreads - 1) {
thread_pool.addTask([&, this, i]() {
this->solveIncompressibilityRedBlackForRows(sdtoverRelaxation, i * numRowsEachThread + 1, i*numRowsEachThread + numRowsEachThread + 1, false);
});
}
else {
thread_pool.addTask([&, this, i]() {
this->solveIncompressibilityRedBlackForRows(sdt, overRelaxation, i * numRowsEachThread + 1, i * numRowsEachThread + numRowsEachThread + numMissedRows + 1, false);
});
}
}
thread_pool.waitForCompletion();
}
Редактировать
Вот не самый минимальный, но воспроизводимый пример, в котором сравнивается работа одного потока и десяти потоков, изменяющих векторы u и v на шаге обеспечения несжимаемости. Здесь я использую не пул потоков, а обычные std::thread, хотя не думаю, что это существенно влияет на результат:
Код: Выделить всё
#include <chrono>
#include <functional>
#include <iostream>
#include <thread>
#include <vector>
/// Relaxes one colour of a red-black checkerboard on a grid stored as
/// index = i * gridHeight + j.
///
/// @param overRelaxation factor the per-cell correction is multiplied by
/// @param startColumn    first value of the outer index i (inclusive)
/// @param endColumn      last value of the outer index i (exclusive)
/// @param red            true: process cells with even (i + j); false: odd (i + j)
/// @param gridHeight     stride between consecutive i; j runs over 1 .. gridHeight-2
/// @param u              face values indexed at (i, j) and (i + 1, j); updated in place
/// @param v              face values indexed at (i, j) and (i, j + 1); updated in place
///
/// The caller must ensure (endColumn) * gridHeight + gridHeight - 2 is a valid
/// index into u, since cell (i, j) also writes u[(i + 1) * gridHeight + j].
void solveIncompressibilityRedBlackForRows(const float overRelaxation, const int startColumn, const int endColumn, const bool red, const int gridHeight, std::vector<float>& u, std::vector<float>& v) {
    const int n = gridHeight;

    // In this reduced example every neighbour counts with weight 1, so the
    // divisor is always 4.
    const float leftType = 1.f;
    const float rightType = 1.f;
    const float topType = 1.f;
    const float bottomType = 1.f;
    const float divideBy = 4.f;

    for (int i = startColumn; i < endColumn; ++i) {
        for (int j = 1; j < n - 1; ++j) {
            // A cell belongs to the red set when (i + j) is even; skip cells of
            // the other colour.
            const bool isRedCell = ((i + j) % 2 == 0);
            if (isRedCell != red) {
                continue;
            }

            const int center = i * n + j;
            const int nextI = (i + 1) * n + j;
            const int nextJ = center + 1;

            const float divergence = u[nextI] - u[center] + v[nextJ] - v[center];
            float p = divergence / divideBy;
            p *= overRelaxation;

            u[center] += leftType * p;
            u[nextI] -= rightType * p;
            v[center] += topType * p;
            v[nextJ] -= bottomType * p;
        }
    }
}
// Benchmark driver: on a 100 x 100 grid it repeats 1000 iterations of a red
// pass followed by a black pass, each pass split across numThreads freshly
// created std::thread workers, and measures the elapsed time of the whole loop.
//
// NOTE(review): this listing looks damaged by HTML extraction. Template
// arguments appear to be missing from `std::vector u`, `std::vector v`,
// `std::vector threads` and `duration_cast(...)`, and the final `std::cout`
// statement and the closing brace of main are cut off.
int main() {
int gridHeight = 100;
int gridWidth = 100;
// Both arrays hold gridHeight * gridWidth entries, each initialised to 10.
std::vector u(gridHeight * gridWidth, 10);
std::vector v(gridHeight * gridWidth, 10);
int numThreads = 10;
std::vector threads;
float overRelaxation = 1.9;
// Indices 1 .. gridWidth-2 are divided evenly between the threads; the
// remainder of the integer division is given to the last thread below.
int numRowsEachThread = (gridWidth - 2) / numThreads;
int numMissedRows = (gridWidth - 2) - numRowsEachThread * numThreads;
auto start = std::chrono::high_resolution_clock::now();
// main loop to run the incompressibility stuff
for (int i = 0; i < 1000; ++i) {
// Red pass: one new thread per slice (the inner `i` shadows the outer one).
for (int i = 0; i < numThreads; ++i) {
if (i != numThreads - 1) {
threads.push_back(std::thread(solveIncompressibilityRedBlackForRows, overRelaxation, i * numRowsEachThread + 1, i *numRowsEachThread + numRowsEachThread + 1, true, gridHeight, std::ref(u), std::ref(v)));
}
else {
// Last slice also covers the numMissedRows leftover indices.
threads.push_back(std::thread(
solveIncompressibilityRedBlackForRows, overRelaxation, i * numRowsEachThread + 1, i *numRowsEachThread + numRowsEachThread + numMissedRows+ 1, true, gridHeight, std::ref(u), std::ref(v)));
}
}
// Wait for every red task before starting the black pass.
for (auto& thread : threads) {
thread.join();
}
threads.clear();
// Black pass: same slicing, with red == false.
for (int i = 0; i < numThreads; ++i) {
if (i != numThreads - 1) {
threads.push_back(std::thread(
solveIncompressibilityRedBlackForRows, overRelaxation, i * numRowsEachThread + 1, i *numRowsEachThread + numRowsEachThread + 1, false, gridHeight, std::ref(u), std::ref(v)));
}
else {
threads.push_back(std::thread(
solveIncompressibilityRedBlackForRows, overRelaxation, i * numRowsEachThread + 1, i *numRowsEachThread + numRowsEachThread + numMissedRows+ 1, false, gridHeight, std::ref(u), std::ref(v)));
}
}
// Wait for every black task before the next iteration.
for (auto& thread : threads) {
thread.join();
}
threads.clear();
}
auto end = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast(end - start);
// NOTE(review): the output statement is truncated in this listing.
std::cout
Подробнее здесь: [url]https://stackoverflow.com/questions/79033577/multithreaded-red-black-gauss-seidel-running-slow[/url]