Я наблюдаю, что время выполнения вызываемой numba-функции увеличивается по мере выполнения итераций. В течение первых примерно 1000 итераций время выполнения функции numba составляет около 0,4–0,6 секунды. После этого оно постепенно растёт и на последних итерациях достигает 1,6 секунды.
Вот общая структура кода:
EDIT: я заменил приведённую ранее структуру кода минимальным воспроизводимым примером (MVP). В этом коде проблема проявляется ещё быстрее — уже после 100–200 итераций.
Код: Выделить всё
import numpy as np
import pandas as pd
from numba import njit, prange
import copy
import random
import time
# Create simulated combinations list with 1000 different combination dictionaries.
# Each dictionary holds four random integers in [50, 550] under the keys "a".."d".
simulated_combinations_list = [
    {
        "a": random.randint(50, 550),
        "b": random.randint(50, 550),
        "c": random.randint(50, 550),
        "d": random.randint(50, 550),
    }
    for _ in range(1000)
]
# Create simulated arrays list: 3000 random 2D float arrays of shape (rows, columns).
rows = 1380
columns = 17
simulated_array_list = [np.random.rand(rows, columns) for _ in range(3000)]
# Create MVP of numba function
@njit(parallel=True)
def numba_function(array_list, a_value, b_value, c_value, d_value):
    """Flag the first qualifying row of each 2D array and copy it into row 0.

    For every array in ``array_list`` the rows are scanned in order. The first
    row whose column 4 is at least column 2 plus one of the thresholds
    (``b_value``, ``c_value``, ``d_value``, checked in that order) gets a flag
    written to column 12, 13 or 14, a derived value written to column 15 and
    column 0 copied to column 16. If no row qualifies, the last row gets
    column 15 set to 0 and column 0 copied to column 16.

    Afterwards the first row that carries a flag in columns 11-14 (if any) is
    copied into row 0 of the array.

    The arrays are modified in place.

    Args:
        array_list: list of 2D arrays with at least 17 columns.
        a_value: accepted for interface compatibility; not used by these
            simplified checks.
        b_value: threshold for the first check (flag column 12).
        c_value: threshold for the second check (flag column 13).
        d_value: threshold for the third check (flag column 14).

    Returns:
        The same ``array_list`` that was passed in.
    """
    # Get number of arrays in list
    number_of_arrays = len(array_list)
    # Every array is handled independently of the others --> usage of prange
    for i in prange(number_of_arrays):
        current_array = array_list[i]
        row_count = current_array.shape[0]
        last_row_index = row_count - 1
        data_manipulated = False
        # Loop through the rows of the current 2D array until one is flagged
        for j in range(row_count):
            row = current_array[j]
            value_column_3 = row[2]
            # Simplified threshold checks; only the first matching branch applies.
            if row[4] >= value_column_3 + b_value:
                row[12] = 1
                row[15] = ((value_column_3 * 2) * (-1)) + b_value
                row[16] = row[0]
                data_manipulated = True
            elif row[4] >= value_column_3 + c_value:
                row[13] = 1
                row[15] = (value_column_3 * 3) + c_value
                row[16] = row[0]
                data_manipulated = True
            elif row[4] >= value_column_3 + d_value:
                row[14] = 1
                row[15] = (value_column_3 * 3) + d_value
                row[16] = row[0]
                data_manipulated = True
            if data_manipulated:
                # Only one row per array is ever flagged, so the remaining
                # rows do not need to be visited.
                break
            # Current row is the end of the array and nothing was manipulated.
            # The explicit comparison with j matters: a bare
            # ``current_array.shape[0] - 1`` is truthy for every multi-row array.
            if j == last_row_index:
                row[15] = 0
                row[16] = row[0]
        # After the row loop: find rows carrying a flag in columns 11-14.
        # Column 11 is never written here but is still honoured if set on input.
        cond1 = current_array[:, 11] == 1
        cond2 = current_array[:, 12] == 1
        cond3 = current_array[:, 13] == 1
        cond4 = current_array[:, 14] == 1
        row_index = np.where(cond1 | cond2 | cond3 | cond4)[0]
        # If manipulated rows were found
        if len(row_index) > 0:
            # Replace first row of current array with the first flagged row.
            # A scalar index keeps the right-hand side 1D even when the input
            # already contained more than one flagged row.
            current_array[0, :] = current_array[row_index[0], :]
        # Kept from the original; the rows above were already changed in place.
        array_list[i] = current_array
    # Return manipulated array list
    return array_list
# Loop through combinations list and call numba functions on array list with different variables a, b, c, d
counter = 0
for combination in simulated_combinations_list:
    # Index the keys directly so a missing key fails here instead of passing
    # None into the jitted function.
    a_value = combination["a"]
    b_value = combination["b"]
    c_value = combination["c"]
    d_value = combination["d"]
    # Copy the simulated array list twice, once per numba call. Fresh copies are
    # needed every round because numba_function writes into the arrays it gets.
    array_list_copy_one = [element.copy() for element in simulated_array_list]
    array_list_copy_two = [element.copy() for element in simulated_array_list]
    # Two separate numba functions with the same structure but different
    # if/else checks are simulated by calling the same function twice. This
    # reproduces the growing execution time over the iterations.
    # perf_counter is a monotonic, high-resolution clock meant for intervals.
    start_time_numba = time.perf_counter()
    result_list_one = numba_function(array_list_copy_one, a_value, b_value, c_value, d_value)
    result_list_two = numba_function(array_list_copy_two, a_value, b_value, c_value, d_value)
    end_time_numba = time.perf_counter()
    diff_time = end_time_numba - start_time_numba
    print(f"Execution time numba functions round {counter}: {diff_time} seconds.")
    counter += 1
    # After the numba functions the results are processed further and saved in
    # a SQLite3 database. That code is omitted here because the growing
    # execution time of the numba functions also occurs without it.
Я ожидал, что время выполнения функции numba будет оставаться почти одинаковым на протяжении всего цикла for, поскольку она всегда получает аргумент одного и того же размера (но с разными значениями) и всегда возвращает список результатов, который затем обрабатывается после вызова функции numba.
Подробнее здесь: https://stackoverflow.com/questions/784 ... n-for-loop