Код: Выделить всё
import numpy as np
def jackknife_resampling(value, weight, label):
    """Return the leave-one-subsample-out weighted means of ``value``.

    For every distinct entry of ``label``, the weighted average of
    ``value`` is taken over all elements whose label differs from it.

    Args:
        value: Array-like of samples.
        weight: Array-like of weights, aligned element-wise with ``value``.
        label: Array-like of integers marking the subsample each element
            belongs to.

    Returns:
        A list holding one weighted average per distinct label, in the
        order produced by ``np.unique(label)``.
    """
    # Coerce once so plain sequences support boolean-mask indexing;
    # ndarrays (and ndarray subclasses) pass through unchanged.
    value = np.asanyarray(value)
    weight = np.asanyarray(weight)
    label = np.asanyarray(label)

    def _resample(i):
        # Build the exclusion mask once and reuse it for both arrays.
        keep = label != i
        return np.average(value[keep], weights=weight[keep])

    return [_resample(i) for i in np.unique(label)]
Код: Выделить всё
from functools import partial
from multiprocessing import Pool
def _resample(value, weight, label, i):
    """Return the weighted mean of ``value`` over elements not labelled ``i``.

    Defined at module level because ``Pool.map`` has to pickle the callable
    it sends to the worker processes.
    """
    # Build the exclusion mask once and reuse it for both arrays.
    keep = label != i
    return np.average(value[keep], weights=weight[keep])


def jackknife_resampling_mp(value, weight, label, Npro):
    """Compute leave-one-subsample-out weighted means in a process pool.

    Args:
        value: Array-like of samples.
        weight: Array-like of weights, aligned element-wise with ``value``.
        label: Array-like of integers marking the subsample each element
            belongs to.
        Npro: Number of worker processes handed to ``Pool``.

    Returns:
        A list holding one weighted average per distinct label, in the
        order produced by ``np.unique(label)``.
    """
    # Coerce once so plain sequences support boolean-mask indexing;
    # ndarrays (and ndarray subclasses) pass through unchanged.
    value = np.asanyarray(value)
    weight = np.asanyarray(weight)
    label = np.asanyarray(label)

    # NOTE: the three arrays travel inside the partial, so they are
    # serialized whenever tasks are dispatched to the workers.
    task = partial(_resample, value, weight, label)

    # The context manager shuts the workers down once the results are in;
    # without it every call leaves ``Npro`` processes behind.
    with Pool(Npro) as pool:
        return pool.map(task, np.unique(label))
if __name__ == '__main__':
    # Benchmark driver: times one parallel jackknife run on random data.
    # ``time`` is imported here because none of the import lines visible
    # in this file bring it into scope, so the timing calls below would
    # otherwise raise NameError.
    import time

    Nsamp = 10_000_000  # same value as before, with conventional grouping
    value = np.random.uniform(0, 1, size=Nsamp)
    weight = np.random.uniform(1, 2, size=Nsamp)
    label = np.random.randint(0, 200, size=Nsamp)  # labels drawn from 0..199

    t = time.time()
    jackknife_resampling_mp(value, weight, label, 40)
    print('time', time.time()-t)
Код: Выделить всё
def _resample(i):
    """Return the weighted mean of the shared ``value`` over labels != ``i``.

    Reads the module-level ``value``, ``weight`` and ``label`` arrays that
    ``_init_worker`` publishes, so only the label ``i`` is sent per task.
    """
    # Build the exclusion mask once and reuse it for both arrays.
    keep = label != i
    return np.average(value[keep], weights=weight[keep])


def _init_worker(_value, _weight, _label):
    """Publish the three arrays as module globals of the current process."""
    global value, weight, label
    value = _value
    weight = _weight
    label = _label


def jackknife_resampling_mp(_value, _weight, _label, Npro):
    """Compute leave-one-subsample-out weighted means in a process pool.

    The arrays are shared with the workers through module globals rather
    than being attached to every task.

    Args:
        _value: Array-like of samples.
        _weight: Array-like of weights, aligned element-wise with ``_value``.
        _label: Array-like of integers marking the subsample each element
            belongs to.
        Npro: Number of worker processes handed to ``Pool``.

    Returns:
        A list holding one weighted average per distinct label, in the
        order produced by ``np.unique(_label)``.

    Side effects:
        Rebinds the module globals ``value``, ``weight`` and ``label`` in
        the calling process as well as in each worker.
    """
    # Coerce once so plain sequences support boolean-mask indexing;
    # ndarrays (and ndarray subclasses) pass through unchanged.
    shared = (
        np.asanyarray(_value),
        np.asanyarray(_weight),
        np.asanyarray(_label),
    )

    # Keep setting the globals in the calling process, as before.
    _init_worker(*shared)

    # Relying on the parent's globals alone only works when workers are
    # forked; the initializer sets them explicitly in every worker so the
    # function also works with the spawn and forkserver start methods.
    # The context manager shuts the workers down once the results are in.
    with Pool(Npro, initializer=_init_worker, initargs=shared) as pool:
        return pool.map(_resample, np.unique(shared[2]))
Почему так происходит? И есть ли более удачный способ это сделать?
Подробнее здесь: https://stackoverflow.com/questions/790 ... l-function