# Convert to Polars DataFrame
try:
df = pl.from_pandas(data)
print('Conversion to Polars DataFrame done...')
except Exception as e:
return JsonResponse({'error': f'Error converting to Polars DataFrame: {str(e)}'}, status=500)
# Ensure the necessary columns are present
if 'Store_Key' not in df.columns or 'Product_Key' not in df.columns or 'Rain' not in df.columns:
return JsonResponse({'error': 'Store_Key, Product_Key, and Rain columns are required in the data.'}, status=400)
# Extract unique store and product combinations
unique_store_product_pairs = df[['Store_Key', 'Product_Key', 'Rain']].unique()
# Loop through all store-product combinations to generate predictions
print("# Loop through all store-product combinations to generate predictions")
def predict_sales_for_combination(store, product):
    """Build the seven-day sales prediction row for one store-product pair.

    Reads ``df``, ``model`` and ``user_date`` from the enclosing scope.

    Args:
        store: Value matched against the ``Store_Key`` column.
        product: Value matched against the ``Product_Key`` column.

    Returns:
        A dict with the keys ``store_id``, ``product``, ``MLC_date`` and
        ``day1`` .. ``day7``. Every day is 0 when the pair's summed
        ``Sales`` is zero; otherwise each day holds the model's prediction,
        with any prediction below 1 replaced by 0.
    """
    day_columns = ['Day_1', 'Day_2', 'Day_3', 'Day_4', 'Day_5', 'Day_6', 'Day_7']

    # Filter once and reuse the rows for both the sales total and the rain
    # values instead of scanning the whole frame twice per pair.
    pair_rows = df.filter((pl.col('Store_Key') == store) & (pl.col('Product_Key') == product))
    total_sales = pair_rows.select(pl.sum('Sales')).to_numpy().flatten()[0]

    result = {
        'store_id': store,
        'product': product,
        'MLC_date': pd.to_datetime(user_date),
    }

    # If historical sales are zero, skip the model and return zero predictions.
    # All seven day keys are emitted so this row has the same shape as a
    # predicted row (day7 used to be missing here).
    if total_sales == 0:
        result.update({f'day{day}': 0 for day in range(1, 8)})
        return result

    # Get rain values from Day_1 to Day_7. The matching rows are flattened
    # row by row and only the first seven values are used below.
    rain_values = pair_rows[day_columns].to_numpy().flatten()

    # One model input row per forecast day, each carrying that day's rain value.
    future_rain_df = pl.DataFrame([
        {'Store_Key': store, 'Product_Key': product, 'Rain': rain_values[day]}
        for day in range(7)
    ])

    # Make predictions
    predictions = model.predict(future_rain_df.to_pandas())

    # Collect all days' predicted sales in a single row per store-product
    # pair, replacing predictions below 1 with 0.
    for day in range(7):
        predicted = predictions[day]
        result[f'day{day + 1}'] = predicted if predicted >= 1 else 0
    return result
# Use ThreadPoolExecutor for parallel processing
with ThreadPoolExecutor(max_workers=10) as executor:  # Adjust max_workers as needed
    # Map each future to the row it was submitted for, so a failure can be
    # reported together with the store-product pair that caused it.
    futures = {
        executor.submit(predict_sales_for_combination, row[0], row[1]): row
        for row in unique_store_product_pairs.iter_rows()
    }
    # Dicts iterate in insertion order, so results are collected in the
    # order the pairs were submitted.
    for future, row in futures.items():
        try:
            predicted_sales.append(future.result())
        except Exception as e:
            # Best effort: a failing pair is logged and skipped so the
            # remaining pairs still produce predictions.
            print(f"Error during prediction for store-product pair ({row[0]}, {row[1]}): {e}")
Подробнее здесь: https://stackoverflow.com/questions/790 ... prediction
Оптимизация отношений «один ко многим» для больших наборов данных в прогнозировании продаж ⇐ Python
-
- Похожие темы
- Ответы
- Просмотры
- Последнее сообщение
-
-
Оптимизация построения Trie во Flashtext для больших наборов данных с ключом-значением
Anonymous » в форуме Python - 0 ответов
- 30 просмотров
-
Последнее сообщение Anonymous
-