Я написал символический инструмент регрессии в Python. Можно задать унарные и бинарные операторы. Если ничего не указано, я хочу автоматически определить потенциальные операторы. Итак, я хочу знать, есть ли подход для поиска необходимых операторов. Я пробовал подходы машинного обучения и корреляции, но они не дают хороших результатов для двух определений функций (2*log(x1)+3*x2-x1*x2+5 и exp(x1)).
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import train_test_split
import operator
import sympy
# Reproducible synthetic data set: two uniform features on [1, 10).
np.random.seed(42)
N_SAMPLES = 1000
# Draws happen in order (x1 first, then x2), matching the seeded stream.
x1, x2 = (np.random.uniform(1, 10, size=N_SAMPLES) for _ in range(2))
# Target function the symbolic regressor should recover.
y = 2 * np.log(x1) + 3 * x2 - x1 * x2 + 5
# Alternative target used for the second experiment:
#y = np.exp(x1)
# Unary operator library: name -> (symbolic builder, numeric implementation).
# The symbolic side produces the SymPy expression used as the feature's name;
# the numeric side evaluates the feature on a sample column.
unary_operators = {
    "neg": (lambda x: sympy.sympify("neg(" + str(x) + ")"), operator.neg),
    "abs": (sympy.Abs, operator.abs),
    "inv_": (lambda x: sympy.sympify("inv_(" + str(x) + ")"), lambda x: 1 / x),
    "sqrt": (sympy.sqrt, np.sqrt),
    "cos": (sympy.cos, np.cos),
    "sin": (sympy.sin, np.sin),
    # BUG FIX: this entry was keyed "exp" while holding (sympy.tan, np.tan);
    # the duplicate "exp" key below silently shadowed it, so tan was missing.
    "tan": (sympy.tan, np.tan),
    "log": (sympy.log, np.log),
    "exp": (sympy.exp, np.exp),
    "sinh": (sympy.sinh, np.sinh),
    "cosh": (sympy.cosh, np.cosh),
    # BUG FIX: numeric callables for floor/ceil were swapped
    # (floor -> np.ceil, ceil -> np.floor).
    "floor": (lambda x: sympy.sympify("floor(" + str(x) + ")"), np.floor),
    "ceil": (lambda x: sympy.sympify("ceil(" + str(x) + ")"), np.ceil),
}
# Binary operator library: name -> (symbolic builder, numeric implementation).
binary_operators = {
    "+": (operator.add, operator.add),
    "-": (operator.sub, operator.sub),
    "*": (operator.mul, operator.mul),
    "/": (operator.truediv, operator.truediv),
    "//": (operator.floordiv, operator.floordiv),
    "%": (operator.mod, operator.mod),
    "**": (sympy.Pow, operator.pow)}
# Commutativity flags: a True entry means op(a, b) == op(b, a), so each
# unordered variable pair is generated only once.
# BUG FIX: "-" was marked True (subtraction is NOT commutative, so x2-x1 was
# silently never generated) and "*" was marked False (multiplication IS
# commutative, so duplicate products were generated).  The "conv" entry
# referenced no operator in this script and was removed.  Operators absent
# from this dict get the full ordered cross product, as before.
symmetric_binary_operators = {"+": True, "*": True}
# Symbolic variables, one per input feature.
symbols = sympy.symbols("x1 x2")
X = [x1, x2]
# Raw design matrix: one column per input variable.
# BUG FIX: the original loop body ignored the index i and executed
# `X_raw[str(symbols)] = X`, i.e. it assigned the whole list to a single
# column named "(x1, x2)"; every per-variable lookup below then failed.
X_raw = pd.DataFrame()
for i in range(len(symbols)):
    X_raw[str(symbols[i])] = X[i]
feature_dict = {}  # feature name -> numeric column
base_vars = [str(x) for x in symbols]
ops = {}  # feature name -> name of the operator that produced it
# Apply every unary operator to every raw variable, recording both the
# numeric column and which operator produced each candidate feature.
for op_name, (sym_fn, num_fn) in unary_operators.items():
    for var in symbols:
        feature_name = str(sym_fn(var))
        feature_dict[feature_name] = num_fn(X_raw[str(var)])
        ops[feature_name] = op_name
# Apply every binary operator to variable pairs.
# BUG FIX: the original tested `if v` — v being the (symbolic, numeric)
# operator tuple, which is always truthy — instead of the commutativity
# flag, so the flag value in symmetric_binary_operators was never consulted.
for op_name, (sym_fn, num_fn) in binary_operators.items():
    for i1 in range(len(symbols)):
        if op_name in symmetric_binary_operators:
            # Commutative ops need each unordered pair once; the flag also
            # controls whether the diagonal (var op var) is kept.
            start = i1 + 1 if symmetric_binary_operators[op_name] else i1
            pair_indices = range(start, len(base_vars))
        else:
            # Non-commutative ops get the full ordered cross product.
            pair_indices = range(len(symbols))
        for i2 in pair_indices:
            feature_name = str(sym_fn(symbols[i1], symbols[i2]))
            feature_dict[feature_name] = num_fn(X_raw[str(symbols[i1])], X_raw[str(symbols[i2])])
            ops[feature_name] = op_name
# Assemble the candidate-feature matrix; neutralise non-finite entries and
# drop columns whose magnitude explodes (e.g. exp/sinh overflow) before
# any scoring is done.
X_feat = pd.DataFrame(feature_dict)
X_feat = X_feat.replace([np.inf, -np.inf, np.nan], 0)
bounded_cols = X_feat.abs().max() < 1e6
X_feat = X_feat.loc[:, bounded_cols]
# Pearson correlation of every candidate feature with the target.
correlations = X_feat.apply(lambda col: np.corrcoef(col, y)[0, 1])
# Sort by decreasing absolute value.
correlations_sorted = correlations.abs().sort_values(ascending=False)
print("Top 10 features les plus corrélées (Pearson):\n")
for name in correlations_sorted.index[:10]:
    print(f"{name:30} corr: {correlations[name]:+.4f}")
from sklearn.preprocessing import StandardScaler

# Standardise the features (a RandomForest is scale-invariant, but this keeps
# the matrix reusable with scale-sensitive models).
scaler = StandardScaler()
X_feat = pd.DataFrame(scaler.fit_transform(X_feat), columns=X_feat.columns)
# NOTE: RandomForestRegressor and train_test_split are already imported at
# the top of the file; the redundant duplicate imports were removed.
X_train, X_test, y_train, y_test = train_test_split(X_feat, y, test_size=0.2, random_state=0)
model = RandomForestRegressor(n_estimators=200, random_state=0)  # max_depth=10 optional
model.fit(X_train, y_train)
importances = model.feature_importances_
# Rank candidate features by decreasing importance.
features_sorted = sorted(zip(X_feat.columns, importances), key=lambda t: t[1], reverse=True)
# Augment each (name, importance) pair with the drop in importance relative
# to the previously ranked feature; a large drop marks the boundary between
# genuinely useful features and the noise floor (first row's drop equals its
# own importance, since the running value starts at 0).
# BUG FIX: the original did `x = features_sorted` (missing the [i] index), so
# `x[1]` read the second tuple of the list and `x.append(...)` appended bare
# floats to the list itself; the 3-way unpack below would then crash.
prev_importance = 0.0
ranked = []
for name, score in features_sorted:
    ranked.append((name, score, abs(score - prev_importance)))
    prev_importance = score
features_sorted = ranked
print("Top 10 features les plus utiles :\n")
for name, score, drop in features_sorted[:10]:
    print(f"{name:30} importance: {score:.4f} {drop:.4f}")
# Selection thresholds: a feature "counts" when the importance drop that
# separates it from the next-ranked feature exceeds min_r.
min_score = 0.015  # kept for experimentation with the importance criterion
min_r = 0.05
un_ops = set()
bin_ops = set()
for name, score, r in features_sorted:
    #if (score > min_score):
    if r > min_r:
        selected_op = ops[name]
        if selected_op in unary_operators:
            un_ops.add(selected_op)
        elif selected_op in binary_operators:
            bin_ops.add(selected_op)
print("un_ops", un_ops)
print("bin_ops", bin_ops)
</code>
Соответствующий вывод: </p>
Top 10 features les plus corrélées (Pearson):
x1*x2 corr: -0.9480
x1 + x2 corr: -0.8527
neg(x1) corr: +0.8298
Abs(x1) corr: -0.8298
ceil(x1) corr: -0.8244
floor(x1) corr: -0.8244
sqrt(x1) corr: -0.8170
log(x1) corr: -0.7880
inv_(x1) corr: +0.6826
sinh(x1) corr: -0.6305
Top 10 features les plus utiles :
x1*x2 importance: 0.7583 0.7583
x1 + x2 importance: 0.0850 0.6734
log(x1) importance: 0.0192 0.0658
inv_(x1) importance: 0.0170 0.0022
sinh(x1) importance: 0.0164 0.0007
neg(x1) importance: 0.0162 0.0002
sqrt(x1) importance: 0.0159 0.0003
exp(x1) importance: 0.0159 0.0001
cosh(x1) importance: 0.0151 0.0008
Abs(x1) importance: 0.0137 0.0014
un_ops {'log'}
bin_ops {'*', '+'}
Подробнее здесь: https://stackoverflow.com/questions/796 ... regression
Найдите необходимых операторов для символической регрессии ⇐ Python
-
- Похожие темы
- Ответы
- Просмотры
- Последнее сообщение
-
-
Замена операторов if циклом for для необходимых журналов в регистраторе [закрыто]
Anonymous » » в форуме Python - 0 Ответы
- 6 Просмотры
-
Последнее сообщение Anonymous
-