Проект A PHQ-9 Оценка одного месяца в будущем
Однако, когда я пытаюсь сделать это, в рамках данных существует только одно предсказание, хотя несколько записей с разными датами. /> main.py (Extract): < /strong> < /p>
Код: Выделить всё
# Get the user input from the GUI
user_data_df, date_series, feature_order = get_user_input_gui("reduced_training_data.csv", feature_name_map)
print("DEBUG - DataFrame Columns:", user_data_df.columns.tolist())
if user_data_df is not None:
# Select the latest input (last row of the DataFrame)
latest_input_row = user_data_df.iloc[-1] # This gets the last (most recent) row
# Ensure that date column is included and properly formatted (if missing)
latest_input_row['date'] = pd.to_datetime(latest_input_row['date'], errors='coerce')
# Project and guide the user based on their latest input
future_phq9, target_value, counterfactuals_result = project_phq9_and_suggest(
user_data_df=latest_input_row.to_frame().T, # Convert the row to a DataFrame (single row)
date_series=date_series,
model=model,
X_train=X_train,
X_test=X_test,
actionable_features=actionable_features,
reduced_features_path=reduced_features_path,
counterfactuals_func=counterfactuals,
num_solutions=num_solutions,
)
print("Final collected user data:")
print(user_data_df.head())
print("Dates:")
print(date_series)
Код: Выделить всё
import tkinter as tk
import pandas as pd
from tkinter import messagebox
def get_user_input_gui(csv_path, feature_name_map=None):
# Load the CSV and extract feature names
df = pd.read_csv(csv_path)
feature_names = [col for col in df.columns if col != 'PHQ9_Target']
medians = df[feature_names].median()
# Store multiple entries
all_entries = []
all_dates = [] # List to store dates separately
final_result = None # To store the result when the window closes
def submit_entry():
try:
user_data = {}
date_value = date_entry.get()
# Check if the date field is empty
if not date_value:
raise ValueError("Date field cannot be empty. Please provide a valid date.")
# Convert the date input to a datetime format
user_data["date"] = pd.to_datetime(date_value, errors='coerce')
if pd.isna(user_data["date"]):
raise ValueError(f"Invalid date format: {date_value}. Please use YYYY-MM-DD.")
# Collect feature data (without the 'date' feature)
for feature, entry in entry_widgets.items():
value = entry.get()
if feature == "date":
continue # Skip the date, as we already handled it
try:
user_data[feature] = float(value) # Ensure numeric data
except ValueError:
raise ValueError(f"Invalid numeric value: {value} for feature {feature}. Please enter a valid number.")
all_entries.append(user_data)
all_dates.append(user_data["date"]) # Store the date separately
# Debug print to check contents of entries
print("All Entries after adding this entry:", all_entries)
messagebox.showinfo("Success", "Entry added successfully!")
# Clear the entry fields
for entry in entry_widgets.values():
entry.delete(0, tk.END)
date_entry.delete(0, tk.END)
except ValueError as e:
messagebox.showerror("Invalid Input", str(e))
def finish():
nonlocal final_result # This ensures that final_result can be modified inside the function
if not all_entries:
messagebox.showwarning("No data", "No entries were added.")
return
# Debug print before finishing
print("Final entries to process:", all_entries)
# After collecting all entries, process them into a DataFrame and Date Series
final_df = pd.DataFrame(all_entries)
date_series = pd.Series(all_dates).reset_index(drop=True)
# Debugging outputs
print("DataFrame after all entries:", final_df)
print("Date Series:", date_series)
final_result = (final_df, date_series, feature_names) # Store the result in the nonlocal variable
window.quit() # Close the tkinter window
# Create GUI window
window = tk.Tk()
window.title("Enter User Data")
# Date input
tk.Label(window, text="Date (YYYY-MM-DD):").grid(row=0, column=0)
date_entry = tk.Entry(window)
date_entry.grid(row=0, column=1)
# Input fields + median labels for the features
entry_widgets = {}
for idx, feature in enumerate(feature_names):
row_num = idx + 1
friendly_name = feature_name_map.get(feature, feature) if feature_name_map else feature
tk.Label(window, text=friendly_name).grid(row=row_num, column=0)
entry = tk.Entry(window)
entry.grid(row=row_num, column=1)
entry_widgets[feature] = entry
# Show median value next to the entry
median_val = round(medians[feature], 2)
tk.Label(window, text=f"Median: {median_val}", fg="gray").grid(row=row_num, column=2)
# Buttons
tk.Button(window, text="Add Entry", command=submit_entry).grid(row=len(feature_names) + 1, column=0)
tk.Button(window, text="Done", command=finish).grid(row=len(feature_names) + 1, column=1)
window.mainloop()
# After window is closed, return the result
return final_result
def project_phq9_and_suggest(user_data_df, date_series, model, X_train, X_test, actionable_features, reduced_features_path, counterfactuals_func, num_solutions):
# Ensure 'date' column is properly set and datetime
user_data_df['date'] = pd.to_datetime(date_series.reset_index(drop=True), errors='coerce')
if user_data_df['date'].isna().any():
print("Dropping rows with invalid dates...")
user_data_df = user_data_df.dropna(subset=['date'])
if user_data_df.empty:
raise ValueError("No valid data left after date cleaning")
# Sort by date
user_data_df = user_data_df.sort_values('date').reset_index(drop=True)
# Filter feature columns (exclude known non-features)
columns_to_remove = ['date', 'PHQ9_Predicted', 'PHQ-9_Predicted', 'phq9_fit', 'days_since_start']
feature_columns = [col for col in user_data_df.columns if col not in columns_to_remove]
X_user = user_data_df[feature_columns].apply(pd.to_numeric, errors='coerce')
if X_user.isnull().any().any():
print("Found NaNs in input data. Dropping rows with missing values...")
valid_idx = ~X_user.isnull().any(axis=1)
user_data_df = user_data_df.loc[valid_idx].reset_index(drop=True)
X_user = X_user.loc[valid_idx].reset_index(drop=True)
if user_data_df.empty:
raise ValueError("No valid rows left after dropping NaNs")
# Predict PHQ-9 using the model
print("X_user shape:", X_user.shape)
phq9_preds = model.predict(X_user)
user_data_df['PHQ9_Predicted'] = phq9_preds
# Trendline regression (days since start)
user_data_df['days_since_start'] = (user_data_df['date'] - user_data_df['date'].min()).dt.days
X_days = user_data_df[['days_since_start']]
y_phq9 = user_data_df['PHQ9_Predicted']
reg = LinearRegression()
reg.fit(X_days, y_phq9)
user_data_df['phq9_fit'] = reg.predict(X_days)
# Project one month ahead
last_date = user_data_df['date'].max()
target_date = last_date + pd.DateOffset(months=1)
future_day = (target_date - user_data_df['date'].min()).days
future_phq9 = reg.predict(np.array([[future_day]]))[0]
target_value = future_phq9 - 0.5
print(f"\nProjected PHQ-9 on {target_date.date()}: {future_phq9:.2f}")
print(f"Target PHQ-9 Score (wellness goal): {target_value:.2f}")
# Plot everything
plt.figure(figsize=(12, 6))
plt.plot(user_data_df['date'], user_data_df['PHQ9_Predicted'], 'o-', label='Predicted PHQ-9')
plt.plot(user_data_df['date'], user_data_df['phq9_fit'], 'r-', label='Line of Best Fit')
plt.scatter(target_date, future_phq9, color='purple', s=100, label='Projected (1 Month)', zorder=5)
plt.axhline(target_value, color='green', linestyle='--', label=f'Target PHQ-9 = {target_value:.2f}')
plt.title('PHQ-9 Predictions Over Time')
plt.xlabel('Date')
plt.ylabel('Predicted PHQ-9')
plt.xticks(rotation=45)
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()
< /code>
Что я сделал: < /p>
Я собираю все пользовательские входы (функции + дата). < /p>
Я удаляю столбец даты перед прогнозом, потому что модель не поддерживает ее. < /p>
Я запускаю модель. Predict () на основе данных (x_user), а затем rattach to the Prestic DataFrame. < /P>
Я пытаюсь использовать matplotlib для построения всех прогнозов PHQ-9 против их соответствующей даты. < /P>
Ожидаемое: < /p>
Множественные прогнозируемые показатели PHQ-9 должны быть нанесены на график по значениям совпадения. Существуют строки данных. < /p>
Линия регрессии и проекция работают нормально, но рассеяние показывает только одно значение. < /p>
Результаты: < /p>
< /p>
Результаты: < /p>
Правильная форма Dataferfect после выхода кода GUI и вход Main.py - это [2 строки x 21 колонки] (правильная форма). Project_phq9_and_suggest.py Is (1, 20)
Подробнее здесь: https://stackoverflow.com/questions/795 ... odel-and-t