Here is the code:
agent.py
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from collections import deque
class DQN(nn.Module):
    """Three-layer MLP that maps a state vector to one Q-value per action."""

    def __init__(self, state_size, action_size):
        super().__init__()
        # Two hidden layers of 128 units, then a linear head over actions.
        self.fc1 = nn.Linear(state_size, 128)
        self.fc2 = nn.Linear(128, 128)
        self.fc3 = nn.Linear(128, action_size)

    def forward(self, x):
        hidden = F.relu(self.fc1(x))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)
class DQNAgent:
    """Epsilon-greedy DQN agent with an experience-replay buffer and a
    periodically-synchronized target network.

    NOTE(review): this code was recovered from a scraped web page in which
    everything between a '<' and the following '>' was swallowed as an HTML
    tag. The body of act() and the whole replay() method were lost; they are
    reconstructed below following the standard DQN pattern and are marked.
    """

    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        # Hyperparameters
        self.gamma = 0.95            # discount factor
        self.epsilon = 1.0           # initial exploration rate
        self.epsilon_min = 0.01      # exploration floor
        self.epsilon_decay = 0.999   # multiplicative decay applied in replay()
        self.lr = 0.001
        self.batch_size = 32
        self.memory = deque(maxlen=2000)  # FIFO replay buffer
        # Q-Network and its target copy
        self.model = DQN(state_size, action_size)
        self.target_model = DQN(state_size, action_size)
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        self.update_target_model()

    def update_target_model(self):
        """Copy the online network's weights into the target network."""
        self.target_model.load_state_dict(self.model.state_dict())

    def remember(self, state, action, reward, next_state, done):
        """Store one (s, a, r, s', done) transition in the replay buffer."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Choose an action epsilon-greedily from the online network.

        NOTE(review): body reconstructed — the original was lost to scraping.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        state_t = torch.FloatTensor(np.asarray(state)).unsqueeze(0)
        with torch.no_grad():
            q_values = self.model(state_t)
        return int(torch.argmax(q_values, dim=1).item())

    def replay(self):
        """Train on one random minibatch from the buffer, then decay epsilon.

        NOTE(review): reconstructed — the original method was lost to
        scraping; only its trailing epsilon-decay lines survived on the page.
        """
        if len(self.memory) < self.batch_size:
            return
        minibatch = random.sample(self.memory, self.batch_size)
        states, actions, rewards, next_states, dones = zip(*minibatch)
        states = torch.FloatTensor(np.asarray(states))
        actions = torch.LongTensor(actions).unsqueeze(1)
        rewards = torch.FloatTensor(rewards)
        next_states = torch.FloatTensor(np.asarray(next_states))
        dones = torch.FloatTensor([float(d) for d in dones])
        # Q(s, a) for the actions actually taken.
        q_values = self.model(states).gather(1, actions).squeeze(1)
        # Bootstrapped target from the frozen target network.
        with torch.no_grad():
            next_q = self.target_model(next_states).max(dim=1)[0]
        targets = rewards + self.gamma * next_q * (1.0 - dones)
        loss = F.mse_loss(q_values, targets)
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

    def train_dqn(self, env, episodes, train_days=3, target_update_freq=5000):
        """Train for `episodes` episodes, each spanning `train_days` env days.

        The target network is hard-synced every `target_update_freq` steps;
        one replay() update runs at the end of each simulated day.
        """
        step_count = 0
        for e in range(episodes):
            total_reward = 0
            for day in range(1, train_days + 1):
                state = env.reset(day=day)
                done = False
                day_reward = 0
                while not done:
                    action = self.act(state)
                    next_state, reward, done = env.step(action, day=day)
                    # env.step returns None for next_state at episode end.
                    if next_state is not None:
                        self.remember(state, action, reward, next_state, done)
                        state = next_state
                    day_reward += reward
                    step_count += 1
                    if step_count % target_update_freq == 0:
                        self.update_target_model()
                total_reward += day_reward
                self.replay()
            print(f"Episode {e + 1}/{episodes}, Total Reward: {total_reward:.2f}, Epsilon: {self.epsilon:.4f}")

    def test_dqn(self, env, test_day):
        """Roll out one day greedily (epsilon forced to 0), printing actions."""
        self.epsilon = 0.0
        state = env.reset(day=test_day, testing_mode=True)
        done = False
        total_reward = 0
        while not done:
            action = self.act(state)
            print(f"Time Step {env.time_step}, Action: {env.get_action_name(action)}")
            # Perform the step
            next_state, reward, done = env.step(action, day=test_day)
            if next_state is not None:
                state = next_state
            total_reward += reward
        print(f"Total Reward for Day {test_day}: {total_reward:.2f}")
reward_function.py
class RewardFunction:
    """Reward shaping for a home-energy RL environment: scores battery
    charge/discharge/idle actions and appliance activate/delay actions.

    NOTE(review): recovered from a scraped web page where everything between
    a '<' and the following '>' was swallowed as an HTML tag. Three spans
    were lost and are reconstructed below (each is marked): the tail of
    _discharge_battery plus the head of _manage_appliance_with_running_time,
    the _idle_battery method (called by calculate_reward but entirely
    missing from the page), the comparison in _balance_energy, and the body
    of _is_in_preferred_time.
    """

    # Reward magnitudes by tier.
    REWARD = {
        "Low": 0.5,
        "Medium": 0.75,
        "High": 1.0,
    }
    # Penalty magnitudes (negative) by tier.
    PENALTY = {
        "Low": -0.5,
        "Medium": -0.75,
        "High": -1.0,
    }

    def __init__(self, battery_capacity, appliance_preferences):
        self.battery_capacity = battery_capacity
        # Mapping: appliance name -> preference dict (comfort_level,
        # time_intervals, mandatory_usage, running_time).
        self.appliance_preferences = appliance_preferences
        self.max_daily_activations = 2
        self.active_appliances = {}
        self.appliance_activation_count = {}
        self.delayed_appliance_penalties = {}
        self.battery_idle_count = 0
        self.charge_count = 0
        self.discharge_count = 0
        self.running_time_tracker = {}

    def reset_daily_counters(self):
        """Clear all per-day counters; called at hour 0 of each day."""
        self.appliance_activation_count = {}
        self.active_appliances = {}
        self.delayed_appliance_penalties = {}
        self.battery_idle_count = 0
        self.charge_count = 0
        self.discharge_count = 0
        self.running_time_tracker = {}

    def calculate_reward(self, action, current_data, hour, battery_charge):
        """Score `action` for this hour.

        `current_data` supplies 'production' (scalar) and 'consumption'
        (per-appliance list). Returns (reward, updated_battery_charge);
        appliance actions do not modify the returned battery charge.
        """
        production = current_data['production']
        consumption = current_data['consumption']
        reward = 0
        if hour == 0:
            self.reset_daily_counters()
        # Battery Management Actions
        if action == 0:  # Charge battery
            reward, battery_charge = self._charge_battery(production, consumption, battery_charge, reward)
        elif action == 1:  # Discharge battery
            reward, battery_charge = self._discharge_battery(battery_charge, production, consumption, reward)
        elif action == 2:  # Idle battery
            reward = self._idle_battery(reward, production, consumption)
        # Appliance Actions
        if action >= 3:
            reward = self._manage_appliance_with_running_time(action, consumption, production, hour, battery_charge, reward)
        # Energy Balancing
        reward = self._balance_energy(production, consumption, reward)
        return reward, battery_charge

    def _charge_battery(self, production, consumption, battery_charge, reward):
        """Score a charge action; returns (reward, battery_charge)."""
        total_consumption = sum(consumption)
        net_production = production - total_consumption  # Excess energy
        self.charge_count += 1
        # Escalating penalty discourages charging repeatedly within a day.
        reward += self.PENALTY["Medium"] * self.charge_count
        if battery_charge >= self.battery_capacity:
            reward += self.PENALTY['Medium']  # battery already full
        elif net_production > 0 and battery_charge < self.battery_capacity:
            charge_amount = min(net_production, self.battery_capacity - battery_charge)
            battery_charge += charge_amount
            reward += self.REWARD["High"]  # stored genuine surplus
        return reward, battery_charge

    def _discharge_battery(self, battery_charge, production, consumption, reward):
        """Score a discharge action; returns (reward, battery_charge)."""
        net_demand = sum(consumption) - production  # Energy deficit
        self.discharge_count += 1
        reward += self.PENALTY["Medium"] * self.discharge_count
        if net_demand > 0 and battery_charge > 0:
            self.discharge_count += 1
            discharge_amount = min(net_demand, battery_charge)
            battery_charge -= discharge_amount
            reward += self.REWARD["High"]
        elif battery_charge <= 0:
            # NOTE(review): reconstructed branch — the original text after
            # 'elif battery_charge <' was lost to scraping. Assumed penalty
            # for trying to discharge an empty battery; confirm severity.
            reward += self.PENALTY["Medium"]
        return reward, battery_charge

    def _idle_battery(self, reward, production, consumption):
        """Score an idle-battery action.

        NOTE(review): entirely reconstructed — the original method (called
        from calculate_reward) was lost to scraping. Semantics here are a
        best guess: idling while surplus energy is wasted is mildly
        penalized, otherwise mildly rewarded. Verify against the original.
        """
        self.battery_idle_count += 1
        if production - sum(consumption) > 0:
            reward += self.PENALTY["Low"]
        else:
            reward += self.REWARD["Low"]
        return reward

    def _manage_appliance_with_running_time(self, action, consumption, production, hour, battery_charge, reward):
        """Score an appliance activate/delay action (actions >= 3).

        Odd actions activate appliance (action - 3) // 2; even actions delay
        it. Returns only the updated reward — battery_charge changes here are
        local (NOTE(review): the original likewise did not return them).
        """
        # NOTE(review): the signature (known from the call site) and these
        # first lines are reconstructed; the span was lost to scraping.
        appliance_list = list(self.appliance_preferences.keys())
        appliance_index = (action - 3) // 2
        if appliance_index >= len(appliance_list):
            return reward
        appliance_name = appliance_list[appliance_index]
        appliance = self.appliance_preferences[appliance_name]
        comfort_level = appliance.get("comfort_level", 1)
        appliance_consumption = consumption[appliance_index]
        preferred_times = appliance.get("time_intervals", [])
        mandatory = appliance.get('mandatory_usage', False)
        running_time = appliance.get('running_time', 1)
        if action % 2 != 0:  # Activate appliance
            if appliance_name in self.running_time_tracker:
                last_activated_hour = self.running_time_tracker[appliance_name]
                if hour < last_activated_hour + running_time:
                    # Penalize reactivation before running time ends
                    reward += self.PENALTY["High"] * comfort_level * self.appliance_activation_count[appliance_name]
            self.running_time_tracker[appliance_name] = hour
            self.appliance_activation_count.setdefault(appliance_name, 0)
            self.appliance_activation_count[appliance_name] += 1
            if self.appliance_activation_count[appliance_name] > self.max_daily_activations:
                reward += self.PENALTY["High"] * comfort_level * self.appliance_activation_count[appliance_name]
            if self._is_in_preferred_time(hour, preferred_times):
                reward += self.REWARD["High"] * comfort_level if mandatory else self.REWARD["Medium"]
            else:
                reward += self.PENALTY["Medium"] * comfort_level if mandatory else self.PENALTY['High'] * comfort_level
            if battery_charge >= appliance_consumption:
                battery_charge -= appliance_consumption
                reward += self.REWARD["High"]
            else:
                reward += self.PENALTY["Medium"]
        elif action % 2 == 0:  # Delay appliance
            self.delayed_appliance_penalties[appliance_name] = self.delayed_appliance_penalties.get(
                appliance_name, 0) + 1
            if self.delayed_appliance_penalties[appliance_name] > self.max_daily_activations:
                penalty = self.PENALTY["High"] * comfort_level * self.delayed_appliance_penalties[appliance_name]
                reward += penalty * 2 if mandatory else penalty
            if production == 0 and battery_charge == 0:
                # Delaying when no energy at all is available is sensible.
                reward += self.REWARD["Low"]
                self.delayed_appliance_penalties[appliance_name] = 0
        return reward

    def _balance_energy(self, production, consumption, reward):
        """Reward near-balance, penalize surplus, reward managed deficits."""
        energy_diff = production - sum(consumption)
        # NOTE(review): the comparison and the near-balance branch are
        # reconstructed — the original threshold was lost to scraping.
        if abs(energy_diff) < 1e-9:
            reward += self.REWARD["Medium"]
        elif energy_diff > 0:
            reward += self.PENALTY["Low"]  # Penalty for unused production
        else:
            reward += self.REWARD["High"]  # Reward for managing deficits
        return reward

    def _is_in_preferred_time(self, hour, preferred_times):
        """True if `hour` falls in any [start, end) interval of the list.

        NOTE(review): body reconstructed — lost to scraping.
        """
        return any(start <= hour < end for start, end in preferred_times)
# NOTE(review): orphaned tail of an environment step() method. The method
# header and all preceding logic (including the `if self.time_step > 23:`
# guard this `done = True` belongs to) were destroyed when the page was
# scraped — HTML swallowed everything between a '<' and the next '>'.
# Kept byte-identical; do not trust this fragment in isolation.
done = True
# No next observation once the episode is over; otherwise rebuild the state.
next_state = self._get_state(day) if not done else None
return next_state, reward, done
def _get_state(self, day, data=False):
    """Build the observation for `day` at the current time step.

    With data=True, return a dict of raw weather/production/consumption
    values; otherwise return the flat numpy state vector fed to the agent.
    In testing mode, averaged profiles replace the per-day recordings.
    """
    weather = self.weather_forecast[day][self.time_step]
    if self.testing_mode:
        production = self.avg_production[self.time_step]
        consumption = [self.avg_consumption[name][self.time_step]
                       for name in self.appliance_names]
    else:
        # Missing days/appliances fall back to all-zero 24-hour profiles.
        day_production = self.hourly_productions.get(day, [0] * 24)
        production = day_production[self.time_step]
        day_consumption = self.appliance_consumptions.get(day, {})
        consumption = [day_consumption.get(name, [0] * 24)[self.time_step]
                       for name in self.appliance_names]
    if data:
        return {
            "solar_radiation": weather["solarRadiation"],
            "temperature": weather["temperature"],
            "humidity": weather["humidity"],
            "production": production,
            "consumption": consumption
        }
    state = [
        self.time_step,
        self.battery_charge,
        self.battery_capacity,
        weather["solarRadiation"],
        weather["temperature"],
        weather["humidity"],
        production,
    ]
    state.extend(consumption)
    return np.array(state)
def get_action_name(self, action_index):
    """Return the display name for `action_index`, or a fallback label."""
    fallback = "Unknown Action"
    return self.actions.get(action_index, fallback)
More details here: https://stackoverflow.com/questions/792 ... timization
(Mobile version)