from ddopai.envs.inventory.single_period import NewsvendorEnv
from ddopai.dataloaders.tabular import XYDataLoader
from ddopai.experiments.experiment_functions import run_experiment, test_agent
PPO agents
PPO based agent
PPOAgent
PPOAgent (environment_info:ddopai.utils.MDPInfo, learning_rate_actor:float=0.0003, learning_rate_critic:float|None=None, batch_size:int=64, hidden_layers:List=None, activation:str='relu', std_0:float=0.1, n_epochs_policy:int=4, eps_ppo:float=0.2, lam:float=0.95, ent_coeff:float=0.0, n_steps_per_fit=1000, drop_prob:float=0.0, batch_norm:bool=False, init_method:str='xavier_uniform', optimizer:str='Adam', loss:str='MSE', obsprocessors:list|None=None, device:str='cpu', agent_name:str|None='SAC')
Proximal Policy Optimization (PPO) agent that uses separate actor and critic networks.
Parameter | Type | Default | Details |
---|---|---|---|
environment_info | MDPInfo | ||
learning_rate_actor | float | 0.0003 | |
learning_rate_critic | float \| None | None | If None, it is set to learning_rate_actor |
batch_size | int | 64 | |
hidden_layers | List | None | if None, then default is [64, 64] |
activation | str | relu | “relu”, “sigmoid”, “tanh”, “leakyrelu”, “elu” |
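std_0 | float | 0.1 | presumably the initial standard deviation of the policy (to be confirmed); the original note here, “tau: float = 0.005,”, appears to be a leftover from another agent's signature |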
n_epochs_policy | int | 4 | |
eps_ppo | float | 0.2 | |
lam | float | 0.95 | |
ent_coeff | float | 0.0 | |
n_steps_per_fit | int | 1000 | |
drop_prob | float | 0.0 | |
batch_norm | bool | False | |
init_method | str | xavier_uniform | “xavier_uniform”, “xavier_normal”, “he_normal”, “he_uniform”, “normal”, “uniform” |
optimizer | str | Adam | “Adam” or “SGD” or “RMSprop” |
loss | str | MSE | currently only MSE is supported |
obsprocessors | list \| None | None | default: [] |
device | str | cpu | “cuda” or “cpu” |
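agent_name | str \| None | SAC | Name of the agent. Note: the default is “SAC” although this is the PPO agent, which looks like a copy-paste leftover; confirm before relying on it |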
val_index_start = 8000 #90_000
test_index_start = 9000 #100_000
X = np.random.standard_normal((10000, 2))
Y = np.random.standard_normal((10000, 1))
Y += 2*X[:,0].reshape(-1, 1) + 3*X[:,1].reshape(-1, 1)
Y = X[:,0].reshape(-1, 1)
# truncate Y at 0:
Y = np.maximum(Y, 0)
# normalize Y max to 1
Y = Y/np.max(Y)
print(np.max(Y))
print(X.shape, Y.shape)
clip_action = ClipAction(0., 1.)
dataloader = XYDataLoader(X, Y, val_index_start, test_index_start, lag_window_params = {'lag_window': 0, 'include_y': False, 'pre_calc': True})
environment = NewsvendorEnv(
    dataloader = dataloader,
    underage_cost = 0.42857,
    overage_cost = 1.0,
    gamma = 0.999,
    horizon_train = 365,
    q_bound_high = 1.0,
    q_bound_low = -0.1,
    postprocessors = [clip_action],
)
agent = PPOAgent(environment.mdp_info,
    obsprocessors = None, # default: []
    device="cpu", # "cuda" or "cpu"
)
environment.test()
agent.eval()
R, J = test_agent(agent, environment)
print(R, J)
environment.train()
agent.train()
environment.print=False
# run_experiment(agent, environment, n_epochs=50, n_steps=1000, run_id = "test", save_best=True, print_freq=1) # fit agent via run_experiment function
environment.test()
agent.eval()
R, J = test_agent(agent, environment)
print(R, J)
1.0
(10000, 2) (10000, 1)
/Users/magnus/miniforge3/envs/inventory_gym_2/lib/python3.11/site-packages/gymnasium/spaces/box.py:130: UserWarning: WARN: Box bound precision lowered by casting to float32
gym.logger.warn(f"Box bound precision lowered by casting to {self.dtype}")
INFO:root:Actor network:
/Users/magnus/miniforge3/envs/inventory_gym_2/lib/python3.11/site-packages/torchinfo/torchinfo.py:462: UserWarning: TypedStorage is deprecated. It will be removed in the future and UntypedStorage will be the only storage class. This should only matter to you if you are using storages directly. To access UntypedStorage directly, use tensor.untyped_storage() instead of tensor.storage()
action_fn=lambda data: sys.getsizeof(data.storage()),
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
MLPActor [1, 1] --
├─Sequential: 1-1 [1, 1] --
│ └─Linear: 2-1 [1, 64] 192
│ └─ReLU: 2-2 [1, 64] --
│ └─Dropout: 2-3 [1, 64] --
│ └─Linear: 2-4 [1, 64] 4,160
│ └─ReLU: 2-5 [1, 64] --
│ └─Dropout: 2-6 [1, 64] --
│ └─Linear: 2-7 [1, 1] 65
│ └─Identity: 2-8 [1, 1] --
==========================================================================================
Total params: 4,417
Trainable params: 4,417
Non-trainable params: 0
Total mult-adds (M): 0.00
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.02
Estimated Total Size (MB): 0.02
==========================================================================================
INFO:root:Critic network:
==========================================================================================
Layer (type:depth-idx) Output Shape Param #
==========================================================================================
MLPState [1, 1] --
├─Sequential: 1-1 [1, 1] --
│ └─Linear: 2-1 [1, 64] 192
│ └─ReLU: 2-2 [1, 64] --
│ └─Dropout: 2-3 [1, 64] --
│ └─Linear: 2-4 [1, 64] 4,160
│ └─ReLU: 2-5 [1, 64] --
│ └─Dropout: 2-6 [1, 64] --
│ └─Linear: 2-7 [1, 1] 65
│ └─Identity: 2-8 [1, 1] --
==========================================================================================
Total params: 4,417
Trainable params: 4,417
Non-trainable params: 0
Total mult-adds (M): 0.00
==========================================================================================
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.02
Estimated Total Size (MB): 0.02
==========================================================================================
-44.039980104932894 -28.64890791879266
-44.039980104932894 -28.64890791879266