"""Generate the weights for the grid."""
import os
import pickle
import numpy as np
import pandas as pd
import torch
from sklearn.preprocessing import MinMaxScaler
from tqdm import tqdm
from grid import get_neighbors, load_grid
from lstm import LSTMModel, train_lstm_model

# Set device to GPU if available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def create_sequences(data: np.ndarray, seq_length: int) -> tuple[np.ndarray, np.ndarray]:
    """
    Create sliding-window sequences of length seq_length from the data.

    Args:
        data (np.ndarray): Data to create sequences from.
        seq_length (int): Length of each input window.

    Returns:
        tuple[np.ndarray, np.ndarray]: Input windows X of shape
            (len(data) - seq_length, seq_length, ...) and targets y, where
            y[i] is the value immediately following window X[i].
    """
X = []
y = []
for i in range(len(data) - seq_length):
X.append(data[i: (i + seq_length)])
y.append(data[i + seq_length])
return np.array(X), np.array(y)
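
# Usage sketch for create_sequences (illustrative values, not from the
# training data): a length-8 series with seq_length=3 yields 5 windows
# and 5 targets:
#     X, y = create_sequences(np.arange(8, dtype=float), 3)
#     X.shape, y.shape  # -> ((5, 3), (5,))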


def create_models_for_pair(
    stock_A: pd.Series, stock_B: pd.Series, seq_length: int = 5, epochs: int = 100
) -> tuple[tuple[LSTMModel, MinMaxScaler], tuple[LSTMModel, MinMaxScaler]]:
    """
    Create and train LSTM models for a pair of stocks.

    Args:
        stock_A (pd.Series): Time series of stock A.
        stock_B (pd.Series): Time series of stock B.
        seq_length (int): Length of the input sequence.
        epochs (int): Number of epochs to train for.

    Returns:
        Tuple: ((model_A, scaler), (model_B, scaler)); both entries share the
            single scaler fitted jointly on the two series.
    """
    # Find the later of the two series' first non-zero indices
    first_nonzero_row = max(
        np.nonzero(stock_A.values)[0][0], np.nonzero(stock_B.values)[0][0]
    )
    # Trim both series so they start where both have begun moving
    stock_A = stock_A[first_nonzero_row:]
    stock_B = stock_B[first_nonzero_row:]
# Prepare the input and target data
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(
np.concatenate([stock_A.values.reshape(-1, 1), stock_B.values.reshape(-1, 1)])
)
scaled_stock_A = scaled_data[: len(stock_A)]
scaled_stock_B = scaled_data[len(stock_A):]
X_A, y_A = create_sequences(scaled_stock_A, seq_length)
X_B, y_B = create_sequences(scaled_stock_B, seq_length)
# Reshape input data for LSTM (samples, timesteps, features)
X_A = X_A.reshape(X_A.shape[0], seq_length, 1)
X_B = X_B.reshape(X_B.shape[0], seq_length, 1)
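    # (the scaled arrays are 2-D, so X already has shape
    # (samples, seq_length, 1); the reshape just makes the layout explicit)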
# Create LSTM models
model_A = LSTMModel()
model_B = LSTMModel()
# Move models to GPU if available
model_A = model_A.to(device)
model_B = model_B.to(device)
# Train LSTM models
train_lstm_model(model_A, X_A, y_A, epochs=epochs)
train_lstm_model(model_B, X_B, y_B, epochs=epochs)
return (model_A, scaler), (model_B, scaler)
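
# Usage sketch (hypothetical tickers; assumes df holds one column per symbol):
#     (model_A, scaler), (model_B, scaler) = create_models_for_pair(
#         df["AAPL"], df["MSFT"], seq_length=5, epochs=100
#     )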


def save_model_and_scaler(
    model: LSTMModel, scaler: MinMaxScaler, filename: str
) -> None:
    """
    Save the model weights, the full model, and the scaler to disk.

    Args:
        model (LSTMModel): LSTM model to save.
        scaler (MinMaxScaler): Scaler used to scale the data.
        filename (str): Base name (without extension) for the saved files.
    """
    for directory in ("weights", "models", "scalers"):
        os.makedirs(directory, exist_ok=True)  # torch.save does not create dirs
    torch.save(model.state_dict(), f"weights/{filename}.pth")
    # The full-module .pt file requires the LSTMModel class to be importable
    # at load time; the state_dict above is the more portable format
    torch.save(model, f"models/{filename}.pt")
    with open(f"scalers/{filename}.pkl", "wb") as f:
        pickle.dump(scaler, f)


def load_model_and_scaler(filename: str) -> tuple[LSTMModel, MinMaxScaler]:
    """
    Load the model and scaler from disk.

    Args:
        filename (str): Base name (without extension) of the files to load.

    Returns:
        Tuple: The loaded model (in eval mode) and the scaler.
    """
model = LSTMModel()
    # map_location lets weights trained on a GPU load on a CPU-only machine
    model.load_state_dict(torch.load(f"weights/{filename}.pth", map_location=device))
model.eval()
with open(f"scalers/{filename}.pkl", "rb") as f:
scaler = pickle.load(f)
return model, scaler
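
# Inference sketch (assumes LSTMModel takes input of shape
# (batch, seq_length, 1), matching the training reshape above, and that
# `recent` holds the last seq_length observations as a 1-D numpy array;
# the filename below is hypothetical):
#     model, scaler = load_model_and_scaler("model_A_AAPL-MSFT")
#     window = scaler.transform(recent.reshape(-1, 1))
#     x = torch.tensor(window, dtype=torch.float32).reshape(1, -1, 1)
#     with torch.no_grad():
#         pred = model(x)
#     next_value = scaler.inverse_transform(pred.cpu().numpy().reshape(-1, 1))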


def create_and_train_models(
    stock_A: pd.Series,
    stock_B: pd.Series,
    stock_A_symbol: str,
    stock_B_symbol: str,
) -> tuple[str, tuple[tuple[LSTMModel, MinMaxScaler], tuple[LSTMModel, MinMaxScaler]]]:
    """Create, train, and save models for a pair of stocks.

    Args:
        stock_A (pd.Series): Time series of stock A.
        stock_B (pd.Series): Time series of stock B.
        stock_A_symbol (str): Symbol of stock A.
        stock_B_symbol (str): Symbol of stock B.

    Returns:
        Tuple: The "A-B" key for the pair and the trained
            ((model_A, scaler), (model_B, scaler)) models.
    """
    model_key = f"{stock_A_symbol}-{stock_B_symbol}"
    models_A, models_B = create_models_for_pair(stock_A, stock_B)
    save_model_and_scaler(models_A[0], models_A[1], f"model_A_{model_key}")
    save_model_and_scaler(models_B[0], models_B[1], f"model_B_{model_key}")
return model_key, (models_A, models_B)


if __name__ == "__main__":
# Load the grid of stocks
grid = load_grid()
rows, cols = grid.shape
print(f"Loaded grid of shape {grid.shape}")
# 'df' is the DataFrame with daily change percentages for 500 stocks
df = pd.read_csv("sp500_daily_change.csv")
print(f"Loaded DataFrame of shape {df.shape}")
models_dict = {}
stock_pairs = []
for row in range(rows):
for col in range(cols):
stock_symbol = grid.iat[row, col]
stock = df[stock_symbol]
neighbors = get_neighbors(grid, row, col)
for neighbor_row, neighbor_col in neighbors:
neighbor_stock_symbol = grid.iat[neighbor_row, neighbor_col]
neighbor_stock = df[neighbor_stock_symbol]
stock_pairs.append(
(stock, neighbor_stock, stock_symbol, neighbor_stock_symbol)
)
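    # Note: if get_neighbors returns symmetric adjacency, each unordered pair
    # appears here twice (A-B and B-A), so both directions are trained.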
# Train models sequentially without multiprocessing
trained_models = [
create_and_train_models(*stock_pair)
for stock_pair in tqdm(stock_pairs, desc="Training Models", ncols=100)
]
# Update models_dict with trained models
for model_key, models in trained_models:
models_dict[model_key] = models
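    # models_dict maps "A-B" keys to ((model_A, scaler), (model_B, scaler));
    # the per-pair weights, full models, and scalers are already on disk via
    # save_model_and_scaler, so nothing further is written here.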
print("Done training models")