-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_training_sets_ETHA_winter_fun.py
181 lines (126 loc) · 5.87 KB
/
test_training_sets_ETHA_winter_fun.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# -*- coding: utf-8 -*-
"""
Created on Wed Aug 21 16:38:05 2019
@author: Vlasenko
This script splits the meteorlogical and concentration samples into test and training sets.
"""
import numpy as np
def ld(var):
act_lev = len(var[1])
llen=np.zeros([act_lev,1])
for i in range(0,act_lev):
llen[i,0]= len(var[1][i])
return(llen)
'''
Function tt2_vars.
inputs:
gas = name of species
prefix = location of the subdomain (do not touch)
path = path to the concentration and meteorlogical anomalies. They should be in the same folder
pathT = path where you whant to store the training and test data
1 !!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!
Concentration and meteorological anomalies must be normalized by mapping them to the interval [-1,1]
with zero-mean. Normalization will significantly speed up the training and increase the accuracy of the neural network.
The best way to do that is to code and run the two lines below:
Your_data = Your_data - Climatology
Your_data = Your_data/np.max(np.abs(Your_data))
1 !!!!!!!!!!!!!!!!! IMPORTANT !!!!!!!!!!!!!!!!!!!
'''
def tt2_vars(lx1,lx2,ly1,ly2,prefix,gas,path, pathT):
Label = np.load(path+gas+'.npy', allow_pickle=True, fix_imports=True) # loading your lables (concentrations)
aux = np.squeeze(np.asarray(Label))
del Label
Label = 0.9*aux[:,:]
print('Label.shape', Label.shape)
del aux
Label = list(Label)
days = len(Label)
Label= np.squeeze(np.array(Label)) # this is just a safety procedure. Does not affec the result
Var = ['T2M','FUWIND','FVWIND'] # This is array containing feature names, i.e., meteorological variables.
# You can defien your own. The first letter F means that values
# are defined on edges and not in the centers of the grid cell.
counter = 0
lag = 0
season=days/29
print('season =',season)
print('days =',days)
for var in range(len(Var)):
counter = counter +1
varnam = Var[var]
print('Varnam = ', varnam)
print('path = ', path+varnam+'.npy')
Svar = np.load(path+varnam+'.npy', allow_pickle=True, fix_imports=True)
Svar= np.squeeze(np.asarray(Svar)) # this is just a safety procedure. Does not affec the result
print('Svar.shape = ', Svar.shape)
LL = list(Svar)
if varnam[0]=='F':
LL = list(Svar)
'''
To get rid of the boundary condition problem, the inputs have a larger subdomain
by SlabelxThe inputs subdomain is larger than the output subdomain by Slabelx and
Slabely offsets in x and y directions. These offsets allow getting rid of the boundary
condition problem.
'''
Slabelx = 5 # offset in x-direction
Slabely = 5 # offset in y-direction
if counter ==1:
LLabel = list(Label)
LL1 = []
LL2 = []
for fi in range(0,days):
tmp2 =np.reshape(LLabel[fi][0:4836],[78,62], order='C')
t2 =tmp2[lx1+Slabelx:lx2-Slabelx,ly1+Slabely:ly2-Slabely]
[s4,s5]=t2.shape
LL1 = LL1+[(0.5-(np.mod(fi,season))/season)] # adding time, not really helpful. Might be removed
LL2 = LL2+[np.reshape(t2,[s4*s5])]
del LL
del LLabel
LL = LL1
LLabel = LL2
LT=list()
for fi in range(0,days):
LT=LT+[(LL[fi])]
else:
if varnam[0]=='F':
LL1 = []
print('CONDITION F', LL[0].shape)
for fi in range(0,days):
tmp3 = LL[fi]
t3 =tmp3[lx1+Slabelx:lx2-Slabelx,ly1+Slabely:ly2-Slabely] # adding offsets
[s1,s2]=t3.shape
LL1 = LL1+[np.reshape(t3,[s1*s2])]
del LL
LL = LL1
for fi in range(0,days):
LT[fi]= np.append(LT[fi],LL[fi])
del Svar
ll2 = 3*(days-np.mod(days,4))/4 # defining the size of training set
print('float ll2 = ', days)
ll2 = int(ll2)
s = 3 # temporal offset. We need it to prevent the data appear from different years
II=[]
for ii in range(s,ll2-0): # pack your data in the training set
if ii == s:
train_x = [([ LT[ii], LT[ii-1],LT[ii-2]])]
train_y = [(LLabel[ii+lag])]
II = II+[(ii,ii)]
else:
if np.mod(ii,season)>s and np.mod(ii,season)<season -s:
train_x = train_x + [([ LT[ii], LT[ii-1],LT[ii-2]])]
train_y = train_y + [(LLabel[ii+lag])]
II = II+[(ii,ii)]
for ii in range(ll2+s,days-0): # pack your data in the test set
if ii == ll2+s:
test_x = [([ LT[ii], LT[ii-1],LT[ii-2]])]
test_y = [(LLabel[ii+lag])]
II = II+[(ii,ii)]
else:
if np.mod(ii,season)>s and np.mod(ii,season)<season -s:
test_x = test_x + [([ LT[ii], LT[ii-1],LT[ii-2]])]
test_y = test_y + [(LLabel[ii+lag])]
II = II+[(ii,ii)]
pathT = 'Your path to the training and test data'
np.save(pathT+'test_x'+gas+prefix, test_x, allow_pickle=True, fix_imports=True)
np.save(pathT+'test_y'+gas+prefix, test_y, allow_pickle=True, fix_imports=True)
np.save(pathT+'train_x'+gas+prefix, train_x, allow_pickle=True, fix_imports=True)
np.save(pathT+'train_y'+gas+prefix, train_y, allow_pickle=True, fix_imports=True)