-
Notifications
You must be signed in to change notification settings - Fork 15
/
Copy pathpredict.py
86 lines (69 loc) · 3.44 KB
/
predict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
import argparse
from smd.data import preprocessing
from smd.data import postprocessing
import smd.utils as utils
import numpy as np
import keras.models
from tqdm import tqdm
import os
import glob
def test_data_processing(file, mean, std):
if os.path.splitext(file)[1] == '.npy':
spec = np.load(file)
else:
audio = utils.load_audio(file)
spec = preprocessing.get_spectrogram(audio)
mels = preprocessing.get_scaled_mel_bands(spec)
mels = preprocessing.normalize(mels, mean, std)
return mels.T
def predict(data_path, output_file, model_path, mean_path, std_path, smoothing):
mean = np.load(mean_path)
std = np.load(std_path)
print("Loading the model " + model_path + "..")
model = keras.models.load_model(model_path)
print("Start the prediction..")
if os.path.isdir(data_path):
if output_file != "":
raise ValueError("It is possible to set an output file only if the input is a file.")
files = glob.glob(os.path.abspath(data_path) + "/*.npy") + glob.glob(os.path.abspath(data_path) + "/*.wav")
for file in tqdm(files):
x = test_data_processing(file, mean, std)
x = x.reshape((1, x.shape[0], x.shape[1]))
output = model.predict(x, batch_size=1, verbose=0)[0].T
output = postprocessing.apply_threshold(output)
if smoothing:
output = postprocessing.smooth_output(output)
annotation = preprocessing.label_to_annotation(output)
output_path = file.replace(".npy", '') + "_prediction.txt"
utils.save_annotation(annotation, output_path)
else:
file = os.path.abspath(data_path)
x = test_data_processing(file, mean, std)
x = x.reshape((1, x.shape[0], x.shape[1]))
output = model.predict(x, batch_size=1, verbose=0)[0].T
output = postprocessing.apply_threshold(output)
if smoothing:
output = postprocessing.smooth_output(output)
annotation = preprocessing.label_to_annotation(output)
if output_file != "":
output_path = output_file
else:
output_path = file.replace(".npy", '') + "_prediction.txt"
utils.save_annotation(annotation, output_path)
if __name__ == "__main__":
parser = argparse.ArgumentParser(
description="Script to do the prediction with a neural network for the speech and music detection task.")
parser.add_argument('--data_path', type=str, default="audio_test/",
help='path to a file or a folder for prediction')
parser.add_argument('--output_file', type=str, default="",
help='set a name for a precise output file')
parser.add_argument('--model', type=str, default="trained/model.hdf5",
help='path of the model to load when the starting is resumed')
parser.add_argument('--mean_path', type=str, default="trained/mean.npy",
help='path of the mean of the normalization applied with the model')
parser.add_argument('--std_path', type=str, default="trained/std.npy",
help='path of the std of the normalization applied with the model')
parser.add_argument('--smoothing', type=bool, default=True,
help='true or false, apply to smoothing function to the ouput')
args = parser.parse_args()
predict(args.data_path, args.output_file, args.model, args.mean_path, args.std_path, args.smoothing)