forked from xiph/opus
-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
added osce testing related scripts (ietf120)
- Loading branch information
Jan Buethe
committed
Jul 26, 2024
1 parent
2554a89
commit fc96bb9
Showing
2 changed files
with
316 additions
and
0 deletions.
There are no files selected for viewing
123 changes: 123 additions & 0 deletions
123
dnn/torch/osce/stndrd/evaluation/commonvoice_clip_selection.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
import argparse | ||
import os | ||
import yaml | ||
import subprocess | ||
|
||
import numpy as np | ||
|
||
|
||
|
||
parser = argparse.ArgumentParser() | ||
parser.add_argument('commonvoice_base_dir') | ||
parser.add_argument('output_dir') | ||
parser.add_argument('--clips-per-language', required=False, type=int, default=10) | ||
parser.add_argument('--seed', required=False, type=int, default=2024) | ||
|
||
|
||
def select_clips(dir, num_clips=10): | ||
|
||
if num_clips % 2: | ||
print(f"warning: number of clips will be reduced to {num_clips - 1}") | ||
female = dict() | ||
male = dict() | ||
|
||
clips = np.genfromtxt(os.path.join(dir, 'validated.tsv'), delimiter='\t', dtype=str, invalid_raise=False) | ||
clips_by_client = dict() | ||
|
||
if len(clips.shape) < 2 or len(clips) < num_clips: | ||
# not enough data to proceed | ||
return None | ||
|
||
for client in set(clips[1:,0]): | ||
client_clips = clips[clips[:, 0] == client] | ||
f, m = False, False | ||
if 'female_feminine' in client_clips[:, 8]: | ||
female[client] = client_clips[client_clips[:, 8] == 'female_feminine'] | ||
f = True | ||
if 'male_masculine' in client_clips[:, 8]: | ||
male[client] = client_clips[client_clips[:, 8] == 'male_masculine'] | ||
m = True | ||
|
||
if f and m: | ||
print(f"both male and female clips under client {client}") | ||
|
||
|
||
if min(len(female), len(male)) < num_clips // 2: | ||
return None | ||
|
||
# select num_clips // 2 random female clients | ||
female_client_selection = np.array(list(female.keys()), dtype=str)[np.random.choice(len(female), num_clips//2, replace=False)] | ||
female_clip_selection = [] | ||
for c in female_client_selection: | ||
s_idx = np.random.randint(0, len(female[c])) | ||
female_clip_selection.append(os.path.join(dir, 'clips', female[c][s_idx, 1].item())) | ||
|
||
# select num_clips // 2 random female clients | ||
male_client_selection = np.array(list(male.keys()), dtype=str)[np.random.choice(len(male), num_clips//2, replace=False)] | ||
male_clip_selection = [] | ||
for c in male_client_selection: | ||
s_idx = np.random.randint(0, len(male[c])) | ||
male_clip_selection.append(os.path.join(dir, 'clips', male[c][s_idx, 1].item())) | ||
|
||
return female_clip_selection + male_clip_selection | ||
|
||
def ffmpeg_available(): | ||
try: | ||
x = subprocess.run(['ffmpeg', '-h'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) | ||
return x.returncode == 0 | ||
except: | ||
return False | ||
|
||
|
||
def convert_clips(selection, outdir): | ||
if not ffmpeg_available(): | ||
raise RuntimeError("ffmpeg not available") | ||
|
||
clipdir = os.path.join(outdir, 'clips') | ||
os.makedirs(clipdir, exist_ok=True) | ||
|
||
clipdict = dict() | ||
|
||
for lang, clips in selection.items(): | ||
clipdict[lang] = [] | ||
for clip in clips: | ||
clipname = os.path.splitext(os.path.split(clip)[-1])[0] | ||
target_name = os.path.join('clips', clipname + '.wav') | ||
call_args = ['ffmpeg', '-i', clip, '-ar', '16000', os.path.join(outdir, target_name)] | ||
print(call_args) | ||
r = subprocess.run(call_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) | ||
if r.returncode != 0: | ||
raise RuntimeError(f'could not execute {call_args}') | ||
clipdict[lang].append(target_name) | ||
|
||
return clipdict | ||
|
||
|
||
if __name__ == "__main__": | ||
if not ffmpeg_available(): | ||
raise RuntimeError("ffmpeg not available") | ||
|
||
args = parser.parse_args() | ||
|
||
base_dir = args.commonvoice_base_dir | ||
output_dir = args.output_dir | ||
seed = args.seed | ||
|
||
np.random.seed(seed) | ||
|
||
langs = os.listdir(base_dir) | ||
selection = dict() | ||
|
||
for lang in langs: | ||
print(f"processing {lang}...") | ||
clips = select_clips(os.path.join(base_dir, lang)) | ||
if clips is not None: | ||
selection[lang] = clips | ||
|
||
|
||
os.makedirs(output_dir, exist_ok=True) | ||
|
||
clips = convert_clips(selection, output_dir) | ||
|
||
with open(os.path.join(output_dir, 'clips.yml'), 'w') as f: | ||
yaml.dump(clips, f) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,193 @@ | ||
import os | ||
import argparse | ||
import yaml | ||
import subprocess | ||
|
||
import numpy as np | ||
|
||
from moc2 import compare as moc | ||
|
||
DEBUG=False | ||
|
||
parser = argparse.ArgumentParser() | ||
|
||
parser.add_argument('inputdir', type=str, help='Input folder with test items') | ||
parser.add_argument('outputdir', type=str, help='Output folder') | ||
parser.add_argument('bitrate', type=int, help='bitrate to test') | ||
parser.add_argument('--reference_opus_demo', type=str, default='./opus_demo', help='reference opus_demo binary for generating bitstreams and reference output') | ||
parser.add_argument('--test_opus_demo', type=str, default='./opus_demo', help='opus_demo binary under test') | ||
parser.add_argument('--test_opus_demo_options', type=str, default='-dec_complexity 7', help='options for test opus_demo (e.g. "-dec_complexity 7")') | ||
parser.add_argument('--verbose', type=int, default=0, help='verbosity level: 0 for quiet (default), 1 for reporting individual test results, 2 for reporting per-item scores in failed tests') | ||
|
||
def run_opus_encoder(opus_demo_path, input_pcm_path, bitstream_path, application, fs, num_channels, bitrate, options=[], verbose=False): | ||
|
||
call_args = [ | ||
opus_demo_path, | ||
"-e", | ||
application, | ||
str(fs), | ||
str(num_channels), | ||
str(bitrate), | ||
"-bandwidth", | ||
"WB" | ||
] | ||
|
||
call_args += options | ||
|
||
call_args += [ | ||
input_pcm_path, | ||
bitstream_path | ||
] | ||
|
||
try: | ||
if verbose: | ||
print(f"running {call_args}...") | ||
subprocess.run(call_args) | ||
else: | ||
subprocess.run(call_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) | ||
except: | ||
return 1 | ||
|
||
return 0 | ||
|
||
|
||
def run_opus_decoder(opus_demo_path, bitstream_path, output_pcm_path, fs, num_channels, options=[], verbose=False): | ||
|
||
call_args = [ | ||
opus_demo_path, | ||
"-d", | ||
str(fs), | ||
str(num_channels) | ||
] | ||
|
||
call_args += options | ||
|
||
call_args += [ | ||
bitstream_path, | ||
output_pcm_path | ||
] | ||
|
||
try: | ||
if verbose: | ||
print(f"running {call_args}...") | ||
subprocess.run(call_args) | ||
else: | ||
subprocess.run(call_args, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) | ||
except: | ||
return 1 | ||
|
||
return 0 | ||
|
||
def compute_moc_score(reference_pcm, test_pcm, delay=91): | ||
x_ref = np.fromfile(reference_pcm, dtype=np.int16).astype(np.float32) / (2 ** 15) | ||
x_cut = np.fromfile(test_pcm, dtype=np.int16).astype(np.float32) / (2 ** 15) | ||
|
||
moc_score = moc(x_ref, x_cut[delay:]) | ||
|
||
return moc_score | ||
|
||
def sox(*call_args): | ||
try: | ||
call_args = ["sox"] + list(call_args) | ||
subprocess.run(call_args) | ||
return 0 | ||
except: | ||
return 1 | ||
|
||
def process_clip_factory(ref_opus_demo, test_opus_demo, test_options): | ||
def process_clip(clip_path, processdir, bitrate): | ||
# derive paths | ||
clipname = os.path.splitext(os.path.split(clip_path)[1])[0] | ||
pcm_path = os.path.join(processdir, clipname + ".raw") | ||
bitstream_path = os.path.join(processdir, clipname + ".bin") | ||
ref_path = os.path.join(processdir, clipname + "_ref.raw") | ||
test_path = os.path.join(processdir, clipname + "_test.raw") | ||
|
||
# run sox | ||
sox(clip_path, pcm_path) | ||
|
||
# run encoder | ||
run_opus_encoder(ref_opus_demo, pcm_path, bitstream_path, "voip", 16000, 1, bitrate) | ||
|
||
# run decoder | ||
run_opus_decoder(ref_opus_demo, bitstream_path, ref_path, 16000, 1) | ||
run_opus_decoder(test_opus_demo, bitstream_path, test_path, 16000, 1, options=test_options) | ||
|
||
d_ref = compute_moc_score(pcm_path, ref_path) | ||
d_test = compute_moc_score(pcm_path, test_path) | ||
|
||
return d_ref, d_test | ||
|
||
|
||
return process_clip | ||
|
||
def main(inputdir, outputdir, bitrate, reference_opus_demo, test_opus_demo, test_option_string, verbose): | ||
|
||
# load clips list | ||
with open(os.path.join(inputdir, 'clips.yml'), "r") as f: | ||
clips = yaml.safe_load(f) | ||
|
||
# parse test options | ||
test_options = test_option_string.split() | ||
|
||
process_clip = process_clip_factory(reference_opus_demo, test_opus_demo, test_options) | ||
|
||
os.makedirs(outputdir, exist_ok=True) | ||
processdir = os.path.join(outputdir, 'process') | ||
os.makedirs(processdir, exist_ok=True) | ||
|
||
num_passed = 0 | ||
results = dict() | ||
min_rel_diff = 1000 | ||
min_mean = 1000 | ||
worst_clip = None | ||
worst_lang = None | ||
for lang, lang_clips in clips.items(): | ||
if verbose > 0: print(f"processing language {lang}...") | ||
results[lang] = np.zeros((len(lang_clips), 2)) | ||
for i, clip in enumerate(lang_clips): | ||
clip_path = os.path.join(inputdir, clip) | ||
d_ref, d_test = process_clip(clip_path, processdir, bitrate) | ||
results[lang][i, 0] = d_ref | ||
results[lang][i, 1] = d_test | ||
|
||
alpha = 0.5 | ||
rel_diff = ((results[lang][:, 0] ** alpha - results[lang][:, 1] ** alpha) /(results[lang][:, 0] ** alpha)) | ||
|
||
min_idx = np.argmin(rel_diff).item() | ||
if rel_diff[min_idx] < min_rel_diff: | ||
min_rel_diff = rel_diff[min_idx] | ||
worst_clip = lang_clips[min_idx] | ||
|
||
if np.mean(rel_diff) < min_mean: | ||
min_mean = np.mean(rel_diff).item() | ||
worst_lang = lang | ||
|
||
if np.min(rel_diff) < -0.1 or np.mean(rel_diff) < -0.025: | ||
if verbose > 0: print(f"FAIL ({np.mean(results[lang], axis=0)} {np.mean(rel_diff)} {np.min(rel_diff)})") | ||
if verbose > 1: | ||
for i, c in enumerate(lang_clips): | ||
print(f" {c:50s} {results[lang][i]} {rel_diff[i]}") | ||
else: | ||
if verbose > 0: print(f"PASS ({np.mean(results[lang], axis=0)} {np.mean(rel_diff)} {np.min(rel_diff)})") | ||
num_passed += 1 | ||
|
||
print(f"{num_passed}/{len(clips)} tests passed!") | ||
|
||
print(f"worst case occured at clip {worst_clip} with relative difference of {min_rel_diff}") | ||
print(f"worst mean relative difference was {min_mean} for test {worst_lang}") | ||
|
||
np.save(os.path.join(outputdir, f'results_' + "_".join(test_options) + f"_{bitrate}.npy"), results, allow_pickle=True) | ||
|
||
|
||
|
||
if __name__ == "__main__": | ||
args = parser.parse_args() | ||
|
||
main(args.inputdir, | ||
args.outputdir, | ||
args.bitrate, | ||
args.reference_opus_demo, | ||
args.test_opus_demo, | ||
args.test_opus_demo_options, | ||
args.verbose) |