Merge pull request #6 from gudgud96/fix/crepe-deps

[feat] Replace `crepe` with `torchcrepeV2` to support M1 Mac
gudgud96 · Sep 5, 2023 · 0ae8645
2 parents a159f7f + ea336f2
commit 0ae8645
Show file tree

Hide file tree

Showing 12 changed files with 30 additions and 114 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -1,14 +1,10 @@
-librosa 
-tensorflow 
+librosa==0.9.1
 torch==1.12.1
 torchvision==0.13.1
 torchaudio==0.12.1
 pyyaml
 mido 
-crepe==0.0.12 
-hmmlearn==0.2.7 
 nnAudio==0.3.1 
 numpy 
 bitstruct 
-torchcrepe 
 torchcrepeV2
diff --git a/setup.cfg b/setup.cfg
@@ -12,17 +12,15 @@ author_email = helloharry66@gmail.com
 packages = find:
 include_package_data = True
 install_requires =
-    librosa 
-    tensorflow 
+    librosa==0.9.1
     torch==1.12.1
     torchvision==0.13.1
+    torchaudio==0.12.1
+    pyyaml
     mido 
-    crepe==0.0.12 
-    hmmlearn==0.2.7 
     nnAudio==0.3.1 
     numpy 
     bitstruct 
-    torchcrepe 
     torchcrepeV2
 
 python_requires = >=3.7

diff --git a/syntheon/inferencer/dexed/dexed_inferencer.py b/syntheon/inferencer/dexed/dexed_inferencer.py
@@ -12,7 +12,6 @@
 import pickle
 import os
 import numpy as np
-import matplotlib.pyplot as plt
 
 
 class DexedInferenceOutput(InferenceOutput):

diff --git a/syntheon/inferencer/dexed/models/ddx7/synth.py b/syntheon/inferencer/dexed/models/ddx7/synth.py
@@ -2,7 +2,6 @@
 import torch.nn as nn
 import math
 from syntheon.inferencer.dexed.models.ddx7.core import *
-import matplotlib.pyplot as plt
 import soundfile as sf
 import librosa
 

diff --git a/syntheon/inferencer/vital/models/adsr_envelope.py b/syntheon/inferencer/vital/models/adsr_envelope.py
@@ -5,7 +5,6 @@
 import numpy as np
 import torch
 import os
-import matplotlib.pyplot as plt
 from torch import nn
 import yaml
 
@@ -53,7 +52,6 @@ def __init__(self, is_round_secs=False):
     def power_function(self, x, pow=2):
         if pow > 0: # convex
             # transpose
-            plt.plot(x.squeeze().detach().numpy(), label='test1')
 
             if x.squeeze()[0] > x.squeeze()[-1]:
                 y_intercept = x.squeeze()[-1]
@@ -71,13 +69,8 @@ def power_function(self, x, pow=2):
             # transpose back
             y = y * max_val + y_intercept
 
-            plt.plot(y.squeeze().detach().numpy(), label='test3')
-            plt.show()
-
         else:
             # transpose
-            plt.plot(x.squeeze().detach().numpy(), label='test1')
-
             if x.squeeze()[0] > x.squeeze()[-1]:
                 max_val = x.squeeze()[0]
                 y = x - x[:, 0, :]
@@ -89,22 +82,11 @@ def power_function(self, x, pow=2):
                 y_intercept = y.squeeze()[0]
                 y = y / -y_intercept
 
-            plt.plot(y.squeeze().detach().numpy(), label='test2')
-
             y = -(y ** -pow)
 
-            plt.plot(y.squeeze().detach().numpy(), label='test3')
-
             # transpose back
             y = y * -y_intercept + max_val
 
-            plt.plot(y.squeeze().detach().numpy(), label='test4')
-            plt.legend()
-            plt.show()
-
-            # plt.plot(y.squeeze().detach().numpy(), label='test3')
-            # plt.show()
-
         return y
 
     def gen_envelope(self, attack, decay, sus_level, release,
@@ -237,6 +219,8 @@ def get_amp_shaper(
 
 if __name__ == "__main__":
     # TODO: unit test for this class
+    import matplotlib.pyplot as plt
+
     shaper = ADSREnvelopeShaper(is_round_secs=False)
     adsrs = []
     for elem in [0.0, 0.001, 0.005, 0.01, 0.02]:

diff --git a/syntheon/inferencer/vital/models/core.py b/syntheon/inferencer/vital/models/core.py
@@ -7,10 +7,7 @@
 import torch.fft as fft
 import numpy as np
 import librosa as li
-import crepe
-from torchcrepeV2 import TorchCrepePredictor
 import math
-import matplotlib.pyplot as plt
 import yaml 
 import os
 
@@ -24,10 +21,6 @@
     config = yaml.safe_load(stream)
 
 device = config["device"]
-if device == "cuda":
-    crepe_predictor = TorchCrepePredictor()
-else:
-    crepe_predictor = TorchCrepePredictor(device="cpu")
 
 
 def safe_log(x):
@@ -154,35 +147,6 @@ def extract_loudness(audio, sampling_rate, block_size=None, n_fft=2048, frame_ra
     return loudness
 
 
-def extract_pitch(signal, sampling_rate, block_size, model_capacity="full"):
-    length = signal.shape[-1] // block_size
-    if device == "cpu":
-        f0 = crepe.predict(
-            signal,
-            sampling_rate,
-            step_size=int(1000 * block_size / sampling_rate),
-            verbose=1,
-            center=True,
-            viterbi=True,
-            model_capacity="full"
-        )
-        f0 = f0[1].reshape(-1)[:-1]
-    else:
-        f0 = crepe_predictor.predict(
-            signal,
-            sampling_rate
-        )
-
-    if f0.shape[-1] != length:
-        f0 = np.interp(
-            np.linspace(0, 1, length, endpoint=False),
-            np.linspace(0, 1, f0.shape[-1], endpoint=False),
-            f0,
-        )
-
-    return f0
-
-
 def mlp(in_size, hidden_size, n_layers):
     channels = [in_size] + (n_layers) * [hidden_size]
     net = []

diff --git a/syntheon/inferencer/vital/models/preprocessor.py b/syntheon/inferencer/vital/models/preprocessor.py
@@ -4,7 +4,8 @@
 import numpy as np
 import os
 import torch
-from syntheon.inferencer.vital.models.core import extract_loudness, extract_pitch
+from syntheon.utils.pitch_extractor import extract_pitch
+from syntheon.inferencer.vital.models.core import extract_loudness
 import librosa
 import yaml 
 from nnAudio import Spectrogram

diff --git a/syntheon/inferencer/vital/models/wavetable_synth.py b/syntheon/inferencer/vital/models/wavetable_synth.py
@@ -5,9 +5,7 @@
 from torch import nn
 import numpy as np
 from syntheon.inferencer.vital.models.utils import *
-from tqdm import tqdm
 import soundfile as sf
-import matplotlib.pyplot as plt
 from syntheon.inferencer.vital.models.core import upsample
 from syntheon.inferencer.vital.models.adsr_envelope import *
 

diff --git a/syntheon/main.py b/syntheon/main.py
@@ -1,18 +1,11 @@
 """
 Function APIs to be called externally.
 """
-from .converter.dexed.dexed_converter import DexedConverter
-from .inferencer.dexed.dexed_inferencer import DexedInferencer
 from .converter.vital.vital_converter import VitalConverter
 from .inferencer.vital.vital_inferencer import VitalInferencer
 
 
 obj_dict = {
-    "dexed": {
-        "converter": DexedConverter,
-        "inferencer": DexedInferencer,
-        "file_ext": "syx"
-    },
     "vital": {
         "converter": VitalConverter,
         "inferencer": VitalInferencer,

diff --git a/syntheon/utils/pitch_extractor.py b/syntheon/utils/pitch_extractor.py
@@ -4,8 +4,7 @@
 
 import numpy as np
 import os
-import crepe
-from torchcrepeV2 import TorchCrepePredictor
+from torchcrepeV2 import ONNXTorchCrepePredictor
 import yaml 
 
 with open(
@@ -16,34 +15,19 @@
 ) as stream:
     config = yaml.safe_load(stream)
 
-device = config["device"]
-if device == "cuda":
-    crepe_predictor = TorchCrepePredictor()
-else:
-    crepe_predictor = TorchCrepePredictor(device="cpu")
+
+crepe_predictor = ONNXTorchCrepePredictor()
 
 
-# TODO: use ONNX runtime to enable inference optimization to reduce latency
 def extract_pitch(signal, sampling_rate, block_size, model_capacity="full"):
     length = signal.shape[-1] // block_size
-    if device == "cpu":
-        # use TF crepe for cpu as hardware acceleration
-        f0 = crepe.predict(
-            signal,
-            sampling_rate,
-            step_size=int(1000 * block_size / sampling_rate),
-            verbose=1,
-            center=True,
-            viterbi=True,
-            model_capacity="full"
-        )
-        f0 = f0[1].reshape(-1)[:-1]
-    else:
-        # use torchcrepe for gpu
-        f0 = crepe_predictor.predict(
-            signal,
-            sampling_rate
-        )
+    f0 = crepe_predictor.predict(
+        audio=signal, 
+        sr=sampling_rate, 
+        viterbi=True, 
+        center=True, 
+        step_size=int(1000 * block_size / sampling_rate),
+    )
 
     if f0.shape[-1] != length:
         f0 = np.interp(

diff --git a/syntheon/version.py b/syntheon/version.py
@@ -1 +1 @@
-version = "0.0.2"
+version = "0.1.0"
diff --git a/test/test_inferencer.py b/test/test_inferencer.py
@@ -3,18 +3,18 @@
 from syntheon import infer_params
 
 
-def test_dexed_inferencer():
-    """
-    just check if everything runs well for Dexed
-    """
-    output_params_file, eval_dict = infer_params(
-        "test/test_audio/dexed_test_audio_1.wav", 
-        "dexed", 
-        enable_eval=True
-    )
-    assert os.path.exists(output_params_file)
+# def test_dexed_inferencer():
+#     """
+#     just check if everything runs well for Dexed
+#     """
+#     output_params_file, eval_dict = infer_params(
+#         "test/test_audio/dexed_test_audio_1.wav", 
+#         "dexed", 
+#         enable_eval=True
+#     )
+#     assert os.path.exists(output_params_file)
 
-    os.remove(output_params_file)
+#     os.remove(output_params_file)
 
 
 def test_vital_inferencer_1():