diff --git a/bin/download-models.py b/bin/download-models.py index 239c325..fc06645 100755 --- a/bin/download-models.py +++ b/bin/download-models.py @@ -23,7 +23,6 @@ def _flatten(data, prefix=""): result.extend(_flatten(v, key).items()) else: result.append((key[:-1], v)) - return dict(result) @@ -33,6 +32,7 @@ def _flatten(data, prefix=""): default=f"{serenade.config.base_url}/models", help="URL or S3 path where models are stored", ) + def main(url): """Download the models specified in config/models.yaml""" models = _flatten(serenade.config.models()) @@ -43,7 +43,6 @@ def main(url): version = f.read().strip() if version == model: continue - archive = serenade.config.library_path("models", path, f"{model}.tar.gz") if not os.path.exists(archive): shutil.rmtree(os.path.dirname(archive), ignore_errors=True) @@ -53,7 +52,6 @@ def main(url): with tarfile.open(archive, mode="r:gz") as f: f.extractall(os.path.dirname(archive)) - os.remove(archive) diff --git a/client/build.gradle b/client/build.gradle index a0e821f..014b71d 100644 --- a/client/build.gradle +++ b/client/build.gradle @@ -44,6 +44,28 @@ task installServer(dependsOn: [ installSpeechEngineModels ]) {} +task initElectron(type: Exec) { + commandLine "bash", "-c", "./bin/build.py" +} + +task clientDistLinux(type: Exec, dependsOn: [":client:initElectron"]) { + commandLine "npm", "run", "package:dist-linux" +} + +task fullDistLinux(type: Exec, dependsOn: [":client:initElectron", + ":installServer"]) { + commandLine "npm", "run", "package:dist-linux" +} + +task clientDistMac(type: Exec, dependsOn: [":client:initElectron"]) { + commandLine "npm", "run", "package:dist-mac" +} + +task clientDistWindows(type: Exec, dependsOn: [":client:initElectron"]) { + commandLine "npm", "run", "package:dist-win" +} + clean { delete "static/local" -} + delete "node_modules" +} \ No newline at end of file diff --git a/client/package.json b/client/package.json index 27d4dd1..933361e 100644 --- a/client/package.json +++ b/client/package.json @@ -73,7 +73,9 @@ "electron:serve": "wait-on http-get://localhost:4000/ && npm run dev:main", "package": "npm-run-all build package:dist", "package:unsigned": "cross-env SKIP_SIGN=1 npm-run-all build package:distunsigned", - "package:dist": "electron-builder", + "package:dist-linux": "electron-builder", + "package:dist-win": "electron-builder --win", + "package:dist-mac": "electron-builder --mac", "package:distunsigned": "electron-builder -c.mac.identity=null", "release": "npm-run-all build release:publish", "release:publish": "electron-builder --publish always", diff --git a/code-engine/build.gradle b/code-engine/build.gradle index ce6b5c2..6bf8903 100644 --- a/code-engine/build.gradle +++ b/code-engine/build.gradle @@ -3,7 +3,7 @@ plugins { } task buildCMake(type: Exec, dependsOn: [rootProject.downloadModels]) { - commandLine "bash", "-c", "mkdir -p server/build && cd server/build && cmake .. && cmake --build . -j2" + commandLine "bash", "-c", "mkdir -p server/build && cd server/build && cmake .. && cmake --build . 
-j8" } task distTar(type: Tar) { diff --git a/code-engine/server/CMakeLists.txt b/code-engine/server/CMakeLists.txt index a074a38..dc27c7d 100644 --- a/code-engine/server/CMakeLists.txt +++ b/code-engine/server/CMakeLists.txt @@ -110,7 +110,7 @@ if(APPLE) ) else() target_link_libraries(serenade-code-engine - "-Wl,--start-group;/opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a;/opt/intel/mkl/lib/intel64/libmkl_sequential.a;/opt/intel/mkl/lib/intel64/libmkl_core.a;-Wl,--end-group" + "-Wl,--start-group;/usr/lib/x86_64-linux-gnu/libmkl_intel_ilp64.a;/usr/lib/x86_64-linux-gnu/libmkl_sequential.a;/usr/lib/x86_64-linux-gnu/libmkl_core.a;-Wl,--end-group" dl gomp ) diff --git a/code-engine/server/server/code_engine_server.cc b/code-engine/server/server/code_engine_server.cc index 0ca4c0b..b082aa5 100644 --- a/code-engine/server/server/code_engine_server.cc +++ b/code-engine/server/server/code_engine_server.cc @@ -296,7 +296,7 @@ int main(int argc, char* argv[]) { return result; }); - app.loglevel(crow::LogLevel::Error); + app.loglevel(crow::LogLevel::Debug); app.port(17203).server_name("").multithreaded().run(); } catch (const std::exception& e) { diff --git a/docs/building.md b/docs/building-client.md similarity index 73% rename from docs/building.md rename to docs/building-client.md index e5ae7d1..1d3823d 100644 --- a/docs/building.md +++ b/docs/building-client.md @@ -1,13 +1,62 @@ -# Building Serenade +Serenade is built using the [Gradle](https://gradle.org) build system. We also have a few scripts useful for running +various Serenade services. -Serenade is built using the [Gradle](https://gradle.org) build system. We also have a few scripts useful for running various Serenade services. +## Building the client +Building Serenade is supported on Linux and Mac. The first thing you need to do is to download the source code. -## Client +```shell +git clone https://github.com/serenadeai/serenade.git +``` -To run the Serenade app, simply run: +For Linux: - cd client - ./bin/dev.py +- Use Ubuntu Focal (20.04) or Jammy (22.04) +- Run the scripts to install the Ubuntu prerequisites. When prompted for configuration questions, just use the defaults. + +```shell +cd serenade/ +./scripts/setup/setup-ubuntu.sh +# If prompted for anything during the installation, select the default options and continue. +# As requested by the script, add the export lines to your ~/.bashrc~/ or .zshrc file and reload the +# configs + +cd client +./bin/build.py +``` + +Note: If you run into to dependency problems, trying to run the scripts from inside a fresh +Ubuntu Jammy VM or container. + +For MacOS: + +- Run the scripts to install the MacOS prerequisites: + +```shell +cd serenade/ +./scripts/setup/setup-mac.sh + +cd client +./bin/build.py +``` + +Once you have the build successfully completed, you can run the client locally, or package it: + +```shell +cd client +# To run the client locally: +npm run dev + +# To generate the app image: +npm run package:dist-linux + +# To generate the windows executable: +npm run package:dist-win + +# To generate the mac executable: +npm run package:dist-mac +``` + +``` This will run a local version of the client that uses Serenade Cloud as the backend. 
@@ -15,6 +64,16 @@ If you'd instead like the client to connect to a specific endpoint (e.g., a loca ENDPOINT=http://localhost:17200 ./bin/dev.py + +## Other Dependencies + +Now run the common dependency builder and installer: + +```shell +./scripts/setup/build-dependencies.sh +# Sit back and relax, this will take a while to finish +``` + ## Service Setup ### Docker @@ -133,6 +192,7 @@ If you'd like to build your own version Serenade Local to be used by the client, gradle installd gradle client:installServer + gradle client:clientDistLinux | client:clientDistWindows | client:clientDistMac Then, when you run the client (following the instructions above) and use the Serenade Local endpoint, you'll be running the version that you built locally. diff --git a/docs/building-quickstart.md b/docs/building-quickstart.md new file mode 100644 index 0000000..644169e --- /dev/null +++ b/docs/building-quickstart.md @@ -0,0 +1,56 @@ +# Building Serenade Quickstart + +This is a quick guide to getting Serenade built as fast as possible. It covers +the build process for both the client and the server. The client can be built on +Linux (Ubuntu Focal or Jammy) or Windows. The server can be built on Linux (Ubuntu +Focal or Jammy). + +We strongly recommend running this from inside a VM or a container. The +first thing you need to do is download the source code: + +```shell +git clone https://github.com/serenadeai/serenade.git +``` + +## Linux Build + +For both the client and the server, you must use the following script to +install the prerequisites: + +```shell +cd serenade/ +./scripts/setup/setup-ubuntu.sh +``` + +Once done, export the variables as indicated by the script. + +### Building the client + +```shell +cd serenade/ +gradle :client:clientDistLinux +``` + +The output of this command is an AppImage that can be run on any Linux +system. You can find the AppImage in `client/dist/Serenade-.AppImage`. +This image must be used with one of the remote servers. To create an image that +includes the local server, you must also build the server. + + +### Building the server + +Run the build dependencies script. This script will install all the necessary +dependencies to build the server. It takes a while to complete: + +```shell +cd serenade/ +./scripts/setup/build-dependencies.sh +``` + +Once done, you can build the server: + +```shell +cd serenade/ +gradle installd +gradle client:fullDistLinux +``` diff --git a/docs/training-models.md b/docs/training-models.md index dbf10ba..6963654 100644 --- a/docs/training-models.md +++ b/docs/training-models.md @@ -4,7 +4,7 @@ This document explains how to train the models that are used by Serenade. If you ## Setup -Make sure all of the dependencies for training models are installed and built. You can do so by following the instructions in [Building](building.md)—make sure you do *not* use the `--minimal` flag or `serenade-minimal` Docker image (which are used only for running Serenade). +Make sure all of the dependencies for training models are installed and built. You can do so by following the instructions in [Building](building-client.md)—make sure you do *not* use the `--minimal` flag or `serenade-minimal` Docker image (which are used only for running Serenade). Ensure that you have plenty of disk space. The language model for the speech engine is trained on all of the source code data available in all programming languages supported by Serenade, totaling to about 50 GB.
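As a quick, hedged sanity check of the packaging flow described in the two documents above: the Gradle task names are the ones declared in client/build.gradle in this diff, but the exact AppImage filename under client/dist/ depends on the version electron-builder stamps on it, so the glob below is an assumption.

```shell
# List the client packaging tasks added by this change (clientDistLinux,
# clientDistWindows, clientDistMac, fullDistLinux) to confirm the Gradle wiring.
gradle :client:tasks --all | grep -i dist

# Build the Linux AppImage, then launch it. AppImages need the executable bit,
# and libfuse2 (installed by setup-ubuntu.sh) must be present to run them.
gradle :client:clientDistLinux
chmod +x client/dist/Serenade-*.AppImage
./client/dist/Serenade-*.AppImage
```

By default this launches the client against Serenade Cloud; per the quickstart, use client:fullDistLinux instead if the packaged app should include the locally built server.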
diff --git a/scripts/setup/build-dependencies.sh b/scripts/setup/build-dependencies.sh index 3bab645..01cd8a1 100755 --- a/scripts/setup/build-dependencies.sh +++ b/scripts/setup/build-dependencies.sh @@ -45,7 +45,8 @@ pip3 install --upgrade \ pyenchant \ pyyaml \ requests \ - sentencepiece==0.1.95 + sentencepiece==0.1.95 \ + boto3 sudo-non-docker npm install -g \ prettier \ @@ -88,9 +89,9 @@ cd protobuf-src ./autogen.sh ./configure --prefix=$PWD/../protobuf --disable-shared --with-pic if [[ `uname` == "Darwin" ]] ; then - make CFLAGS="-mmacosx-version-min=$osx_version" CXXFLAGS="-g -std=c++11 -DNDEBUG -mmacosx-version-min=$osx_version" -j2 + make CFLAGS="-mmacosx-version-min=$osx_version" CXXFLAGS="-g -std=c++11 -DNDEBUG -mmacosx-version-min=$osx_version" -j4 else - make -j2 + make -j4 fi make install cd .. @@ -105,7 +106,7 @@ cmake .. \ -DCMAKE_OSX_ARCHITECTURES=x86_64 \ -DCMAKE_INSTALL_PREFIX=$PWD/../../sentencepiece \ -DCMAKE_OSX_DEPLOYMENT_TARGET=$osx_version -cmake --build . --config Release -j2 +cmake --build . --config Release -j4 cmake --install . cd ../.. rm -rf sentencepiece-src @@ -132,14 +133,16 @@ elif [[ `uname` == "Darwin" ]] ; then -DUSE_APPLE_ACCELERATE=on \ -DCMAKE_OSX_DEPLOYMENT_TARGET=$osx_version else + export LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu/:$LD_LIBRARY_PATH cmake .. \ -DBUILD_ARCH=x86-64 \ -DCMAKE_OSX_ARCHITECTURES=x86_64 \ -DCOMPILE_CUDA=off \ -DUSE_DOXYGEN=off fi +make -j4 rm -f ../src/3rd_party/sentencepiece/version -cmake --build . --config Release -j2 + cd ../.. git clone https://github.com/kaldi-asr/kaldi @@ -150,7 +153,7 @@ cd tools if [[ `uname` == 'Darwin' ]] ; then perl -i -pe"s/-g -O2/-g -O2 -mmacosx-version-min=$osx_version/g" Makefile fi -make -j2 +make -j4 cd ../src if [[ `uname` == 'Darwin' ]] ; then ./configure --shared --use-cuda=no @@ -161,7 +164,7 @@ else fi perl -i -pe's/-g //g' kaldi.mk make -j clean depend -make -j2 +make -j4 cd ../tools ./extras/install_phonetisaurus.sh cd ../.. @@ -188,3 +191,4 @@ if [[ "$minimal" == "true" ]] ; then find kaldi -type f -name "*.so*" -delete find kaldi -type f -name "*.o" -delete fi + diff --git a/scripts/setup/setup-ubuntu.sh b/scripts/setup/setup-ubuntu.sh index 9e584e3..7a93c3e 100755 --- a/scripts/setup/setup-ubuntu.sh +++ b/scripts/setup/setup-ubuntu.sh @@ -1,6 +1,4 @@ -#!/bin/bash - -set -e +#!/bin/bash -e HERE=$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &> /dev/null && pwd) .
$HERE/paths.sh @@ -15,6 +13,9 @@ while [[ $# -gt 0 ]]; do --cpu) gpu=false ;; + --debug) + set -x + ;; *) echo "Unknown argument: $1" exit 1 @@ -23,27 +24,37 @@ while [[ $# -gt 0 ]]; do shift done +ubuntu_codename=$(lsb_release -cs) + sudo-non-docker apt-get update sudo-non-docker apt-get install --upgrade -y \ apt-transport-https \ curl \ gnupg2 \ - wget + wget \ + ca-certificates if [[ "$gpu" == "true" ]] ; then sudo-non-docker apt-get install --upgrade -y ubuntu-drivers-common sudo-non-docker ubuntu-drivers autoinstall fi -curl -sL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS-2019.PUB | sudo-non-docker apt-key add - +curl -sL https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | sudo-non-docker gpg --dearmor -o /etc/apt/trusted.gpg.d/intel-mkl.gpg echo "deb https://apt.repos.intel.com/mkl all main" | sudo-non-docker tee /etc/apt/sources.list.d/intel-mkl.list + +NODE_MAJOR=18 +curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key | sudo-non-docker gpg --dearmor -o /etc/apt/trusted.gpg.d/nodesource.gpg +echo "deb [arch=amd64] https://deb.nodesource.com/node_$NODE_MAJOR.x nodistro main" | sudo-non-docker tee /etc/apt/sources.list.d/nodesource.list sudo-non-docker apt-get update -sudo-non-docker apt-get install --upgrade -y \ +sudo-non-docker apt-get install nodejs -y + +# '< /dev/null > /dev/null' is used to suppress the output of the command and avoid prompts +sudo-non-docker DEBIAN_FRONTEND=noninteractive apt-get install -qq -y --upgrade \ autoconf \ automake \ build-essential \ ca-certificates \ - clang-format-9 \ + clang-format \ cmake \ ffmpeg \ fonts-liberation \ @@ -53,7 +64,7 @@ sudo-non-docker apt-get install --upgrade -y \ gfortran \ git \ groff \ - intel-mkl-64bit-2020.2-108 \ + intel-mkl \ libasound2 \ libc++-dev \ libssl-dev \ @@ -61,8 +72,6 @@ sudo-non-docker apt-get install --upgrade -y \ libtool \ logrotate \ lsb-release \ - nodejs \ - npm \ $([[ "$gpu" == "true" ]] && echo "nvidia-cuda-toolkit") \ postgresql-client \ psmisc \ @@ -79,15 +88,39 @@ sudo-non-docker apt-get install --upgrade -y \ vim \ xdg-utils \ yarn \ - zlib1g-dev + zlib1g-dev \ + pkg-config \ + libx11-dev \ + uglifyjs \ + libxtst-dev \ + libfuse2 \ + libblas-dev \ + libblas3 \ + $([[ "$ubuntu_codename" != "focal" ]] && echo "libcublas11") \ + libgsl-dev \ + libatlas-base-dev \ + intel-mkl-64bit-2020.0-088 \ + < /dev/null > /dev/null curl https://download.java.net/java/GA/jdk14.0.1/664493ef4a6946b186ff29eb326336a2/7/GPL/openjdk-14.0.1_linux-x64_bin.tar.gz -Lso jdk.tar.gz tar xf jdk.tar.gz rm jdk.tar.gz -echo "" +curl https://services.gradle.org/distributions/gradle-7.4.2-bin.zip -Lso gradle-7.4.2-bin.zip +unzip -qq gradle-7.4.2-bin.zip +rm gradle-7.4.2-bin.zip + +sudo ln -s $SERENADE_LIBRARY_ROOT/gradle-7.4.2/bin/gradle /usr/local/bin/gradle +sudo ln -s $SERENADE_LIBRARY_ROOT/jdk-14.0.1/bin/java /usr/local/bin/java + +if [[ "${ubuntu_codename}" == "focal" ]] ; then + python3 -m pip install pip --upgrade + pip install pyopenssl --upgrade +fi + +echo "" && echo "" && echo "" echo "Install complete!"
-echo "Now, run build-dependencies.sh and add the following to your ~/.zshrc or ~/.bashrc:" +echo "Now, run ./scripts/setup/build-dependencies.sh and add the following to your ~/.zshrc or ~/.bashrc:" echo "export PATH=\"$SERENADE_LIBRARY_ROOT/jdk-14.0.1/bin:$SERENADE_LIBRARY_ROOT/gradle-7.4.2/bin:\$PATH\"" echo "export JAVA_HOME=\"$SERENADE_LIBRARY_ROOT/jdk-14.0.1\"" diff --git a/speech-engine/build.gradle b/speech-engine/build.gradle index 858b47f..66e74e7 100644 --- a/speech-engine/build.gradle +++ b/speech-engine/build.gradle @@ -3,7 +3,7 @@ plugins { } task buildCMake(type: Exec, dependsOn: [rootProject.downloadModels]) { - commandLine "bash", "-c", "mkdir -p server/build && cd server/build && cmake .. && cmake --build . -j2" + commandLine "bash", "-c", "mkdir -p server/build && cd server/build && cmake .. && cmake --build . -j8" } task distTar(type: Tar) { diff --git a/speech-engine/server/CMakeLists.txt b/speech-engine/server/CMakeLists.txt index 99dd9b8..1e3f046 100644 --- a/speech-engine/server/CMakeLists.txt +++ b/speech-engine/server/CMakeLists.txt @@ -139,7 +139,7 @@ if(APPLE) target_link_libraries(serenade-speech-engine "-framework Accelerate") else() target_link_libraries(serenade-speech-engine - "-Wl,--start-group;/opt/intel/mkl/lib/intel64/libmkl_intel_ilp64.a;/opt/intel/mkl/lib/intel64/libmkl_sequential.a;/opt/intel/mkl/lib/intel64/libmkl_core.a;-Wl,--end-group" + "-Wl,--start-group;/usr/lib/x86_64-linux-gnu/libmkl_intel_ilp64.a;/usr/lib/x86_64-linux-gnu/libmkl_sequential.a;/usr/lib/x86_64-linux-gnu/libmkl_core.a;-Wl,--end-group" dl gomp ) diff --git a/speech-engine/server/include/lattice_ops.h b/speech-engine/server/include/lattice_ops.h index 3e313e5..7928610 100644 --- a/speech-engine/server/include/lattice_ops.h +++ b/speech-engine/server/include/lattice_ops.h @@ -1,6 +1,7 @@ #ifndef LIBRARY_LATTICE_OPS_H #define LIBRARY_LATTICE_OPS_H +#include #include "lat/kaldi-lattice.h" namespace speech_engine { diff --git a/speech-engine/server/include/recognizer.h b/speech-engine/server/include/recognizer.h index b194cd5..9d11fea 100644 --- a/speech-engine/server/include/recognizer.h +++ b/speech-engine/server/include/recognizer.h @@ -12,6 +12,8 @@ #include "recognizer.h" #include "recognizer_config.h" +#include + namespace speech_engine { class Recognizer { diff --git a/speech-engine/server/include/rescorer.h b/speech-engine/server/include/rescorer.h index a150c96..66faf53 100644 --- a/speech-engine/server/include/rescorer.h +++ b/speech-engine/server/include/rescorer.h @@ -6,6 +6,8 @@ #include "lat/kaldi-lattice.h" #include "lm/const-arpa-lm.h" +#include + namespace speech_engine { class Rescorer { diff --git a/speech-engine/server/server/speech_engine_server.cc b/speech-engine/server/server/speech_engine_server.cc index d819fe5..f6814c1 100644 --- a/speech-engine/server/server/speech_engine_server.cc +++ b/speech-engine/server/server/speech_engine_server.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include "base/kaldi-common.h" #include "crow.h" @@ -411,7 +412,7 @@ int main(int argc, char* argv[]) { (*stream)->HandleAudioToAlternatives(request); }); - app.loglevel(crow::LogLevel::Error); + app.loglevel(crow::LogLevel::Debug); app.port(17202).server_name("").multithreaded().run(); } catch (const std::exception& e) { std::cerr << "speech-engine exception: " << e.what() << std::endl; diff --git a/speech-engine/server/src/lattice_ops.cc b/speech-engine/server/src/lattice_ops.cc index 9ceaf9e..49256d2 100644 --- a/speech-engine/server/src/lattice_ops.cc +++ 
b/speech-engine/server/src/lattice_ops.cc @@ -1,5 +1,7 @@ #include "lattice_ops.h" +#include + using namespace kaldi; namespace speech_engine { diff --git a/speech-engine/server/src/recognizer.cc b/speech-engine/server/src/recognizer.cc index 9d72950..13671fe 100644 --- a/speech-engine/server/src/recognizer.cc +++ b/speech-engine/server/src/recognizer.cc @@ -7,6 +7,8 @@ #include "fstext/table-matcher.h" #include "util/common-utils.h" +#include + using namespace kaldi; namespace speech_engine { diff --git a/speech-engine/server/src/rescorer.cc b/speech-engine/server/src/rescorer.cc index a5f976f..6e9dd8e 100644 --- a/speech-engine/server/src/rescorer.cc +++ b/speech-engine/server/src/rescorer.cc @@ -4,6 +4,8 @@ #include "lat/lattice-functions.h" #include "lattice_ops.h" +#include + using namespace kaldi; namespace speech_engine {
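One more hedged note on the MKL link changes in code-engine/server/CMakeLists.txt and speech-engine/server/CMakeLists.txt: both now reference the distro-packaged static archives under /usr/lib/x86_64-linux-gnu instead of the /opt/intel layout. A minimal sketch for confirming those archives exist before running the CMake builds, assuming MKL was installed via the intel-mkl packages from setup-ubuntu.sh:

```shell
# Check that the static MKL archives named in the -Wl,--start-group link lines exist.
ls -l /usr/lib/x86_64-linux-gnu/libmkl_intel_ilp64.a \
      /usr/lib/x86_64-linux-gnu/libmkl_sequential.a \
      /usr/lib/x86_64-linux-gnu/libmkl_core.a

# If a file is missing, see which installed package (if any) owns that path.
dpkg -S /usr/lib/x86_64-linux-gnu/libmkl_core.a
```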