DeepSpeech/tc-tests-utils.sh
2018-01-31 18:50:36 +01:00

444 lines
13 KiB
Bash
Executable File

#!/bin/bash
set -xe
OS=$(uname)
if [ "${OS}" = "Linux" ]; then
export DS_ROOT_TASK=${HOME}
fi;
if [ "${OS}" = "Darwin" ]; then
export DS_ROOT_TASK=${TASKCLUSTER_TASK_DIR}
export SWIG_LIB="$(find ${DS_ROOT_TASK}/homebrew/Cellar/swig/ -type f -name "swig.swg" | xargs dirname)"
# It seems chaining |export DYLD_LIBRARY_PATH=...| does not work, maybe
# because of SIP? Who knows ...
if [ ! -z "${EXTRA_ENV}" ]; then
eval "export ${EXTRA_ENV}"
fi;
fi;
export TASKCLUSTER_ARTIFACTS=${TASKCLUSTER_ARTIFACTS:-/tmp/artifacts}
export TASKCLUSTER_TMP_DIR=${TASKCLUSTER_TMP_DIR:-/tmp}
mkdir -p ${TASKCLUSTER_TMP_DIR} || true
export DS_TFDIR=${DS_ROOT_TASK}/DeepSpeech/tf
export DS_DSDIR=${DS_ROOT_TASK}/DeepSpeech/ds
export BAZEL_CTC_TARGETS="//native_client:libctc_decoder_with_kenlm.so"
export EXTRA_AOT_CFLAGS=""
export EXTRA_AOT_LDFLAGS=""
export EXTRA_AOT_LIBS="-ldeepspeech_model"
# FIXME:
# Previously, with r1.3, we could use timesteps of 64
# With r1.4 it seems to eat too much resources at tfcompile step
export BAZEL_AOT_BUILD_FLAGS="--define=DS_NATIVE_MODEL=1 --define=DS_MODEL_TIMESTEPS=16"
export BAZEL_AOT_TARGETS="
//native_client:libdeepspeech_model.so
"
model_source=${DEEPSPEECH_TEST_MODEL}
model_name=$(basename "${model_source}")
SUPPORTED_PYTHON_VERSIONS=${SUPPORTED_PYTHON_VERSIONS:-2.7.13 3.4.6 3.5.3 3.6.2}
# 7.10.0 and 8.0.0 targets fails to build
# > ../deepspeech_wrap.cxx:966:23: error: 'WeakCallbackData' in namespace 'v8' does not name a type
SUPPORTED_NODEJS_VERSIONS=${SUPPORTED_NODEJS_VERSIONS:-4.8.6 5.12.0 6.12.0 7.10.1 8.9.1 9.2.0}
assert_correct_inference()
{
phrase=$1
expected=$2
if [ -z "${phrase}" -o -z "${expected}" ]; then
echo "One or more empty strings:"
echo "phrase: <${phrase}>"
echo "expected: <${expected}>"
return 1
fi;
if [ "${phrase}" = "${expected}" ]; then
echo "Proper output has been produced:"
echo "${phrase}"
return 0
else
echo "!! Non matching output !!"
echo "got: <${phrase}>"
echo "xxd:"; echo "${phrase}" | xxd
echo "-------------------"
echo "expected: <${expected}>"
echo "xxd:"; echo "${expected}" | xxd
return 1
fi;
}
assert_correct_ldc93s1()
{
assert_correct_inference "$1" "she had your dark suit in greasy wash water all year"
}
assert_correct_ldc93s1_prodmodel()
{
assert_correct_inference "$1" "she had the duck so ingrecywachworallyear"
}
assert_correct_ldc93s1_somodel()
{
somodel_nolm=$1
somodel_withlm=$2
# We want to be able to return non zero value from the function, while not
# failing the whole execution
set +e
assert_correct_ldc93s1 "${somodel_nolm}"
so_nolm=$?
assert_correct_ldc93s1 "${somodel_withlm}"
so_lm=$?
set -e
# We accept that with no LM there may be errors, but we do not accept that
# for LM. For now.
if [ ${so_lm} -eq 1 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ];
then
exit 1
elif [ ${so_lm} -eq 0 ] && [ ${so_nolm} -eq 1 -o ${so_nolm} -eq 0 ];
then
exit 0
else
echo "Unexpected status"
exit 2
fi
}
generic_download_tarxz()
{
target_dir=$1
url=$2
if [ -z "${target_dir}" -o -z "${url}" ]; then
echo "Empty name for target directory or URL:"
echo " target_dir=${target_dir}"
echo " url=${url}"
exit 1
fi;
mkdir -p ${target_dir} || true
wget ${url} -O - | pixz -d | tar -C ${target_dir} -xf -
}
download_native_client_files()
{
generic_download_tarxz "$1" "${DEEPSPEECH_ARTIFACTS_ROOT}/native_client.tar.xz"
}
download_aot_model_files()
{
generic_download_tarxz "$1" "${DEEPSPEECH_AOT_ARTIFACTS_ROOT}/native_client.tar.xz"
}
download_ctc_kenlm()
{
generic_download_tarxz "$1" "${DEEPSPEECH_LIBCTC}"
}
download_data()
{
wget ${model_source} -O ${TASKCLUSTER_TMP_DIR}/${model_name}
wget https://catalog.ldc.upenn.edu/desc/addenda/LDC93S1.wav -O ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/alphabet.txt ${TASKCLUSTER_TMP_DIR}/alphabet.txt
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/lm.binary ${TASKCLUSTER_TMP_DIR}/lm.binary
cp ${DS_ROOT_TASK}/DeepSpeech/ds/data/lm/trie ${TASKCLUSTER_TMP_DIR}/trie
}
download_material()
{
target_dir=$1
maybe_aot=$2
if [ "${maybe_aot}" = "--aot" ]; then
download_aot_model_files "${target_dir}"
else
download_native_client_files "${target_dir}"
fi
download_data
ls -hal ${TASKCLUSTER_TMP_DIR}/${model_name} ${TASKCLUSTER_TMP_DIR}/LDC93S1.wav ${TASKCLUSTER_TMP_DIR}/alphabet.txt
}
install_pyenv()
{
if [ -z "${PYENV_ROOT}" ]; then
echo "No PYENV_ROOT set";
exit 1;
fi;
git clone --quiet https://github.com/pyenv/pyenv.git ${PYENV_ROOT}
pushd ${PYENV_ROOT}
git checkout --quiet 0c909f7457a027276a1d733d78bfbe70ba652047
popd
eval "$(pyenv init -)"
}
install_pyenv_virtualenv()
{
PYENV_VENV=$1
if [ -z "${PYENV_VENV}" ]; then
echo "No PYENV_VENV set";
exit 1;
fi;
git clone --quiet https://github.com/pyenv/pyenv-virtualenv.git ${PYENV_VENV}
pushd ${PYENV_VENV}
git checkout --quiet 27270877575fe8c3e7be5385b8b6a1e4089b39aa
popd
eval "$(pyenv virtualenv-init -)"
}
do_get_model_parameters()
{
local __result=$2
model_url=$1
model_file=/tmp/$(basename "${model_url}")
if [ -z "${model_url}" ]; then
echo "Empty URL for model"
exit 1
fi;
wget "${model_url}" -O "${model_file}"
wget "${SUMMARIZE_GRAPH_BINARY}" -O "/tmp/summarize_graph"
chmod +x /tmp/summarize_graph
if [ ! -f "${model_file}" ]; then
echo "No such model: ${model_file}"
exit 1
fi;
model_width=$(/tmp/summarize_graph --in_graph="${model_file}" | grep "inputs" | grep -Eo "shape=\[\?,\?,[[:digit:]]+" | cut -d',' -f3)
eval $__result="'--define=DS_MODEL_FRAMESIZE=${model_width} --define=DS_MODEL_FILE=${model_file}'"
}
# Checks whether we run a patched version of bazel.
# Patching is required to dump computeKey() parameters to .ckd files
# See bazel.patch
# Return 0 (success exit code) on patched version, 1 on release version
is_patched_bazel()
{
bazel_version=$(bazel version | grep 'Build label:' | cut -d':' -f2)
if [ -z "${bazel_version}" ]; then
return 0;
else
return 1;
fi;
}
verify_bazel_rebuild()
{
bazel_explain_file="$1"
if [ ! -f "${bazel_explain_file}" ]; then
echo "No such explain file: ${bazel_explain_file}"
exit 1
fi;
spurious_rebuilds=$(grep 'Executing action' "${bazel_explain_file}" | grep 'Compiling' | grep -v -E 'no entry in the cache|unconditional execution is requested' | wc -l)
if [ "${spurious_rebuilds}" -ne 0 ]; then
echo "Bazel rebuilds some file it should not, please check."
if is_patched_bazel; then
mkdir -p ${DS_ROOT_TASK}/DeepSpeech/ckd/ds ${DS_ROOT_TASK}/DeepSpeech/ckd/tf
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
tar xf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar --strip-components=4 -C ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/
echo "Making a diff between CKD files"
mkdir -p ${TASKCLUSTER_ARTIFACTS}
diff -urNw ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/ | tee ${TASKCLUSTER_ARTIFACTS}/ckd.diff
rm -fr ${DS_ROOT_TASK}/DeepSpeech/ckd/tf/ ${DS_ROOT_TASK}/DeepSpeech/ckd/ds/
else
echo "Cannot get CKD information from release, please use patched Bazel"
fi;
exit 1
fi;
}
do_bazel_build()
{
cd ${DS_ROOT_TASK}/DeepSpeech/tf
eval "export ${BAZEL_ENV_FLAGS}"
if is_patched_bazel; then
find ${DS_ROOT_TASK}/DeepSpeech/tf/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-tf.tar -T -
fi;
bazel ${BAZEL_OUTPUT_USER_ROOT} build \
-s --explain bazel_monolithic.log --verbose_explanations --experimental_strict_action_env --config=monolithic -c opt ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS}
if is_patched_bazel; then
find ${DS_ROOT_TASK}/DeepSpeech/tf/bazel-out/ -iname "*.ckd" | tar -cf ${DS_ROOT_TASK}/DeepSpeech/bazel-ckd-ds.tar -T -
fi;
verify_bazel_rebuild "${DS_ROOT_TASK}/DeepSpeech/tf/bazel_monolithic.log"
}
do_bazel_shared_build()
{
cd ${DS_ROOT_TASK}/DeepSpeech/tf
eval "export ${BAZEL_ENV_FLAGS}"
bazel ${BAZEL_OUTPUT_USER_ROOT} build \
-s --explain bazel_shared.log --verbose_explanations --experimental_strict_action_env -c opt ${BAZEL_BUILD_FLAGS} ${BAZEL_TARGETS}
}
do_deepspeech_binary_build()
{
cd ${DS_DSDIR}
make -C native_client/ \
TARGET=${SYSTEM_TARGET} \
TFDIR=${DS_TFDIR} \
RASPBIAN=/tmp/multistrap-raspbian-jessie \
EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" \
EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" \
EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" \
deepspeech
}
do_deepspeech_python_build()
{
rename_to_gpu=$1
unset PYTHON_BIN_PATH
unset PYTHONPATH
export PYENV_ROOT="${DS_ROOT_TASK}/DeepSpeech/.pyenv"
export PATH="${PYENV_ROOT}/bin:$PATH"
install_pyenv "${PYENV_ROOT}"
install_pyenv_virtualenv "$(pyenv root)/plugins/pyenv-virtualenv"
mkdir -p wheels
SETUP_FLAGS=""
if [ "${rename_to_gpu}" ]; then
SETUP_FLAGS="--project_name deepspeech-gpu"
fi
for pyver in ${SUPPORTED_PYTHON_VERSIONS}; do
pyenv install ${pyver}
pyenv virtualenv ${pyver} deepspeech
source ${PYENV_ROOT}/versions/${pyver}/envs/deepspeech/bin/activate
EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" make -C native_client/ \
TARGET=${SYSTEM_TARGET} \
RASPBIAN=/tmp/multistrap-raspbian-jessie \
TFDIR=${DS_TFDIR} \
SETUP_FLAGS="${SETUP_FLAGS}" \
bindings-clean bindings
cp native_client/dist/*.whl wheels
make -C native_client/ bindings-clean
deactivate
pyenv uninstall --force deepspeech
done;
}
do_deepspeech_nodejs_build()
{
rename_to_gpu=$1
npm update && npm install node-gyp node-pre-gyp
export PATH="$(npm root)/.bin/:$PATH"
for node in ${SUPPORTED_NODEJS_VERSIONS}; do
EXTRA_CFLAGS="${EXTRA_LOCAL_CFLAGS}" EXTRA_LDFLAGS="${EXTRA_LOCAL_LDFLAGS}" EXTRA_LIBS="${EXTRA_LOCAL_LIBS}" make -C native_client/javascript \
TARGET=${SYSTEM_TARGET} \
RASPBIAN=/tmp/multistrap-raspbian-jessie \
TFDIR=${DS_TFDIR} \
NODE_ABI_TARGET=--target=$node \
clean node-wrapper
done;
if [ "${rename_to_gpu}" ]; then
make -C native_client/javascript clean npm-pack PROJECT_NAME=deepspeech-gpu
else
make -C native_client/javascript clean npm-pack
fi
tar -czf native_client/javascript/wrapper.tar.gz \
-C native_client/javascript/ lib/
}
do_deepspeech_npm_package()
{
cd ${DS_DSDIR}
npm update && npm install node-gyp node-pre-gyp
export PATH="$(npm root)/.bin/:$PATH"
all_tasks="$(curl -s https://queue.taskcluster.net/v1/task/${TASK_ID} | python -c 'import json; import sys; print(" ".join(json.loads(sys.stdin.read())["dependencies"]));')"
for dep in ${all_tasks}; do
curl -L https://queue.taskcluster.net/v1/task/${dep}/artifacts/public/wrapper.tar.gz | tar -C native_client/javascript -xzvf -
done;
make -C native_client/javascript clean npm-pack
}
package_native_client()
{
tensorflow_dir=${DS_TFDIR}
deepspeech_dir=${DS_DSDIR}
artifacts_dir=${TASKCLUSTER_ARTIFACTS}
artifact_name=$1
if [ ! -d ${tensorflow_dir} -o ! -d ${deepspeech_dir} -o ! -d ${artifacts_dir} ]; then
echo "Missing directory. Please check:"
echo "tensorflow_dir=${tensorflow_dir}"
echo "deepspeech_dir=${deepspeech_dir}"
echo "artifacts_dir=${artifacts_dir}"
exit 1
fi;
if [ -z "${artifact_name}" ]; then
echo "Please specify artifact name."
fi;
if [ -f "${tensorflow_dir}/bazel-bin/native_client/libdeepspeech_model.so" ]; then
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
-C ${tensorflow_dir}/bazel-bin/native_client/ libctc_decoder_with_kenlm.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech_model.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech_utils.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
else
tar -cf - \
-C ${tensorflow_dir}/bazel-bin/native_client/ generate_trie \
-C ${tensorflow_dir}/bazel-bin/native_client/ libctc_decoder_with_kenlm.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech.so \
-C ${tensorflow_dir}/bazel-bin/native_client/ libdeepspeech_utils.so \
-C ${deepspeech_dir}/ LICENSE \
-C ${deepspeech_dir}/native_client/ deepspeech \
-C ${deepspeech_dir}/native_client/kenlm/ README.mozilla \
| pixz -9 > "${artifacts_dir}/${artifact_name}"
fi;
}