mirror of
https://github.com/mozilla/DeepSpeech.git
synced 2025-10-26 11:19:39 +00:00
Undo extraction of shared DataSets code from importers
This commit is contained in:
parent
182e20187a
commit
c0bb34cfd3
@ -1,61 +0,0 @@
|
||||
import tensorflow as tf
|
||||
|
||||
from math import ceil
|
||||
from threading import Thread
|
||||
from util.gpu import get_available_gpus
|
||||
from util.text import ctc_label_dense_to_sparse
|
||||
|
||||
class DataSets(object):
    """Read-only bundle of the train, dev and test data sets."""

    def __init__(self, train, dev, test):
        """Remember the three splits exactly as they were passed in."""
        self._train, self._dev, self._test = train, dev, test

    @property
    def train(self):
        """The object given as ``train`` at construction."""
        return self._train

    @property
    def dev(self):
        """The object given as ``dev`` at construction."""
        return self._dev

    @property
    def test(self):
        """The object given as ``test`` at construction."""
        return self._test
|
||||
|
||||
class BaseDataSet(object):
    """Shared queue plumbing for importer data sets.

    Creates the TensorFlow placeholders, a padding FIFO queue and the enqueue
    op for (features, feature length, labels, label length) examples.

    This class does not define ``_populate_batch_queue``; a concrete data set
    must supply it before ``_start_queue_threads`` is called.
    """

    def __init__(self, session, txt_files, thread_count, batch_size, num_mfcc_features, num_context):
        """Build the placeholders and the example queue.

        Args:
            session: stored as ``self._session``; not used by this class itself.
            txt_files: sized collection (presumably transcript paths -- confirm
                against the importers); only its length is read here.
            thread_count: number of threads ``_start_queue_threads`` launches.
            batch_size: number of examples ``next_batch`` dequeues at once.
            num_mfcc_features: number of feature values per frame.
            num_context: context size used to compute the input width.
        """
        self._session = session
        self._num_mfcc_features = num_mfcc_features
        self._num_context = num_context
        self._txt_files = txt_files
        self._batch_size = batch_size
        self._thread_count = thread_count

        # One input row holds num_mfcc_features values plus
        # 2 * num_mfcc_features * num_context more (presumably the context
        # frames on either side -- confirm against util.audio).
        input_width = num_mfcc_features + (2 * num_mfcc_features * num_context)

        self._x = tf.placeholder(tf.float32, [None, input_width])
        self._x_length = tf.placeholder(tf.int32, [])
        self._y = tf.placeholder(tf.int32, [None,])
        self._y_length = tf.placeholder(tf.int32, [])

        # Capacity is two batches per device.
        self._example_queue = tf.PaddingFIFOQueue(
            shapes=[[None, input_width], [], [None,], []],
            dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
            capacity=2 * self._get_device_count() * batch_size)
        self._enqueue_op = self._example_queue.enqueue(
            [self._x, self._x_length, self._y, self._y_length])

    def _get_device_count(self):
        """Return the number of GPUs from ``get_available_gpus()``, at least 1."""
        available_gpus = get_available_gpus()
        return max(len(available_gpus), 1)

    def _start_queue_threads(self):
        """Start ``self._thread_count`` daemon threads running ``_populate_batch_queue``."""
        # range() rather than the Python-2-only xrange(): it behaves the same
        # here on Python 2 and does not raise NameError on Python 3.
        batch_threads = [Thread(target=self._populate_batch_queue)
                         for _ in range(self._thread_count)]
        for batch_thread in batch_threads:
            # Daemon threads do not keep the interpreter alive at exit.
            batch_thread.daemon = True
            batch_thread.start()

    def next_batch(self):
        """Dequeue ``self._batch_size`` examples from the example queue.

        Returns:
            A ``(source, source_lengths, sparse_labels)`` tuple, where
            ``sparse_labels`` is what ``ctc_label_dense_to_sparse`` returns for
            the dequeued targets and target lengths.
        """
        source, source_lengths, target, target_lengths = self._example_queue.dequeue_many(self._batch_size)
        sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._batch_size)
        return source, source_lengths, sparse_labels

    @property
    def total_batches(self):
        """Number of batches needed to cover every entry of ``_txt_files`` once."""
        # Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
        return int(ceil(float(len(self._txt_files)) / float(self._batch_size)))
|
||||
@ -1,20 +1,65 @@
|
||||
import tensorflow as tf
|
||||
|
||||
from os import path
|
||||
from glob import glob
|
||||
from util.datasets import BaseDataSet, DataSets
|
||||
from util.text import text_to_char_array
|
||||
from math import ceil
|
||||
from threading import Thread
|
||||
from util.gpu import get_available_gpus
|
||||
from util.text import text_to_char_array, ctc_label_dense_to_sparse
|
||||
from util.audio import audiofile_to_input_vector
|
||||
from tensorflow.contrib.learn.python.learn.datasets import base
|
||||
|
||||
class DataSet(BaseDataSet):
    """Importer data set whose construction is delegated to BaseDataSet."""

    def __init__(self, *args, **kwargs):
        """Forward every positional and keyword argument to BaseDataSet.__init__."""
        super(DataSet, self).__init__(*args, **kwargs)
|
||||
class DataSets(object):
    """Read-only bundle of the train, dev and test data sets."""

    def __init__(self, train, dev, test):
        """Remember the three splits exactly as they were passed in."""
        self._train, self._dev, self._test = train, dev, test

    @property
    def train(self):
        """The object given as ``train`` at construction."""
        return self._train

    @property
    def dev(self):
        """The object given as ``dev`` at construction."""
        return self._dev

    @property
    def test(self):
        """The object given as ``test`` at construction."""
        return self._test
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
    """Create the feed placeholders and example queue, then start the feeder threads.

    All six arguments are stored on the instance; ``numcep`` and
    ``numcontext`` also determine the width of the input placeholder.
    """
    self._session = session
    self._txt_files = txt_files
    self._thread_count = thread_count
    self._batch_size = batch_size
    self._numcep = numcep
    self._numcontext = numcontext

    # Width of a single input row: numcep values plus 2 * numcep * numcontext
    # more (presumably the context frames on either side -- confirm in util.audio).
    input_width = numcep + (2 * numcep * numcontext)

    self._x = tf.placeholder(tf.float32, [None, input_width])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None,])
    self._y_length = tf.placeholder(tf.int32, [])

    # Capacity is two batches per device.
    queue_capacity = 2 * self._get_device_count() * batch_size
    self._example_queue = tf.PaddingFIFOQueue(
        shapes=[[None, input_width], [], [None,], []],
        dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
        capacity=queue_capacity)
    self._enqueue_op = self._example_queue.enqueue(
        [self._x, self._x_length, self._y, self._y_length])

    # Started last, once the queue and its enqueue op exist.
    self._start_queue_threads()
|
||||
|
||||
|
||||
def _get_device_count(self):
    """Return how many GPUs ``get_available_gpus()`` reports, but never less than 1."""
    gpu_count = len(get_available_gpus())
    return max(gpu_count, 1)
|
||||
|
||||
def _start_queue_threads(self):
|
||||
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
|
||||
for batch_thread in batch_threads:
|
||||
batch_thread.daemon = True
|
||||
batch_thread.start()
|
||||
|
||||
def _compute_source_target(self):
|
||||
txt_file = self._txt_files[0]
|
||||
wav_file = path.splitext(txt_file)[0] + ".wav"
|
||||
|
||||
audio_waves = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
|
||||
audio_waves = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
|
||||
|
||||
with open(txt_file) as open_txt_file:
|
||||
original = ' '.join(open_txt_file.read().strip().lower().split(' ')[2:]).replace('.', '')
|
||||
@ -32,6 +77,17 @@ class DataSet(BaseDataSet):
|
||||
self._y: target,
|
||||
self._y_length: target_len})
|
||||
|
||||
def next_batch(self):
    """Dequeue ``self._batch_size`` examples from the example queue.

    Returns a ``(source, source_lengths, sparse_labels)`` tuple, where
    ``sparse_labels`` is what ``ctc_label_dense_to_sparse`` returns for the
    dequeued targets and target lengths.
    """
    dequeued = self._example_queue.dequeue_many(self._batch_size)
    source, source_lengths, target, target_lengths = dequeued
    return (
        source,
        source_lengths,
        ctc_label_dense_to_sparse(target, target_lengths, self._batch_size),
    )
|
||||
|
||||
@property
|
||||
def total_batches(self):
|
||||
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
|
||||
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
|
||||
|
||||
|
||||
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=1):
|
||||
# Conditionally download data
|
||||
LDC93S1_BASE = "LDC93S1"
|
||||
|
||||
@ -3,6 +3,7 @@ import os
|
||||
import random
|
||||
import subprocess
|
||||
import tarfile
|
||||
import tensorflow as tf
|
||||
|
||||
from glob import glob
|
||||
from itertools import cycle
|
||||
@ -12,16 +13,58 @@ from Queue import PriorityQueue
|
||||
from shutil import rmtree
|
||||
from tensorflow.contrib.learn.python.learn.datasets import base
|
||||
from tensorflow.python.platform import gfile
|
||||
from threading import Thread
|
||||
from util.audio import audiofile_to_input_vector
|
||||
from util.datasets import BaseDataSet, DataSets
|
||||
from util.text import text_to_char_array
|
||||
from util.gpu import get_available_gpus
|
||||
from util.text import text_to_char_array, ctc_label_dense_to_sparse
|
||||
|
||||
class DataSet(BaseDataSet):
    """Importer data set whose construction is delegated to BaseDataSet."""

    def __init__(self, *args, **kwargs):
        """Forward every positional and keyword argument to BaseDataSet.__init__."""
        super(DataSet, self).__init__(*args, **kwargs)
|
||||
class DataSets(object):
    """Read-only bundle of the train, dev and test data sets."""

    def __init__(self, train, dev, test):
        """Remember the three splits exactly as they were passed in."""
        self._train, self._dev, self._test = train, dev, test

    @property
    def train(self):
        """The object given as ``train`` at construction."""
        return self._train

    @property
    def dev(self):
        """The object given as ``dev`` at construction."""
        return self._dev

    @property
    def test(self):
        """The object given as ``test`` at construction."""
        return self._test
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
    """Create the feed placeholders, example queue and file cycle, then start the feeder threads.

    All six arguments are stored on the instance; ``numcep`` and
    ``numcontext`` also determine the width of the input placeholder.
    """
    self._session = session
    self._txt_files = txt_files
    self._thread_count = thread_count
    self._batch_size = batch_size
    self._numcep = numcep
    self._numcontext = numcontext

    # Width of a single input row: numcep values plus 2 * numcep * numcontext
    # more (presumably the context frames on either side -- confirm in util.audio).
    input_width = numcep + (2 * numcep * numcontext)

    self._x = tf.placeholder(tf.float32, [None, input_width])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None,])
    self._y_length = tf.placeholder(tf.int32, [])

    # Capacity is two batches per device.
    queue_capacity = 2 * self._get_device_count() * batch_size
    self._example_queue = tf.PaddingFIFOQueue(
        shapes=[[None, input_width], [], [None,], []],
        dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
        capacity=queue_capacity)
    self._enqueue_op = self._example_queue.enqueue(
        [self._x, self._x_length, self._y, self._y_length])

    # _populate_batch_queue iterates this, so build it before starting threads.
    self._files_circular_list = self._create_files_circular_list()
    self._start_queue_threads()
|
||||
|
||||
def _get_device_count(self):
    """Return how many GPUs ``get_available_gpus()`` reports, but never less than 1."""
    gpu_count = len(get_available_gpus())
    return max(gpu_count, 1)
|
||||
|
||||
def _start_queue_threads(self):
|
||||
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
|
||||
for batch_thread in batch_threads:
|
||||
batch_thread.daemon = True
|
||||
batch_thread.start()
|
||||
|
||||
def _create_files_circular_list(self):
|
||||
priorityQueue = PriorityQueue()
|
||||
for txt_file in self._txt_files:
|
||||
@ -36,7 +79,7 @@ class DataSet(BaseDataSet):
|
||||
|
||||
def _populate_batch_queue(self):
|
||||
for txt_file, wav_file in self._files_circular_list:
|
||||
source = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
|
||||
source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
|
||||
source_len = len(next_source)
|
||||
with open(txt_file) as open_txt_file:
|
||||
target = text_to_char_array(open_txt_file.read())
|
||||
@ -47,6 +90,17 @@ class DataSet(BaseDataSet):
|
||||
self._y: target,
|
||||
self._y_length: target_len})
|
||||
|
||||
def next_batch(self):
    """Dequeue ``self._batch_size`` examples from the example queue.

    Returns a ``(source, source_lengths, sparse_labels)`` tuple, where
    ``sparse_labels`` is what ``ctc_label_dense_to_sparse`` returns for the
    dequeued targets and target lengths.
    """
    dequeued = self._example_queue.dequeue_many(self._batch_size)
    source, source_lengths, target, target_lengths = dequeued
    return (
        source,
        source_lengths,
        ctc_label_dense_to_sparse(target, target_lengths, self._batch_size),
    )
|
||||
|
||||
@property
|
||||
def total_batches(self):
|
||||
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
|
||||
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
|
||||
|
||||
|
||||
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=8):
|
||||
# Check if we can convert FLAC with SoX before we start
|
||||
sox_help_out = subprocess.check_output(["sox", "-h"])
|
||||
|
||||
@ -3,6 +3,7 @@ import random
|
||||
import tarfile
|
||||
import threading
|
||||
import numpy as np
|
||||
import tensorflow as tf
|
||||
|
||||
from os import path
|
||||
from os import rmdir
|
||||
@ -16,19 +17,60 @@ from itertools import cycle
|
||||
from os.path import getsize
|
||||
from threading import Thread
|
||||
from Queue import PriorityQueue
|
||||
from util.datasets import BaseDataSet, DataSets
|
||||
from util.stm import parse_stm_file
|
||||
from util.text import text_to_char_array
|
||||
from util.gpu import get_available_gpus
|
||||
from util.text import text_to_char_array, ctc_label_dense_to_sparse
|
||||
from tensorflow.python.platform import gfile
|
||||
from util.audio import audiofile_to_input_vector
|
||||
from tensorflow.contrib.learn.python.learn.datasets import base
|
||||
|
||||
class DataSet(BaseDataSet):
    """Importer data set whose construction is delegated to BaseDataSet."""

    def __init__(self, *args, **kwargs):
        """Forward every positional and keyword argument to BaseDataSet.__init__."""
        super(DataSet, self).__init__(*args, **kwargs)
|
||||
class DataSets(object):
    """Read-only bundle of the train, dev and test data sets."""

    def __init__(self, train, dev, test):
        """Remember the three splits exactly as they were passed in."""
        self._train, self._dev, self._test = train, dev, test

    @property
    def train(self):
        """The object given as ``train`` at construction."""
        return self._train

    @property
    def dev(self):
        """The object given as ``dev`` at construction."""
        return self._dev

    @property
    def test(self):
        """The object given as ``test`` at construction."""
        return self._test
|
||||
|
||||
class DataSet(object):
|
||||
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
    """Create the feed placeholders, example queue and file cycle, then start the feeder threads.

    All six arguments are stored on the instance; ``numcep`` and
    ``numcontext`` also determine the width of the input placeholder.
    """
    self._session = session
    self._txt_files = txt_files
    self._thread_count = thread_count
    self._batch_size = batch_size
    self._numcep = numcep
    self._numcontext = numcontext

    # Width of a single input row: numcep values plus 2 * numcep * numcontext
    # more (presumably the context frames on either side -- confirm in util.audio).
    input_width = numcep + (2 * numcep * numcontext)

    self._x = tf.placeholder(tf.float32, [None, input_width])
    self._x_length = tf.placeholder(tf.int32, [])
    self._y = tf.placeholder(tf.int32, [None,])
    self._y_length = tf.placeholder(tf.int32, [])

    # Capacity is two batches per device.
    queue_capacity = 2 * self._get_device_count() * batch_size
    self._example_queue = tf.PaddingFIFOQueue(
        shapes=[[None, input_width], [], [None,], []],
        dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
        capacity=queue_capacity)
    self._enqueue_op = self._example_queue.enqueue(
        [self._x, self._x_length, self._y, self._y_length])

    # _populate_batch_queue iterates this, so build it before starting threads.
    self._files_circular_list = self._create_files_circular_list()
    self._start_queue_threads()
|
||||
|
||||
|
||||
def _get_device_count(self):
    """Return how many GPUs ``get_available_gpus()`` reports, but never less than 1."""
    gpu_count = len(get_available_gpus())
    return max(gpu_count, 1)
|
||||
|
||||
def _start_queue_threads(self):
|
||||
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
|
||||
for batch_thread in batch_threads:
|
||||
batch_thread.daemon = True
|
||||
batch_thread.start()
|
||||
|
||||
def _create_files_circular_list(self):
|
||||
priorityQueue = PriorityQueue()
|
||||
for txt_file in self._txt_files:
|
||||
@ -45,7 +87,7 @@ class DataSet(BaseDataSet):
|
||||
|
||||
def _populate_batch_queue(self):
|
||||
for txt_file, wav_file in self._files_circular_list:
|
||||
source = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
|
||||
source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
|
||||
source_len = len(source)
|
||||
with open(txt_file) as open_txt_file:
|
||||
target = text_to_char_array(open_txt_file.read())
|
||||
@ -56,6 +98,17 @@ class DataSet(BaseDataSet):
|
||||
self._y: target,
|
||||
self._y_length: target_len})
|
||||
|
||||
def next_batch(self):
    """Dequeue ``self._batch_size`` examples from the example queue.

    Returns a ``(source, source_lengths, sparse_labels)`` tuple, where
    ``sparse_labels`` is what ``ctc_label_dense_to_sparse`` returns for the
    dequeued targets and target lengths.
    """
    dequeued = self._example_queue.dequeue_many(self._batch_size)
    source, source_lengths, target, target_lengths = dequeued
    return (
        source,
        source_lengths,
        ctc_label_dense_to_sparse(target, target_lengths, self._batch_size),
    )
|
||||
|
||||
@property
|
||||
def total_batches(self):
|
||||
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
|
||||
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
|
||||
|
||||
|
||||
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=8):
|
||||
# Conditionally download data
|
||||
TED_DATA = "TEDLIUM_release2.tar.gz"
|
||||
|
||||
Loading…
Reference in New Issue
Block a user