Undo extraction of shared DataSets code from importers

This commit is contained in:
Reuben Morais 2016-11-08 03:04:58 -02:00
parent 182e20187a
commit c0bb34cfd3
4 changed files with 183 additions and 81 deletions

View File

@ -1,61 +0,0 @@
import tensorflow as tf
from math import ceil
from threading import Thread
from util.gpu import get_available_gpus
from util.text import ctc_label_dense_to_sparse
class DataSets(object):
    """Read-only holder for the train, dev and test objects given at construction."""

    def __init__(self, train, dev, test):
        """Store the three objects; they are exposed through the properties below."""
        self._train = train
        self._dev = dev
        self._test = test

    @property
    def train(self):
        """Return the object passed as ``train``."""
        return self._train

    @property
    def dev(self):
        """Return the object passed as ``dev``."""
        return self._dev

    @property
    def test(self):
        """Return the object passed as ``test``."""
        return self._test
class BaseDataSet(object):
    """Shared queue plumbing for the importer data sets.

    Builds four placeholders, a ``tf.PaddingFIFOQueue`` and the enqueue op
    that feeds it, and offers helpers to start the producer threads and to
    dequeue a batch.

    NOTE(review): ``_populate_batch_queue`` is used as the thread target but
    is not defined in this class; presumably subclasses provide it - confirm.
    """

    def __init__(self, session, txt_files, thread_count, batch_size, num_mfcc_features, num_context):
        """Record the configuration and create the placeholders, queue and enqueue op."""
        self._session = session
        self._txt_files = txt_files
        self._thread_count = thread_count
        self._batch_size = batch_size
        self._num_mfcc_features = num_mfcc_features
        self._num_context = num_context

        # Width of one input row: the features themselves plus
        # ``num_context`` feature-sized blocks on each side.
        input_width = num_mfcc_features + (2 * num_mfcc_features * num_context)

        # Placeholders are created in the same order as the queue components.
        self._x = tf.placeholder(tf.float32, [None, input_width])
        self._x_length = tf.placeholder(tf.int32, [])
        self._y = tf.placeholder(tf.int32, [None,])
        self._y_length = tf.placeholder(tf.int32, [])

        # Capacity is two batches for every device reported by _get_device_count().
        queue_capacity = 2 * self._get_device_count() * batch_size
        self._example_queue = tf.PaddingFIFOQueue(
            shapes=[[None, input_width], [], [None,], []],
            dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
            capacity=queue_capacity)
        self._enqueue_op = self._example_queue.enqueue(
            [self._x, self._x_length, self._y, self._y_length])

    def _get_device_count(self):
        """Return the number of GPUs from ``get_available_gpus()``, but never less than 1."""
        gpu_count = len(get_available_gpus())
        return max(gpu_count, 1)

    def _start_queue_threads(self):
        """Start ``thread_count`` daemon threads, each running ``_populate_batch_queue``."""
        for _ in xrange(self._thread_count):
            worker = Thread(target=self._populate_batch_queue)
            worker.daemon = True
            worker.start()

    def next_batch(self):
        """Dequeue ``batch_size`` examples.

        Returns the dequeued source tensor, the source lengths, and the
        labels converted by ``ctc_label_dense_to_sparse``.
        """
        dequeued = self._example_queue.dequeue_many(self._batch_size)
        source, source_lengths, target, target_lengths = dequeued
        sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._batch_size)
        return source, source_lengths, sparse_labels

    @property
    def total_batches(self):
        """Return the number of batches needed to cover every entry of ``txt_files``, rounded up."""
        # Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
        file_count = float(len(self._txt_files))
        return int(ceil(file_count / float(self._batch_size)))

View File

@ -1,20 +1,65 @@
import tensorflow as tf
from os import path
from glob import glob
from util.datasets import BaseDataSet, DataSets
from util.text import text_to_char_array
from math import ceil
from threading import Thread
from util.gpu import get_available_gpus
from util.text import text_to_char_array, ctc_label_dense_to_sparse
from util.audio import audiofile_to_input_vector
from tensorflow.contrib.learn.python.learn.datasets import base
class DataSet(BaseDataSet):
def __init__(self, *args, **kwargs):
super(DataSet, self).__init__(*args, **kwargs)
class DataSets(object):
    """Read-only holder for the train, dev and test objects given at construction."""

    def __init__(self, train, dev, test):
        """Store the three objects; they are exposed through the properties below."""
        self._train = train
        self._dev = dev
        self._test = test

    @property
    def train(self):
        """Return the object passed as ``train``."""
        return self._train

    @property
    def dev(self):
        """Return the object passed as ``dev``."""
        return self._dev

    @property
    def test(self):
        """Return the object passed as ``test``."""
        return self._test
class DataSet(object):
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
self._session = session
self._numcep = numcep
self._x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
self._x_length = tf.placeholder(tf.int32, [])
self._y = tf.placeholder(tf.int32, [None,])
self._y_length = tf.placeholder(tf.int32, [])
self._example_queue = tf.PaddingFIFOQueue(shapes=[[None, numcep + (2 * numcep * numcontext)], [], [None,], []],
dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
capacity=2 * self._get_device_count() * batch_size)
self._enqueue_op = self._example_queue.enqueue([self._x, self._x_length, self._y, self._y_length])
self._txt_files = txt_files
self._batch_size = batch_size
self._numcontext = numcontext
self._thread_count = thread_count
self._start_queue_threads()
def _get_device_count(self):
available_gpus = get_available_gpus()
return max(len(available_gpus), 1)
def _start_queue_threads(self):
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
for batch_thread in batch_threads:
batch_thread.daemon = True
batch_thread.start()
def _compute_source_target(self):
txt_file = self._txt_files[0]
wav_file = path.splitext(txt_file)[0] + ".wav"
audio_waves = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
audio_waves = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
with open(txt_file) as open_txt_file:
original = ' '.join(open_txt_file.read().strip().lower().split(' ')[2:]).replace('.', '')
@ -32,6 +77,17 @@ class DataSet(BaseDataSet):
self._y: target,
self._y_length: target_len})
def next_batch(self):
source, source_lengths, target, target_lengths = self._example_queue.dequeue_many(self._batch_size)
sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._batch_size)
return source, source_lengths, sparse_labels
@property
def total_batches(self):
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=1):
# Conditionally download data
LDC93S1_BASE = "LDC93S1"

View File

@ -3,6 +3,7 @@ import os
import random
import subprocess
import tarfile
import tensorflow as tf
from glob import glob
from itertools import cycle
@ -12,16 +13,58 @@ from Queue import PriorityQueue
from shutil import rmtree
from tensorflow.contrib.learn.python.learn.datasets import base
from tensorflow.python.platform import gfile
from threading import Thread
from util.audio import audiofile_to_input_vector
from util.datasets import BaseDataSet, DataSets
from util.text import text_to_char_array
from util.gpu import get_available_gpus
from util.text import text_to_char_array, ctc_label_dense_to_sparse
class DataSet(BaseDataSet):
def __init__(self, *args, **kwargs):
super(DataSet, self).__init__(*args, **kwargs)
class DataSets(object):
    """Read-only holder for the train, dev and test objects given at construction."""

    def __init__(self, train, dev, test):
        """Store the three objects; they are exposed through the properties below."""
        self._train = train
        self._dev = dev
        self._test = test

    @property
    def train(self):
        """Return the object passed as ``train``."""
        return self._train

    @property
    def dev(self):
        """Return the object passed as ``dev``."""
        return self._dev

    @property
    def test(self):
        """Return the object passed as ``test``."""
        return self._test
class DataSet(object):
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
self._session = session
self._numcep = numcep
self._x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
self._x_length = tf.placeholder(tf.int32, [])
self._y = tf.placeholder(tf.int32, [None,])
self._y_length = tf.placeholder(tf.int32, [])
self._example_queue = tf.PaddingFIFOQueue(shapes=[[None, numcep + (2 * numcep * numcontext)], [], [None,], []],
dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
capacity=2 * self._get_device_count() * batch_size)
self._enqueue_op = self._example_queue.enqueue([self._x, self._x_length, self._y, self._y_length])
self._txt_files = txt_files
self._batch_size = batch_size
self._numcontext = numcontext
self._thread_count = thread_count
self._files_circular_list = self._create_files_circular_list()
self._start_queue_threads()
def _get_device_count(self):
available_gpus = get_available_gpus()
return max(len(available_gpus), 1)
def _start_queue_threads(self):
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
for batch_thread in batch_threads:
batch_thread.daemon = True
batch_thread.start()
def _create_files_circular_list(self):
priorityQueue = PriorityQueue()
for txt_file in self._txt_files:
@ -36,7 +79,7 @@ class DataSet(BaseDataSet):
def _populate_batch_queue(self):
for txt_file, wav_file in self._files_circular_list:
source = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
source_len = len(next_source)
with open(txt_file) as open_txt_file:
target = text_to_char_array(open_txt_file.read())
@ -47,6 +90,17 @@ class DataSet(BaseDataSet):
self._y: target,
self._y_length: target_len})
def next_batch(self):
source, source_lengths, target, target_lengths = self._example_queue.dequeue_many(self._batch_size)
sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._batch_size)
return source, source_lengths, sparse_labels
@property
def total_batches(self):
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=8):
# Check if we can convert FLAC with SoX before we start
sox_help_out = subprocess.check_output(["sox", "-h"])

View File

@ -3,6 +3,7 @@ import random
import tarfile
import threading
import numpy as np
import tensorflow as tf
from os import path
from os import rmdir
@ -16,19 +17,60 @@ from itertools import cycle
from os.path import getsize
from threading import Thread
from Queue import PriorityQueue
from util.datasets import BaseDataSet, DataSets
from util.stm import parse_stm_file
from util.text import text_to_char_array
from util.gpu import get_available_gpus
from util.text import text_to_char_array, ctc_label_dense_to_sparse
from tensorflow.python.platform import gfile
from util.audio import audiofile_to_input_vector
from tensorflow.contrib.learn.python.learn.datasets import base
class DataSet(BaseDataSet):
def __init__(self, *args, **kwargs):
super(DataSet, self).__init__(*args, **kwargs)
class DataSets(object):
    """Read-only holder for the train, dev and test objects given at construction."""

    def __init__(self, train, dev, test):
        """Store the three objects; they are exposed through the properties below."""
        self._train = train
        self._dev = dev
        self._test = test

    @property
    def train(self):
        """Return the object passed as ``train``."""
        return self._train

    @property
    def dev(self):
        """Return the object passed as ``dev``."""
        return self._dev

    @property
    def test(self):
        """Return the object passed as ``test``."""
        return self._test
class DataSet(object):
def __init__(self, session, txt_files, thread_count, batch_size, numcep, numcontext):
self._session = session
self._numcep = numcep
self._x = tf.placeholder(tf.float32, [None, numcep + (2 * numcep * numcontext)])
self._x_length = tf.placeholder(tf.int32, [])
self._y = tf.placeholder(tf.int32, [None,])
self._y_length = tf.placeholder(tf.int32, [])
self._example_queue = tf.PaddingFIFOQueue(shapes=[[None, numcep + (2 * numcep * numcontext)], [], [None,], []],
dtypes=[tf.float32, tf.int32, tf.int32, tf.int32],
capacity=2 * self._get_device_count() * batch_size)
self._enqueue_op = self._example_queue.enqueue([self._x, self._x_length, self._y, self._y_length])
self._txt_files = txt_files
self._batch_size = batch_size
self._numcontext = numcontext
self._thread_count = thread_count
self._files_circular_list = self._create_files_circular_list()
self._start_queue_threads()
def _get_device_count(self):
available_gpus = get_available_gpus()
return max(len(available_gpus), 1)
def _start_queue_threads(self):
batch_threads = [Thread(target=self._populate_batch_queue) for i in xrange(self._thread_count)]
for batch_thread in batch_threads:
batch_thread.daemon = True
batch_thread.start()
def _create_files_circular_list(self):
priorityQueue = PriorityQueue()
for txt_file in self._txt_files:
@ -45,7 +87,7 @@ class DataSet(BaseDataSet):
def _populate_batch_queue(self):
for txt_file, wav_file in self._files_circular_list:
source = audiofile_to_input_vector(wav_file, self._num_mfcc_features, self._num_context)
source = audiofile_to_input_vector(wav_file, self._numcep, self._numcontext)
source_len = len(source)
with open(txt_file) as open_txt_file:
target = text_to_char_array(open_txt_file.read())
@ -56,6 +98,17 @@ class DataSet(BaseDataSet):
self._y: target,
self._y_length: target_len})
def next_batch(self):
source, source_lengths, target, target_lengths = self._example_queue.dequeue_many(self._batch_size)
sparse_labels = ctc_label_dense_to_sparse(target, target_lengths, self._batch_size)
return source, source_lengths, sparse_labels
@property
def total_batches(self):
# Note: If len(_txt_files) % _batch_size != 0, this re-uses initial _txt_files
return int(ceil(float(len(self._txt_files)) /float(self._batch_size)))
def read_data_sets(session, data_dir, batch_size, numcep, numcontext, thread_count=8):
# Conditionally download data
TED_DATA = "TEDLIUM_release2.tar.gz"