import os
import sys
from argparse import ArgumentParser, RawDescriptionHelpFormatter, ArgumentTypeError

try:
    # The ABC aliases were removed from ``collections`` in Python 3.10;
    # ``collections.abc`` is the supported location.
    from collections.abc import Iterable
except ImportError:  # interpreters that predate ``collections.abc``
    from collections import Iterable

import sklearn.decomposition

from edgel3.edgel3_exceptions import EdgeL3Error
from edgel3.core import process_file
from edgel3.models import load_embedding_model
def positive_float(value):
    """An argparse type method for accepting only positive floats.

    Parameters
    ----------
    value : str or number
        Raw value to convert with ``float()``.

    Returns
    -------
    fvalue : float
        The converted value, guaranteed to be strictly greater than zero.

    Raises
    ------
    ArgumentTypeError
        If ``value`` cannot be converted to a float, or if the converted
        value is not strictly positive (this includes NaN).
    """
    try:
        fvalue = float(value)
    except (ValueError, TypeError) as e:
        raise ArgumentTypeError('Expected a positive float, error message: '
                                '{}'.format(e))
    # ``not fvalue > 0`` rather than ``fvalue <= 0``: every comparison with
    # NaN is False, so the latter would let 'nan' through as "positive".
    if not fvalue > 0:
        raise ArgumentTypeError('Expected a positive float')
    return fvalue
def positive_int(value):
    """Argparse ``type=`` callable that only lets positive integers through.

    The value is converted with ``int()``; a conversion failure or a result
    that is zero or negative is reported as an ``ArgumentTypeError``.
    Otherwise the converted integer is returned.
    """
    try:
        parsed = int(value)
    except (ValueError, TypeError) as err:
        message = 'Expected a positive int, error message: {}'.format(err)
        raise ArgumentTypeError(message)

    if parsed > 0:
        return parsed
    raise ArgumentTypeError('Expected a positive int')
def get_file_list(input_list):
    """Get list of files from the list of inputs.

    Each item that is an existing file is added directly. Each item that is
    a directory contributes the regular files found immediately inside it
    (subdirectories are not descended into).

    Parameters
    ----------
    input_list : iterable of str
        File and/or directory paths. A bare string is rejected.

    Returns
    -------
    file_list : list of str
        Absolute paths of all files found, in input order; files coming from
        a directory are listed in sorted name order.

    Raises
    ------
    ArgumentTypeError
        If ``input_list`` is not iterable, or is a string.
    EdgeL3Error
        If an item is neither an existing file nor an existing directory.
    """
    if not isinstance(input_list, Iterable) or isinstance(input_list, str):
        raise ArgumentTypeError('input_list must be iterable (and not string)')

    file_list = []
    for item in input_list:
        if os.path.isfile(item):
            file_list.append(os.path.abspath(item))
        elif os.path.isdir(item):
            # os.listdir() returns entries in arbitrary order; sort them so
            # the processing order is reproducible between runs.
            for fname in sorted(os.listdir(item)):
                path = os.path.join(item, fname)
                if os.path.isfile(path):
                    # Absolute, to match files that were passed in directly.
                    file_list.append(os.path.abspath(path))
        else:
            raise EdgeL3Error('Could not find {}'.format(item))
    return file_list
def run(
    inputs,
    output_dir=None,
    suffix=None,
    model_type='sparse',
    emb_dim=128,
    retrain_type='ft',
    sparsity=95.45,
    center=True,
    hop_size=0.1,
    verbose=False
):
    """
    Computes and saves L3 embedding for given inputs.

    Parameters
    ----------
    inputs : list of str, or str
        File/directory path or list of file/directory paths to be processed
    output_dir : str or None
        Path to directory for saving output files. If None, output files will
        be saved to the directory containing the input file.
    suffix : str or None
        String to be appended to the output filename, i.e. <base filename>_<suffix>.npy.
        If None, then no suffix will be added, i.e. <base filename>.npy.
    model_type : {sea, sparse}
        Type of smaller version of L3 model.
        If ``sea`` is selected, the audio model is a UST specialized (SEA) model.
        ``sparse`` gives a sparse L3 model with the desired ``sparsity``.
    emb_dim : {512, 256, 128, 64}
        Desired embedding dimension of the UST specialized embedding approximated (SEA) models.
    retrain_type : str
        Type of retraining after sparsification of the L3 audio. Finetuned model is returned for ``ft``
        and ``kd`` gives knowledge distilled sparse audio.
    sparsity : {95.45, 53.5, 63.5, 72.3, 87.0}
        The desired sparsity to be achieved for the audio model of L3.
        Sparsity of 95.45 corresponds to the EdgeL3 model.
    center : boolean
        If True, pads beginning of signal so timestamps correspond
        to center of window.
    hop_size : float
        Hop size in seconds.
    verbose : boolean
        If True, print progress messages to stdout.

    Returns
    -------
    None

    Raises
    ------
    EdgeL3Error
        If ``inputs`` is neither a string nor an iterable, or if one of the
        given paths does not exist.
    SystemExit
        With status -1 if no files were found in ``inputs``.
    """
    if isinstance(inputs, str):
        # A single path goes through the same expansion as a list, so that a
        # directory given as a plain string is expanded to the files in it.
        file_list = get_file_list([inputs])
    elif isinstance(inputs, Iterable):
        file_list = get_file_list(inputs)
    else:
        raise EdgeL3Error('Invalid input: {}'.format(str(inputs)))

    if not file_list:
        print('Edgel3: No WAV files found in {}. Aborting.'.format(str(inputs)))
        sys.exit(-1)

    # Load the model once and reuse it for every file.
    model = load_embedding_model(model_type, emb_dim, retrain_type, sparsity)

    # Process all files in the arguments
    for filepath in file_list:
        if verbose:
            print('Edgel3: Processing: {}'.format(filepath))
        process_file(filepath,
                     output_dir=output_dir,
                     suffix=suffix,
                     model=model,
                     center=center,
                     hop_size=hop_size,
                     verbose=verbose)

    if verbose:
        print('Edgel3: Done!')
def parse_args(args):
    """
    Parse the command-line arguments of the EdgeL3 CLI.

    Parameters
    ----------
    args : list of str
        Command-line arguments, without the program name
        (e.g. ``sys.argv[1:]``).

    Returns
    -------
    argparse.Namespace
        Namespace with the attributes ``inputs``, ``output_dir``, ``suffix``,
        ``model_type``, ``emb_dim``, ``retrain_type``, ``model_sparsity``,
        ``no_centering``, ``hop_size`` and ``quiet``.
    """
    parser = ArgumentParser(description='Extracts audio embeddings from pruned Look, Listen, and Learn models (Arandjelovic and Zisserman 2017).')

    parser.add_argument('inputs', nargs='+',
                        help='Path or paths to files to process, or path to '
                             'a directory of files to process.')

    parser.add_argument('--output-dir', '-o', default=None,
                        help='Directory to save the output file(s); '
                             'if not given, the output will be '
                             'saved to the same directory as the input WAV '
                             'file(s).')

    parser.add_argument('--suffix', '-x', default=None,
                        help='String to append to the output filenames. '
                             'If not provided, no suffix is added.')

    parser.add_argument('--model-type', '-mtype', type=str, default='sparse',
                        choices=['sea', 'sparse'],
                        help='Type of edge L3 model')

    parser.add_argument('--emb-dim', '-e', type=positive_int, default=128,
                        choices=[512, 256, 128, 64],
                        help='Embedding dimension of the UST SEA model. '
                             'Ignored for `sparse` models.')

    parser.add_argument('--retrain-type', '-retrain', type=str, default='ft',
                        choices=['ft', 'kd'],
                        help='The type of retraining after L3 audio is sparsified')

    parser.add_argument('--model-sparsity', '-sp', type=positive_float, default=95.45,
                        choices=[95.45, 53.5, 63.5, 72.3, 87.0],
                        help='Overall model sparsity desired in L3')

    parser.add_argument('--no-centering', '-n', action='store_true', default=False,
                        help='Do not pad signal; timestamps will correspond to '
                             'the beginning of each analysis window.')

    parser.add_argument('--hop-size', '-t', type=positive_float, default=0.1,
                        help='Hop size in seconds for processing audio files.')

    parser.add_argument('--quiet', '-q', action='store_true', default=False,
                        help='Suppress all non-error messages to stdout.')

    return parser.parse_args(args)
def main():
    """
    Extracts audio embeddings from smaller versions of Look, Listen, and Learn models (Arandjelovic and Zisserman 2017).

    Command-line entry point: parses ``sys.argv[1:]`` and forwards the
    options to ``run``. ``--no-centering`` and ``--quiet`` are inverted into
    the ``center`` and ``verbose`` arguments of ``run``.
    """
    args = parse_args(sys.argv[1:])

    # Echo the parsed options only when output is wanted; --quiet promises
    # that no non-error messages are written to stdout.
    if not args.quiet:
        print(args)

    run(
        args.inputs,
        output_dir=args.output_dir,
        suffix=args.suffix,
        model_type=args.model_type,
        emb_dim=args.emb_dim,
        retrain_type=args.retrain_type,
        sparsity=args.model_sparsity,
        center=not args.no_centering,
        hop_size=args.hop_size,
        verbose=not args.quiet
    )