Source code for birdvoxclassify.core

import librosa
import logging
import hashlib
import json
import sys
import numpy as np
import operator
import os
import warnings
import traceback
import soundfile as sf
from collections import OrderedDict
from contextlib import redirect_stderr

with warnings.catch_warnings():
    # Suppress TF and Keras warnings when importing
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
    warnings.simplefilter("ignore")
    import tensorflow as tf
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)
    with redirect_stderr(open(os.devnull, "w")):
        from tensorflow import keras

from .birdvoxclassify_exceptions import BirdVoxClassifyError


DEFAULT_MODEL_SUFFIX = "taxonet_tv1hierarchical" \
                       "-3c6d869456b2705ea5805b6b7d08f870"
MODEL_PREFIX = 'birdvoxclassify'
DEFAULT_MODEL_NAME = "{}-{}".format(MODEL_PREFIX, DEFAULT_MODEL_SUFFIX)


def process_file(filepaths, output_dir=None, output_summary_path=None,
                 classifier=None, taxonomy=None, batch_size=512, suffix='',
                 select_best_candidates=False, hierarchical_consistency=True,
                 logger_level=logging.INFO, model_name=DEFAULT_MODEL_NAME):
    """
    Runs the bird species classification model on one or more audio clips.

    Parameters
    ----------
    filepaths : list or str
        Filepath or list of filepaths of audio files for which to run
        prediction
    output_dir : str or None [default: ``None``]
        Output directory used for outputting per-file prediction JSON files.
        If ``None``, no per-file prediction JSON files are produced.
    output_summary_path : str or None [default: ``None``]
        Output path for the summary prediction JSON file covering all
        processed audio files. If ``None``, no summary prediction file is
        produced.
    classifier : keras.models.Model or None [default: ``None``]
        Bird species classification model object. If ``None``, the model
        corresponding to ``model_name`` is loaded.
    taxonomy : dict or None [default: ``None``]
        Taxonomy JSON object. If ``None``, the taxonomy corresponding to
        ``model_name`` is loaded.
    batch_size : int [default: ``512``]
        Batch size for predictions
    suffix : str [default: ``""``]
        String to append to the output filename
    select_best_candidates : bool [default: ``False``]
        If ``True``, best candidates will be provided in the output
        dictionary instead of all classes and their probabilities.
    hierarchical_consistency : bool [default: ``True``]
        If ``True`` and if ``select_best_candidates`` is ``True``, apply
        hierarchical consistency when selecting best candidates.
    logger_level : int [default: ``logging.INFO``]
        Logger level
    model_name : str [default: birdvoxclassify.DEFAULT_MODEL_NAME]
        Name of classifier model. Should be in the format
        ``<model id>_<taxonomy version>-<taxonomy md5sum>``.

        *v0.3.1 UPDATE: model names with taxonomy md5sum
        ``2e7e1bbd434a35b3961e315cfe3832fc`` or
        ``beb9234f0e13a34c7ac41db72e85addd`` are not available in this
        version but are restored in v0.3.1 for backwards compatibility. They
        will no longer be supported starting with v0.4. Please use model
        names with taxonomy md5 checksums
        ``3c6d869456b2705ea5805b6b7d08f870`` and
        ``2f6efd9017669ef5198e48d8ec7dce4c`` (respectively) instead.*

    Returns
    -------
    output_dict : dict[str, dict]
        Output dictionary mapping audio filename to prediction dictionary.
        If ``select_best_candidates`` is ``False``, the dictionary is in the
        format produced by ``format_pred``. Otherwise, the dictionary is in
        the format produced by ``get_best_candidates``.
    """
    # Set logger level.
    logging.getLogger().setLevel(logger_level)

    # Print model.
    logging.info("Loading model: {}".format(model_name))

    # Load the classifier.
    if classifier is None:
        classifier = load_classifier(model_name)

    if taxonomy is None:
        taxonomy_path = get_taxonomy_path(model_name)
        taxonomy = load_taxonomy(taxonomy_path)

    # Create output_dir if necessary.
    if output_dir is not None:
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)

    if isinstance(filepaths, str):
        filepaths = [filepaths]

    batch_gen = batch_generator(filepaths, batch_size=batch_size)

    output_dict = {}
    for batch, batch_filepaths in batch_gen:
        batch_pred = predict(batch, classifier, logger_level)
        for idx, filepath in enumerate(batch_filepaths):
            pred = [p[idx] for p in batch_pred]
            pred_dict = format_pred(pred, taxonomy)

            if select_best_candidates:
                file_dict = get_best_candidates(
                    formatted_pred_dict=pred_dict,
                    taxonomy=taxonomy,
                    hierarchical_consistency=hierarchical_consistency)
            else:
                file_dict = pred_dict

            output_dict[filepath] = file_dict

            if output_dir:
                output_path = get_output_path(
                    filepath, suffix + '.json', output_dir)
                with open(output_path, 'w') as f:
                    json.dump(pred_dict, f)

            # Print final messages.
            logging.info("Done with file: {}.".format(filepath))

    if output_summary_path is not None:
        with open(output_summary_path, 'w') as f:
            json.dump(output_dict, f)

    return output_dict

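# A minimal usage sketch of process_file: classify a single clip with the
# default model, write per-file JSON output, and return the best candidates.
# "clip.wav" and "out" are placeholder paths, not files shipped with the
# package.
def _process_file_example():
    output = process_file(
        "clip.wav",
        output_dir="out",
        output_summary_path=os.path.join("out", "summary.json"),
        select_best_candidates=True)
    # One entry per input file, keyed by the file path passed in.
    return output["clip.wav"]
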
def format_pred(pred_list, taxonomy):
    """
    Formats a list of predictions for a single audio clip into a more
    human-readable JSON object using the given taxonomy object. The output
    will be in the following format:

    .. code-block:: javascript

        {
            <prediction level> : {
                <taxonomy id> : {
                    "probability": <float>,
                    "common_name": <str>,
                    "scientific_name": <str>,
                    "taxonomy_level_names": <str>,
                    "taxonomy_level_aliases": <dict of aliases>,
                    "child_ids": <list of children IDs>
                },
                ...
            },
            ...
        }

    Parameters
    ----------
    pred_list : list[np.ndarray [shape (1, num_labels) or (num_labels,)]]
        List of predictions at the taxonomical levels predicted by the model
        for a single example. ``num_labels`` may be different for each of the
        different levels of the taxonomy.
    taxonomy : dict
        Taxonomy JSON object

    Returns
    -------
    formatted_pred_dict : dict
        Prediction dictionary object
    """
    _validate_prediction(pred_list, taxonomy)

    formatted_pred_dict = {}
    encoding_items = taxonomy['output_encoding'].items()
    for pred, (level, encoding_list) in zip(pred_list, encoding_items):
        formatted_pred_dict[level] = {}

        if pred.ndim == 2:
            if pred.shape[0] != 1:
                err_msg = 'Attempted to provide prediction of a batch larger ' \
                          'than 1. Please use `format_pred_batch`.'
                raise BirdVoxClassifyError(err_msg)
            pred = pred.flatten()

        # Handle the binary case
        if pred.shape[-1] == 1:
            pred = np.concatenate([pred, 1.0 - pred], axis=-1)

        for prob, item in zip(pred, encoding_list):
            # Assumption: only the "other" class has more than one ref id
            # Get reference id
            if len(item['ids']) == 1:
                ref_id = item['ids'][0]
            else:
                ref_id = "other"

            # Set probability
            formatted_pred_dict[level][ref_id] = {'probability': float(prob)}

            if ref_id != "other":
                # Update dictionary with taxonomy information
                formatted_pred_dict[level][ref_id].update(
                    get_taxonomy_node(ref_id, taxonomy))
            else:
                # Update dictionary with "other" taxonomy information
                formatted_pred_dict[level][ref_id].update({
                    "common_name": "other",
                    "scientific_name": "other",
                    "taxonomy_level_names": level,
                    "taxonomy_level_aliases": {},
                    "child_ids": item['ids']
                })

    return formatted_pred_dict

def _validate_batch_pred_list(batch_pred_list):
    """
    Perform a sanity check on a list of batch predictions to ensure that the
    number of predictions at each level is consistent.

    Parameters
    ----------
    batch_pred_list : list[np.ndarray [shape (batch_size, num_labels)]]
        List of predictions at the taxonomical levels predicted by the model
        for a batch of examples. ``num_labels`` may be different for each of
        the different levels of the taxonomy.
    """
    for level_pred in batch_pred_list:
        if len(level_pred) != len(batch_pred_list[0]):
            err_msg = 'Number of predictions at each level is not consistent.'
            raise BirdVoxClassifyError(err_msg)


def _validate_prediction(prediction, taxonomy):
    """
    Perform a sanity check on a prediction to ensure that the number of
    classes at each level is consistent with the given taxonomy.

    Parameters
    ----------
    prediction : list or dict
        Unformatted prediction list or formatted prediction dictionary for a
        single example.
    taxonomy : dict
        Taxonomy JSON object
    """
    if len(prediction) != len(taxonomy['output_encoding']):
        err_msg = "Taxonomy expects {} outputs but model produced {} outputs."
        raise BirdVoxClassifyError(err_msg.format(
            len(taxonomy['output_encoding']), len(prediction)
        ))

    for idx, (level, encoding_list) \
            in enumerate(taxonomy['output_encoding'].items()):
        if type(prediction) == list:
            n_classes_est = prediction[idx].shape[-1]
        else:
            n_classes_est = len(prediction[level])
        n_classes_exp = len(encoding_list)

        if (n_classes_est != n_classes_exp) \
                and not (n_classes_est == 1 and n_classes_exp == 2):
            # Note that we make an exception for the binary case
            err_msg = "Taxonomy expects {} classes at level {} but model " \
                      "predicted {} classes."
            raise BirdVoxClassifyError(err_msg.format(
                n_classes_exp, level, n_classes_est
            ))

def format_pred_batch(batch_pred_list, taxonomy):
    """
    Formats a list of predictions for a batch of audio clips into a more
    human-readable JSON object using the given taxonomy object. The output
    will be in the form of a list of JSON objects in the format returned by
    ``format_pred``.

    Parameters
    ----------
    batch_pred_list : list[np.ndarray [shape (batch_size, num_labels)]]
        List of predictions at the taxonomical levels predicted by the model
        for a batch of examples. ``num_labels`` may be different for each of
        the different levels of the taxonomy.
    taxonomy : dict
        Taxonomy JSON object

    Returns
    -------
    pred_dict_list : list[dict]
        List of JSON dictionary objects
    """
    _validate_batch_pred_list(batch_pred_list)
    pred_dict_list = []
    for idx in range(len(batch_pred_list[0])):
        pred_list = [p[idx] for p in batch_pred_list]
        pred_dict = format_pred(pred_list, taxonomy)
        pred_dict_list.append(pred_dict)

    return pred_dict_list

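# Sketch of the intended pipeline: ``predict`` returns one
# (batch_size, num_labels) array per taxonomic level, which
# ``format_pred_batch`` turns into one readable dictionary per clip.
# ``pcen_batch`` is a placeholder array produced by ``batch_generator``
# or stacked ``compute_pcen`` outputs.
def _format_batch_example(pcen_batch, classifier, taxonomy):
    batch_pred_list = predict(pcen_batch, classifier)
    return format_pred_batch(batch_pred_list, taxonomy)
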
def get_taxonomy_node(ref_id, taxonomy):
    """
    Gets node in taxonomy corresponding to the given reference ID
    (e.g. ``1.4.1``)

    Parameters
    ----------
    ref_id : str
        Taxonomy reference ID
    taxonomy : dict
        Taxonomy JSON object

    Returns
    -------
    node : dict[str, *]
        Taxonomy node, containing information about the entity corresponding
        to the given taxonomy reference ID
    """
    if ref_id == 'other':
        return {"id": "other"}

    # Not the most efficient but shouldn't be too bad
    for item in taxonomy['taxonomy']:
        if "id" not in item:
            raise BirdVoxClassifyError("Taxonomy node does not contain an id")
        if item["id"] == ref_id:
            return item

    err_msg = "Could not find id {} in taxonomy"
    raise BirdVoxClassifyError(err_msg.format(ref_id))

def batch_generator(filepath_list, batch_size=512):
    """
    Returns a generator that, from a list of filepaths, yields batches of
    PCEN images and the corresponding filenames.

    Parameters
    ----------
    filepath_list : list[str]
        (Non-empty) list of filepaths to audio files for which to generate
        batches of PCEN images and the corresponding filenames
    batch_size : int [default: ``512``]
        Size of yielded batches

    Yields
    ------
    batch : np.ndarray [shape: (batch_size, top_freq_id, n_hops, 1)]
        PCEN batch
    batch_filepaths : list[str]
        List of filepaths corresponding to the clips in the batch
    """
    if batch_size <= 0 or not isinstance(batch_size, int):
        err_msg = 'Batch size must be a positive integer. Got {}'
        raise BirdVoxClassifyError(err_msg.format(batch_size))

    if type(filepath_list) != list or len(filepath_list) == 0:
        raise BirdVoxClassifyError("Must provide non-empty filepath list.")

    batch = []
    batch_filepaths = []
    file_count = 0
    for filepath in filepath_list:
        # Print new line and file name.
        logging.info("-" * 72)
        logging.info("Loading file: {}".format(filepath))

        # Check for existence of the input file.
        if not os.path.exists(filepath):
            raise BirdVoxClassifyError(
                'File "{}" could not be found.'.format(filepath))

        try:
            audio, sr = sf.read(filepath)
        except Exception:
            exc_str = 'Could not open file "{}":\n{}'
            exc_formatted_str = exc_str.format(filepath,
                                               traceback.format_exc())
            raise BirdVoxClassifyError(exc_formatted_str)

        pcen = compute_pcen(audio, sr, input_format=True)[np.newaxis, ...]
        batch.append(pcen)
        batch_filepaths.append(filepath)
        file_count += 1

        if file_count == batch_size:
            yield np.vstack(batch), batch_filepaths
            file_count = 0
            batch = []
            batch_filepaths = []

    # Yield final batch
    if file_count > 0:
        yield np.vstack(batch), batch_filepaths

    return

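# Sketch of streaming prediction over many files without holding every PCEN
# patch in memory at once; ``wav_paths`` is a placeholder list of audio file
# paths.
def _streaming_prediction_example(wav_paths, classifier, taxonomy):
    results = {}
    for batch, batch_filepaths in batch_generator(wav_paths, batch_size=64):
        batch_pred = predict(batch, classifier)
        for formatted, path in zip(format_pred_batch(batch_pred, taxonomy),
                                   batch_filepaths):
            results[path] = formatted
    return results
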
def compute_pcen(audio, sr, input_format=True):
    """
    Computes PCEN (per-channel energy normalization) for the given audio
    clip.

    Parameters
    ----------
    audio : np.ndarray [shape: (N,)]
        Audio array
    sr : int
        Sample rate
    input_format : bool [default: ``True``]
        If ``True``, adds an additional channel dimension (of size 1) and
        ensures that a fixed number of PCEN frames (corresponding to
        ``get_pcen_settings()['n_hops']``) is returned. If the number of
        frames is greater, the center frames are returned. If the number of
        frames is less, the output is zero-padded.

    Returns
    -------
    pcen : np.ndarray [shape: (top_freq_id, n_hops, 1) or (top_freq_id, num_frames)]
        Per-channel energy normalization processed Mel spectrogram. If
        ``input_format=True``, will be in shape ``(top_freq_id, n_hops, 1)``.
        Otherwise it will be in shape ``(top_freq_id, num_frames)``, where
        ``num_frames`` is the number of PCEN frames for the entire audio
        clip.
    """
    # Load settings.
    pcen_settings = get_pcen_settings()

    # Standardize type to be float32 in [-1, 1]
    if audio.dtype.kind == 'i':
        max_val = max(np.iinfo(audio.dtype).max, -np.iinfo(audio.dtype).min)
        audio = audio.astype('float64') / max_val
    elif audio.dtype.kind == 'f':
        audio = audio.astype('float64')
    else:
        err_msg = 'Invalid audio dtype: {}'
        raise BirdVoxClassifyError(err_msg.format(audio.dtype))

    # Map to the range [-2**31, 2**31]
    audio = (audio * (2**31)).astype('float32')

    # Resample to 22,050 Hz
    if not sr == pcen_settings["sr"]:
        audio = librosa.resample(audio, sr, pcen_settings["sr"])

    # Compute Short-Term Fourier Transform (STFT).
    stft = librosa.stft(
        audio,
        n_fft=pcen_settings["n_fft"],
        win_length=pcen_settings["win_length"],
        hop_length=pcen_settings["hop_length"],
        window=pcen_settings["window"])

    # Compute squared magnitude coefficients.
    abs2_stft = (stft.real * stft.real) + (stft.imag * stft.imag)

    # Gather frequency bins according to the Mel scale.
    # NB: as of librosa v0.6.2, melspectrogram is type-instable and thus
    # returns 64-bit output even with a 32-bit input. Therefore, we need
    # to convert PCEN to single precision eventually. This might not be
    # necessary in the future, if the whole PCEN pipeline is kept type-stable.
    melspec = librosa.feature.melspectrogram(
        y=None,
        S=abs2_stft,
        sr=pcen_settings["sr"],
        n_fft=pcen_settings["n_fft"],
        n_mels=pcen_settings["n_mels"],
        htk=True,
        fmin=pcen_settings["fmin"],
        fmax=pcen_settings["fmax"])

    # Compute PCEN.
    pcen = librosa.pcen(
        melspec,
        sr=pcen_settings["sr"],
        hop_length=pcen_settings["hop_length"],
        gain=pcen_settings["pcen_norm_exponent"],
        bias=pcen_settings["pcen_delta"],
        power=pcen_settings["pcen_power"],
        time_constant=pcen_settings["pcen_time_constant"])

    # Convert to single floating-point precision.
    pcen = pcen.astype('float32')

    # Truncate spectrum to range 2-10 kHz.
    pcen = pcen[:pcen_settings["top_freq_id"], :]

    # Format for input to network
    if input_format:
        # Trim TFR in time to required number of hops.
        pcen_width = pcen.shape[1]
        n_hops = pcen_settings["n_hops"]
        if pcen_width >= n_hops:
            first_col = int((pcen_width - n_hops) / 2)
            last_col = int((pcen_width + n_hops) / 2)
            pcen = pcen[:, first_col:last_col]
        else:
            # Pad if not enough frames
            pad_length = n_hops - pcen_width
            left_pad = pad_length // 2
            right_pad = pad_length - left_pad
            pcen = np.pad(pcen, [(0, 0), (left_pad, right_pad)],
                          mode='constant')

        # Add channel dimension
        pcen = pcen[:, :, np.newaxis]

    # Return.
    return pcen

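# Sketch: load a clip with soundfile and compute its network-ready PCEN
# patch. With ``input_format=True`` the result should have shape
# (top_freq_id, n_hops, 1) = (120, 104, 1) regardless of clip length.
# "clip.wav" is a placeholder path and is assumed to be a mono recording.
def _compute_pcen_example():
    audio, sr = sf.read("clip.wav")
    pcen = compute_pcen(audio, sr, input_format=True)
    assert pcen.shape == (120, 104, 1)
    return pcen
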
def predict(pcen, classifier, logger_level=logging.INFO):
    """
    Performs bird species classification on PCEN arrays using the given
    model.

    Parameters
    ----------
    pcen : np.ndarray [shape (n_mels, n_hops, 1) or (batch_size, n_mels, n_hops, 1)]
        PCEN array for a single clip or a batch of clips
    classifier : keras.models.Model
        Bird species classification model object
    logger_level : int [default: ``logging.INFO``]
        Logger level

    Returns
    -------
    pred_list : list[np.ndarray [shape (batch_size or 1, num_labels)]]
        List of predictions at the taxonomical levels predicted by the model.
        ``num_labels`` may be different for each of the different levels of
        the taxonomy. If a single example is given (i.e. there is no batch
        dimension in the input PCEN), ``batch_size = 1``.
    """
    pcen_settings = get_pcen_settings()

    # Add batch dimension if we are classifying a single clip
    if pcen.ndim == 3:
        pcen = pcen[np.newaxis, ...]
    elif pcen.ndim not in (3, 4):
        err_msg = 'Invalid number of PCEN dimensions. ' \
                  'Expected 3 or 4, but got {}'
        raise BirdVoxClassifyError(err_msg.format(pcen.ndim))

    if pcen.shape[1] != pcen_settings['top_freq_id']:
        err_msg = 'Invalid number of mel-frequency bins in input PCEN. ' \
                  'Expected {} but got {}.'
        raise BirdVoxClassifyError(err_msg.format(
            pcen_settings['top_freq_id'], pcen.shape[1]
        ))
    if pcen.shape[2] != pcen_settings['n_hops']:
        err_msg = 'Invalid number of frames in input PCEN. ' \
                  'Expected {} but got {}.'
        raise BirdVoxClassifyError(err_msg.format(
            pcen_settings['n_hops'], pcen.shape[2]
        ))
    if pcen.shape[3] != 1:
        err_msg = 'Invalid number of channels in input PCEN. ' \
                  'Expected 1 but got {}.'
        raise BirdVoxClassifyError(err_msg.format(pcen.shape[3]))

    # Predict
    verbose = (logger_level < 15)
    pred = classifier.predict(pcen, verbose=verbose)

    return pred

def get_output_path(filepath, suffix, output_dir):
    """
    Returns output path to file containing bird species classification
    predictions for a given audio clip file.

    Parameters
    ----------
    filepath : str
        Path to audio file to be processed
    suffix : str
        String to append to filename (including extension)
    output_dir : str or None
        Path to directory where file will be saved. If None, will use
        directory of given filepath.

    Returns
    -------
    output_path : str
        Path to output file
    """
    base_filename = os.path.splitext(os.path.basename(filepath))[0]
    if not output_dir:
        output_dir = os.path.dirname(filepath)

    if suffix[0] != '.':
        output_filename = "{}_{}".format(base_filename, suffix)
    else:
        output_filename = base_filename + suffix

    return os.path.join(output_dir, output_filename)

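# Worked examples of the suffix handling in get_output_path: a suffix that
# starts with '.' replaces the extension, while any other suffix is appended
# after an underscore. The paths below are placeholders.
def _output_path_examples():
    assert get_output_path('/data/clip.wav', '.json', '/out') == \
        os.path.join('/out', 'clip.json')
    assert get_output_path('/data/clip.wav', 'pred.json', '/out') == \
        os.path.join('/out', 'clip_pred.json')
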
def get_pcen_settings():
    """
    Returns dictionary of Mel spectrogram and PCEN parameters for preparing
    the input to the bird species classification models.

    Returns
    -------
    pcen_settings : dict[str, *]
        Dictionary of Mel spectrogram and PCEN parameters
    """
    pcen_settings = {
        "fmin": 2000,
        "fmax": 11025,
        "hop_length": 32,
        "n_fft": 1024,
        "n_mels": 128,
        "pcen_delta": 10.0,
        "pcen_time_constant": 0.06,
        "pcen_norm_exponent": 0.8,
        "pcen_power": 0.25,
        "sr": 22050.0,
        "top_freq_id": 120,
        "win_length": 256,
        "n_hops": 104,
        "window": "flattop"}

    return pcen_settings

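# From these settings, the classifier consumes fixed-size PCEN patches of
# n_hops * hop_length / sr = 104 * 32 / 22050 ~ 0.151 s of audio (about
# 150 ms), covering the lowest top_freq_id = 120 of the n_mels = 128 Mel
# bands (roughly 2-10 kHz). A quick check of that arithmetic:
def _expected_input_duration():
    settings = get_pcen_settings()
    return settings["n_hops"] * settings["hop_length"] / settings["sr"]
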
def get_model_path(model_name):
    """
    Returns path to the bird species classification model of the given name.

    Parameters
    ----------
    model_name : str
        Name of classifier model. Should be in format
        ``<model id>_<taxonomy version>-<taxonomy md5sum>``.

        *v0.3.1 UPDATE: model names with taxonomy md5 checksum
        ``2e7e1bbd434a35b3961e315cfe3832fc`` or
        ``beb9234f0e13a34c7ac41db72e85addd`` are not available in this
        version but are restored in v0.3.1 for backwards compatibility. They
        will no longer be supported starting with v0.4. Please use model
        names with taxonomy md5 checksums
        ``3c6d869456b2705ea5805b6b7d08f870`` and
        ``2f6efd9017669ef5198e48d8ec7dce4c`` (respectively) instead.*

    Returns
    -------
    model_path : str
        Path to classifier model weights. Should be in format
        ``<BirdVoxClassify dir>/resources/models/<model id>_<taxonomy version>-<taxonomy md5sum>.h5``
    """
    # Python 3.8 requires a different model for compatibility
    if sys.version_info.major == 3 and sys.version_info.minor == 8:
        model_name = model_name.replace(MODEL_PREFIX, MODEL_PREFIX + '-py3pt8')

    if model_name.endswith("2e7e1bbd434a35b3961e315cfe3832fc"):
        warnings.warn(f"The version of taxonomy with md5 "
                      f"checksum 2e7e1bbd434a35b3961e315cfe3832fc has been "
                      f"deprecated and will be removed in v0.4. Please use "
                      f"model names with "
                      f"3c6d869456b2705ea5805b6b7d08f870 instead.",
                      DeprecationWarning, stacklevel=2)
    elif model_name.endswith("beb9234f0e13a34c7ac41db72e85addd"):
        warnings.warn(f"The version of taxonomy with md5 "
                      f"checksum beb9234f0e13a34c7ac41db72e85addd has been "
                      f"deprecated and will be removed in v0.4. Please use "
                      f"model names with "
                      f"2f6efd9017669ef5198e48d8ec7dce4c instead.",
                      DeprecationWarning, stacklevel=2)

    path = os.path.join(os.path.dirname(__file__),
                        "resources",
                        "models",
                        model_name + '.h5')
    # Use abspath to get rid of the relative path
    return os.path.abspath(path)

def load_classifier(model_name):
    """
    Loads bird species classification model of the given name.

    Parameters
    ----------
    model_name : str
        Name of classifier model. Should be in format
        ``<model id>_<taxonomy version>-<taxonomy md5sum>``.

        *v0.3.1 UPDATE: model names with taxonomy md5 checksum
        ``2e7e1bbd434a35b3961e315cfe3832fc`` or
        ``beb9234f0e13a34c7ac41db72e85addd`` are not available in this
        version but are restored in v0.3.1 for backwards compatibility. They
        will no longer be supported starting with v0.4. Please use model
        names with taxonomy md5 checksums
        ``3c6d869456b2705ea5805b6b7d08f870`` and
        ``2f6efd9017669ef5198e48d8ec7dce4c`` (respectively) instead.*

    Returns
    -------
    classifier : keras.models.Model
        Bird species classification model
    """
    model_path = get_model_path(model_name)
    if not os.path.exists(model_path):
        raise BirdVoxClassifyError(
            'Model "{}" could not be found.'.format(model_name))

    try:
        classifier = keras.models.load_model(model_path, compile=False)
    except Exception:
        exc_str = 'Could not open model "{}":\n{}'
        formatted_trace = traceback.format_exc()
        exc_formatted_str = exc_str.format(model_path, formatted_trace)
        raise BirdVoxClassifyError(exc_formatted_str)

    return classifier

def get_taxonomy_path(model_name):
    """
    Get the path to the taxonomy corresponding to the model of the given
    name. Specifically, with a model name of the format
    ``<model id>_<taxonomy version>-<taxonomy md5sum>``, the path to the
    taxonomy file
    ``<BirdVoxClassify dir>/resources/taxonomy/<taxonomy version>.json``
    is returned. The MD5 checksum of this file is compared to
    ``<taxonomy md5sum>`` to ensure that the content of the taxonomy file
    matches the format of the output that the model is expected to produce.

    Parameters
    ----------
    model_name : str
        Name of model. Should be in format
        ``<model id>_<taxonomy version>-<taxonomy md5sum>``.

        *v0.3.1 UPDATE: model names with taxonomy md5 checksums
        ``2e7e1bbd434a35b3961e315cfe3832fc`` or
        ``beb9234f0e13a34c7ac41db72e85addd`` are not available in this
        version but are restored in v0.3.1 for backwards compatibility. They
        will no longer be supported starting with v0.4. Please use model
        names with taxonomy md5 checksums
        ``3c6d869456b2705ea5805b6b7d08f870`` and
        ``2f6efd9017669ef5198e48d8ec7dce4c`` (respectively) instead.*

    Returns
    -------
    taxonomy_path : str
        Path to taxonomy file, which should be in format
        ``<BirdVoxClassify dir>/resources/taxonomy/<taxonomy version>.json``
    """
    taxonomy_version, exp_md5sum = model_name.split('_')[1].split('-')

    if taxonomy_version == "tv1hierarchical" \
            and exp_md5sum == "2e7e1bbd434a35b3961e315cfe3832fc":
        warnings.warn(f"The version of taxonomy {taxonomy_version} with md5 "
                      f"checksum {exp_md5sum} has been deprecated and will be "
                      f"removed in v0.4. Please use model names with "
                      f"3c6d869456b2705ea5805b6b7d08f870 instead.",
                      DeprecationWarning, stacklevel=2)
        taxonomy_version = "tv1deprecatedhierarchical"
    elif taxonomy_version == "tv1fine" \
            and exp_md5sum == "beb9234f0e13a34c7ac41db72e85addd":
        warnings.warn(f"The version of taxonomy {taxonomy_version} with md5 "
                      f"checksum {exp_md5sum} has been deprecated and will be "
                      f"removed in v0.4. Please use model names with "
                      f"2f6efd9017669ef5198e48d8ec7dce4c instead.",
                      DeprecationWarning, stacklevel=2)
        taxonomy_version = "tv1deprecatedfine"

    taxonomy_path = os.path.abspath(
        os.path.join(
            os.path.dirname(__file__),
            "resources",
            "taxonomy",
            taxonomy_version + '.json'))

    # Verify the MD5 checksum
    hash_md5 = hashlib.md5()
    with open(taxonomy_path, "rb") as f:
        hash_md5.update(f.read())
    md5sum = hash_md5.hexdigest()

    if exp_md5sum != md5sum:
        err_msg = 'Taxonomy corresponding to model {} has bad checksum. ' \
                  'Expected {} but got {}.'
        raise BirdVoxClassifyError(err_msg.format(
            model_name, exp_md5sum, md5sum
        ))

    return taxonomy_path

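# Sketch: resolve and load the taxonomy that matches the default model; the
# checksum embedded in the model name is verified against the bundled JSON
# file before loading.
def _load_default_taxonomy_example():
    taxonomy_path = get_taxonomy_path(DEFAULT_MODEL_NAME)
    return load_taxonomy(taxonomy_path)
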
def get_batch_best_candidates(batch_pred_list=None,
                              batch_formatted_pred_list=None, taxonomy=None,
                              hierarchical_consistency=True):
    """
    Obtain the best candidate classes for each prediction in a batch.

    Parameters
    ----------
    batch_pred_list : list or None [default: ``None``]
        List of batch predictions. If not provided,
        ``batch_formatted_pred_list`` must be provided.
    batch_formatted_pred_list : list or None [default: ``None``]
        List of formatted batch predictions. If not provided,
        ``batch_pred_list`` must be provided.
    taxonomy : dict or None [default: ``None``]
        Taxonomy JSON object used to apply hierarchical consistency. If
        ``None``, then ``hierarchical_consistency`` must be ``False``.
    hierarchical_consistency : bool [default: ``True``]
        If ``True``, apply hierarchical consistency to predictions.

    Returns
    -------
    batch_best_candidates_list : list
        List of formatted dictionaries specifying the best candidates for
        each taxonomic level.
    """
    if (batch_pred_list is not None) == (batch_formatted_pred_list is not None):
        err_msg = "Exactly one of batch_pred_list and " \
                  "batch_formatted_pred_list must be provided."
        raise BirdVoxClassifyError(err_msg)
    if hierarchical_consistency and taxonomy is None:
        err_msg = "Must provide taxonomy if hierarchical consistency is applied."
        raise BirdVoxClassifyError(err_msg)

    if batch_formatted_pred_list is None:
        batch_formatted_pred_list = format_pred_batch(batch_pred_list,
                                                      taxonomy)

    batch_best_candidates_list = []
    for formatted_pred_dict in batch_formatted_pred_list:
        best_candidate_dict = get_best_candidates(
            formatted_pred_dict=formatted_pred_dict,
            taxonomy=taxonomy,
            hierarchical_consistency=hierarchical_consistency)
        batch_best_candidates_list.append(best_candidate_dict)

    return batch_best_candidates_list

def get_best_candidates(pred_list=None, formatted_pred_dict=None,
                        taxonomy=None, hierarchical_consistency=True):
    """
    Obtain the best predicted candidate class for a prediction at all
    taxonomic levels. The output will be in the following format:

    .. code-block:: javascript

        {
            <prediction level> : {
                "probability": <float>,
                "common_name": <str>,
                "scientific_name": <str>,
                "taxonomy_level_names": <str>,
                "taxonomy_level_aliases": <dict of aliases>,
                "child_ids": <list of children IDs>
            },
            ...
        }

    Parameters
    ----------
    pred_list : list[np.ndarray [shape (1, num_labels) or (num_labels,)]] or None [default: ``None``]
        List of predictions at the taxonomical levels predicted by the model
        for a single example. If provided, ``taxonomy`` must also be
        provided. If not provided, ``formatted_pred_dict`` must be provided.
    formatted_pred_dict : dict or None [default: ``None``]
        Formatted dictionary of predictions. If not provided, ``pred_list``
        must be provided.
    taxonomy : dict or None [default: ``None``]
        Taxonomy JSON object used to apply hierarchical consistency. If
        ``None``, then ``hierarchical_consistency`` must be ``False``.
    hierarchical_consistency : bool [default: ``True``]
        If ``True``, apply hierarchical consistency to predictions.

    Returns
    -------
    best_candidates_dict : dict
        Formatted dictionary specifying the best candidate for each taxonomic
        level.
    """
    if (pred_list is not None) == (formatted_pred_dict is not None):
        err_msg = "Exactly one of pred_list and formatted_pred_dict must " \
                  "be provided."
        raise BirdVoxClassifyError(err_msg)
    if hierarchical_consistency and taxonomy is None:
        err_msg = "Must provide taxonomy if hierarchical consistency is applied."
        raise BirdVoxClassifyError(err_msg)

    if formatted_pred_dict is None:
        if taxonomy is None:
            err_msg = "Must provide taxonomy if unformatted prediction is provided."
            raise BirdVoxClassifyError(err_msg)
        # Format prediction if not provided
        formatted_pred_dict = format_pred(pred_list, taxonomy)

    if hierarchical_consistency:
        return apply_hierarchical_consistency(formatted_pred_dict, taxonomy)
    else:
        # Simply get the taxon dict w/ maximum probability, with no
        # consistency enforced
        return {level: max(taxon_dict.values(),
                           key=operator.itemgetter('probability'))
                for level, taxon_dict in formatted_pred_dict.items()}

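# Sketch: go straight from raw per-level predictions for one clip to the
# best candidate at each taxonomic level, with hierarchical consistency
# enforced. ``pred_list`` is a placeholder, e.g. ``[p[0] for p in
# predict(pcen, classifier)]``.
def _best_candidates_example(pred_list, taxonomy):
    return get_best_candidates(
        pred_list=pred_list,
        taxonomy=taxonomy,
        hierarchical_consistency=True)
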
def load_taxonomy(taxonomy_path):
    """
    Loads taxonomy JSON file as an OrderedDict to ensure consistent ordering.

    Parameters
    ----------
    taxonomy_path : str
        Path to taxonomy file.

    Returns
    -------
    taxonomy : OrderedDict
        Taxonomy object
    """
    with open(taxonomy_path, 'r') as f:
        # Assumption: output encoding levels are enumerated from coarsest
        # to finest, so we load them with OrderedDicts to ensure consistent
        # ordering.
        taxonomy = json.load(f, object_pairs_hook=OrderedDict)

    return taxonomy

def apply_hierarchical_consistency(formatted_pred_dict, taxonomy,
                                   level_threshold_dict=None,
                                   detection_threshold=0.5):
    """
    Obtain the best predicted candidate class for a prediction at all
    taxonomic levels, enforcing "top-down" hierarchical consistency. That is,
    starting from the "coarsest" taxonomic level, if the most probable class
    is considered "present" (estimated probability greater than a threshold),
    it is considered the best candidate for that level, and only taxonomic
    children of this class will be considered when choosing candidates for
    "finer" taxonomic levels. If the most probable class is not considered
    "present" (estimated probability below the same threshold), then the
    "other" class is chosen as the best candidate, with the probability
    assigned to be the complement of the most probable "consistent" class.

    Parameters
    ----------
    formatted_pred_dict : dict
        Formatted dictionary of predictions.
    taxonomy : dict
        Taxonomy JSON object used to apply hierarchical consistency.
    level_threshold_dict : dict or None [default: ``None``]
        Optional dictionary of detection thresholds for each taxonomic level.
    detection_threshold : float [default: ``0.5``]
        Detection threshold applied uniformly to all classes at all levels.
        If ``level_threshold_dict`` is provided, this is ignored.

    Returns
    -------
    best_candidates_dict : dict
        Formatted dictionary specifying the best candidate for each taxonomic
        level.
    """
    _validate_prediction(formatted_pred_dict, taxonomy)

    # Assumption: "output_encoding" contains hierarchy levels in order from
    # coarsest to finest
    taxon_levels = list(taxonomy["output_encoding"].keys())

    # Set thresholds. Note: a threshold of 0.5 corresponds to comparing the
    # argmax in-vocab class with "other" defined by 1 - max
    if level_threshold_dict is not None:
        if set(taxon_levels) != set(level_threshold_dict.keys()):
            err_msg = f'Levels in level_threshold_dict ' \
                      f'({tuple(level_threshold_dict.keys())}) ' \
                      f'do not match taxonomy levels ' \
                      f'({tuple(taxon_levels)})'
            raise BirdVoxClassifyError(err_msg)
        for level, threshold in level_threshold_dict.items():
            if not (0 < threshold < 1):
                err_msg = f'Threshold ({threshold}) for level {level} must ' \
                          f'be in (0, 1)'
                raise BirdVoxClassifyError(err_msg)
    else:
        if not (0 < detection_threshold < 1):
            err_msg = f'detection_threshold ({detection_threshold}) must ' \
                      f'be in (0, 1)'
            raise BirdVoxClassifyError(err_msg)
        level_threshold_dict = {level: detection_threshold
                                for level in taxon_levels}

    best_candidate_dict = {}
    prev_level = None
    other_reached = False
    for level_idx, level in enumerate(taxon_levels):
        other_dict = formatted_pred_dict[level]["other"]

        # Get maximum in-vocab dict
        invocab_cand_dict = \
            max([taxon_dict
                 for taxon_dict in formatted_pred_dict[level].values()
                 if 'id' in taxon_dict],
                key=operator.itemgetter('probability'))

        if not other_reached:
            if prev_level is not None:
                # Prev level's candidate assumed not to be "other" here
                prev_cand_dict = best_candidate_dict[prev_level]
                # Get most probable "hierarchically consistent" dict
                hc_cand_dict \
                    = max([taxon_dict
                           for taxon_dict
                           in formatted_pred_dict[level].values()
                           # Make sure not "other"
                           if 'id' in taxon_dict
                           # Make sure prev level candidate's leaf ids
                           # subsume the taxon leaf ids
                           and set(prev_cand_dict['child_ids']).issuperset(
                               taxon_dict['child_ids']
                               if len(taxon_dict['child_ids']) > 0
                               # Handle leaf case (i.e. no children)
                               else {taxon_dict['id']})],
                          key=operator.itemgetter('probability'))
                # Correct candidate to be hierarchically consistent
                invocab_cand_dict = hc_cand_dict

            if invocab_cand_dict['probability'] > level_threshold_dict[level]:
                # If most probable class likelihood is above threshold,
                # accept it as best candidate
                best_candidate_dict[level] = invocab_cand_dict
            else:
                # Otherwise, use "other" as best candidate
                best_candidate_dict[level] = dict(other_dict)
                # Make sure that probability is adjusted to correspond
                # to candidate in-vocab class
                best_candidate_dict[level]['probability'] \
                    = 1 - invocab_cand_dict['probability']
                other_reached = True
        else:
            # A previous level was already "other", so impose that this level
            # is also "other". Copy to avoid mutating the input prediction
            # dictionary.
            best_candidate_dict[level] = dict(other_dict)
            # The probability is adjusted to the "other" probability from the
            # previous level
            best_candidate_dict[level]['probability'] \
                = best_candidate_dict[taxon_levels[level_idx - 1]]['probability']

        prev_level = level

    return best_candidate_dict

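# Sketch: apply hierarchical consistency with per-level detection thresholds.
# The level names are taken from the taxonomy itself, so nothing is
# hard-coded; 0.5 matches the default used when no dictionary is given.
def _consistency_with_thresholds_example(formatted_pred_dict, taxonomy):
    thresholds = {level: 0.5 for level in taxonomy["output_encoding"]}
    return apply_hierarchical_consistency(
        formatted_pred_dict, taxonomy,
        level_threshold_dict=thresholds)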