# Source code for neurox.interpretation.iou_probe

"""Module for IoU method to rank neurons

This module implements the Intersection over Union method to rank neurons based on their activation values and the true class.

.. seealso::
        Mu, J., & Andreas, J. (2020). Compositional explanations of neurons. Advances in Neural Information Processing Systems, 33, 17153-17163.

"""
import numpy as np
from sklearn.metrics import average_precision_score


def get_neuron_ordering(X_train, y_train, threshold=0.05):
    """
    Rank neurons by how well their activations predict a tag, e.g. noun.

    Activations whose absolute value is below ``threshold`` are treated as
    zero. Each neuron (column of ``X_train``) is then scored with
    ``sklearn.metrics.average_precision_score``, using ``y_train`` as the true
    labels and the thresholded activations as the prediction scores. Neurons
    are returned from highest to lowest score.

    The input ``X_train`` is not modified; thresholding is applied to a copy.

    Parameters
    ----------
    X_train : numpy.ndarray
        Numpy Matrix of size [``NUM_TOKENS`` x ``NUM_NEURONS``]. Usually the
        output of ``interpretation.utils.create_tensors``
    y_train : numpy.ndarray
        Numpy Vector of size [``NUM_TOKENS``] with class labels for each input
        token. Usually the output of ``interpretation.utils.create_tensors``.
        NOTE(review): passed directly as ``y_true`` to
        ``average_precision_score`` -- presumably binary (tag vs. not tag);
        confirm against callers.
    threshold : float
        Activations with absolute value strictly below this are set to zero
        before scoring.

    Returns
    -------
    ranking : numpy.ndarray
        Array of ``NUM_NEURONS`` neuron indices, in decreasing order of
        importance.

    """
    # Threshold into a fresh array so the caller's activations stay intact.
    activations = np.where(np.abs(X_train) < threshold, 0, X_train)

    # One average-precision score per neuron (column).
    scores = np.array(
        [
            average_precision_score(y_train, activations[:, neuron])
            for neuron in range(activations.shape[1])
        ]
    )

    # argsort is ascending; reverse it to get the best-scoring neurons first.
    ranking = np.argsort(scores)[::-1]
    return ranking