Source code for humancompatible.explain.facts.metrics

from typing import List, Tuple, Dict, Callable

import numpy as np
from pandas import DataFrame

from .predicate import Predicate

##### Metrics as guided by AReS paper.


[docs]
def incorrectRecoursesIfThen(
    ifclause: Predicate, thenclause: Predicate, X_aff: DataFrame, model
) -> int:
    """Count the covered instances for which the suggested recourse fails.

    The rows of ``X_aff`` that satisfy the if-clause are selected, the
    features named by the then-clause are overwritten with the then-clause
    values, and the model is queried on the modified rows.

    Args:
        ifclause: The if-clause predicate. Its ``features`` / ``values`` are
            matched against the columns of ``X_aff``.
        thenclause: The then-clause predicate. Its ``values`` are written
            into the columns listed in its ``features``.
        X_aff: The affected DataFrame. Only a copy of the covered rows is
            edited.
        model: The ML model under study. Expected to have a "predict" method.

    Returns:
        The number of covered rows minus the sum of the model predictions on
        the modified rows (presumably predictions are 0/1 with 1 meaning a
        successful recourse -- confirm against the model in use).

    Raises:
        ValueError: If no row of ``X_aff`` satisfies the if-clause.
    """
    covered_mask = (X_aff[ifclause.features] == ifclause.values).all(axis=1)
    covered_rows = X_aff[covered_mask].copy()

    # Guard: the rest of the computation is meaningless without any coverage.
    if len(covered_rows) == 0:
        raise ValueError(
            "Assuming non-negative frequent itemset threshold, total absence of covered instances should be impossible!"
        )

    # Apply the recourse action to every covered row.
    covered_rows[thenclause.features] = thenclause.values

    predictions = model.predict(covered_rows)
    n_predictions = np.shape(predictions)[0]
    n_positive = np.sum(predictions)
    return n_predictions - n_positive



##### Subgroup cost metrics of the "macro" and "micro" viewpoints.


[docs]
def if_group_cost_min_change_correctness_threshold(
    ifclause: Predicate,
    thens_corrs_costs: List[Tuple[Predicate, float, float]],
    cor_thres: float = 0.5,
) -> float:
    """Return the cheapest cost among then-clauses that are correct enough.

    Args:
        ifclause: The if-clause predicate. Not used in the computation.
        thens_corrs_costs: List of (then-clause, correctness, cost) triples.
        cor_thres: The minimum correctness threshold. Only triples whose
            correctness is greater than or equal to it are considered.

    Returns:
        The smallest cost among the qualifying triples, or ``np.inf`` when no
        triple reaches the threshold.

    """
    eligible_costs = [
        cost for _then, cor, cost in thens_corrs_costs if cor >= cor_thres
    ]
    if not eligible_costs:
        # No then-clause reaches the threshold.
        return np.inf
    return np.array(eligible_costs).min()



[docs]
def if_group_cost_recoursescount_correctness_threshold(
    ifclause: Predicate,
    thens_corrs_costs: List[Tuple[Predicate, float, float]],
    cor_thres: float = 0.5,
) -> float:
    """Return minus the number of then-clauses that are correct enough.

    Args:
        ifclause: The if-clause predicate. Not used in the computation.
        thens_corrs_costs: List of (then-clause, correctness, cost) triples.
        cor_thres: The minimum correctness threshold. Only triples whose
            correctness is greater than or equal to it are counted.

    Returns:
        The negated count of qualifying triples (0 when none qualifies).

    """
    n_eligible = sum(
        1 for _then, cor, _cost in thens_corrs_costs if cor >= cor_thres
    )
    # Negated so that more available recourses gives a lower value.
    return -n_eligible



[docs]
def if_group_maximum_correctness(
    ifclause: Predicate,
    thens_corrs_costs: List[Tuple[Predicate, float, float]],
) -> float:
    """Return the largest correctness among the given then-clauses.

    Args:
        ifclause: The if-clause predicate. Not used in the computation.
        thens_corrs_costs: List of (then-clause, correctness, cost) triples.

    Returns:
        The maximum correctness value.

    Raises:
        ValueError: If ``thens_corrs_costs`` is empty.

    """
    correctness_values = [cor for _then, cor, _cost in thens_corrs_costs]
    return max(correctness_values)



[docs]
def if_group_cost_max_correctness_cost_budget(
    ifclause: Predicate,
    then_corrs_costs: List[Tuple[Predicate, float, float]],
    cost_thres: float = 0.5,
) -> float:
    """Return the best correctness achievable within a cost budget.

    Args:
        ifclause: The if-clause predicate. Not used in the computation.
        then_corrs_costs: List of (then-clause, correctness, cost) triples.
        cost_thres: The cost budget. Only triples whose cost is less than or
            equal to it are considered.

    Returns:
        The maximum correctness among the triples within budget, or
        ``np.inf`` when no triple fits the budget.

    """
    affordable_corrs = [
        cor for _then, cor, cost in then_corrs_costs if cost <= cost_thres
    ]
    if not affordable_corrs:
        # Nothing fits the budget.
        return np.inf
    return np.array(affordable_corrs).max()




[docs]
def if_group_average_recourse_cost_conditional(
    ifclause: Predicate,
    thens: List[Tuple[Predicate, float, float]],
) -> float:
    """Compute a correctness-weighted average of the then-clause costs.

    The correctness values are read as cumulative, in list order: each
    then-clause is weighted by the increase of correctness relative to the
    previous entry (the first entry relative to 0). The weighted sum of costs
    is then normalised by the total weight.

    NOTE(review): this presumably expects ``thens`` sorted by increasing cost
    with non-decreasing correctness -- confirm against the caller.

    Args:
        ifclause: The if-clause predicate. Not used in the computation.
        thens: List of (then-clause, correctness, cost) triples.

    Returns:
        The weighted average cost, or ``np.inf`` when the total weight is not
        strictly positive (including an empty ``thens``).

    """
    cumulative_corr = np.array([corr for _then, corr, _cost in thens])
    action_costs = np.array([cost for _then, _corr, cost in thens])

    # Per-entry weight: increment of the cumulative correctness.
    weights = np.diff(cumulative_corr, prepend=0)

    total_prob = np.sum(weights)
    if not total_prob > 0:
        return np.inf
    return np.dot(weights, action_costs) / total_prob



##### Aggregations of if-group cost for all protected subgroups and subgroups in a list

# Type of an if-group cost function: it takes an if-clause predicate and a
# list of (then-clause, correctness, cost) triples and returns a single float.
if_group_cost_f_t = Callable[[Predicate, List[Tuple[Predicate, float, float]]], float]



[docs]
def calculate_if_subgroup_costs(
    ifclause: Predicate,
    thenclauses: Dict[str, Tuple[float, List[Tuple[Predicate, float, float]]]],
    group_calculator: if_group_cost_f_t
) -> Dict[str, float]:
    """Evaluate a cost function on every subgroup of a single if-clause.

    Args:
        ifclause: The if-clause predicate, forwarded to ``group_calculator``.
        thenclauses: A dictionary mapping each subgroup name to a pair of
            (coverage, list of (then-clause, correctness, cost) triples).
            The coverage is ignored here.
        group_calculator: Function called as
            ``group_calculator(ifclause, triples)`` for each subgroup.

    Returns:
        A dictionary mapping subgroup names to their calculated costs.

    """
    subgroup_costs: Dict[str, float] = {}
    for subgroup, (_coverage, thens) in thenclauses.items():
        subgroup_costs[subgroup] = group_calculator(ifclause, thens)
    return subgroup_costs




[docs]
def calculate_all_if_subgroup_costs(
    ifclauses: List[Predicate],
    all_thenclauses: List[Dict[str, Tuple[float, List[Tuple[Predicate, float, float]]]]],
    group_calculator: if_group_cost_f_t
) -> Dict[Predicate, Dict[str, float]]:
    """Compute the per-subgroup costs for a whole list of if-clauses.

    ``ifclauses`` and ``all_thenclauses`` are walked in lockstep; if their
    lengths differ, the surplus entries of the longer one are ignored.

    Args:
        ifclauses: The if-clause predicates (used as keys of the result).
        all_thenclauses: For each if-clause, a dictionary mapping subgroup
            names to (coverage, list of (then-clause, correctness, cost)).
        group_calculator: The function used to calculate the cost of each
            subgroup; forwarded to ``calculate_if_subgroup_costs``.

    Returns:
        A dictionary mapping each if-clause to its subgroup-name -> cost
        dictionary.

    """
    return {
        ifclause: calculate_if_subgroup_costs(ifclause, thenclauses, group_calculator)
        for ifclause, thenclauses in zip(ifclauses, all_thenclauses)
    }


##### Calculations of discrepancies between the costs of different subgroups (for the same if-group)


[docs]
def max_intergroup_cost_diff(
    ifclause: Predicate,
    thenclauses: Dict[str, Tuple[float, List[Tuple[Predicate, float, float]]]],
    group_calculator: if_group_cost_f_t
) -> float:
    """Return the spread between the costliest and the cheapest subgroup.

    Args:
        ifclause: The if-clause predicate.
        thenclauses: A dictionary mapping subgroup names to their
            corresponding coverage and list of (then-clause, correctness,
            cost) triples.
        group_calculator: The function used to calculate the cost of each
            subgroup; forwarded to ``calculate_if_subgroup_costs``.

    Returns:
        The largest subgroup cost minus the smallest subgroup cost.

    Raises:
        ValueError: If ``thenclauses`` contains no subgroup.

    """
    costs_by_subgroup = calculate_if_subgroup_costs(
        ifclause, thenclauses, group_calculator
    )
    costs = list(costs_by_subgroup.values())
    highest = max(costs)
    lowest = min(costs)
    return highest - lowest