Source code for humancompatible.explain.facts.__init__

from typing import List, Dict, Optional
from collections import defaultdict

import pandas as pd
from pandas import DataFrame
from sklearn.base import BaseEstimator

# The FACTS building blocks are imported from sibling modules. When any of
# these imports fails, a warning is logged instead of letting the ImportError
# propagate, so importing this module itself still succeeds.
try:
    from .parameters import ParameterProxy, feature_change_builder
    from .misc import (
        valid_ifthens,
        calc_costs,
        rules2rulesbyif,
        select_rules_subset,
        select_rules_subset_KStest,
        cum_corr_costs_all
    )
    from .formatting import print_recourse_report, print_recourse_report_KStest_cumulative
    from .rule_filters import delete_fair_rules
except ImportError as error:
    from logging import warning
    # NOTE(review): the install hint names the `aif360[FACTS]` extra; confirm
    # this is still the right package for this distribution.
    warning("{}: FACTS will be unavailable. To install, run:\n"
            "pip install 'aif360[FACTS]'".format(error))
    # Placeholder so that the name exported through `__all__` below exists.
    # NOTE(review): the other helpers imported in the `try` block are left
    # undefined on this path, so using `FACTS` will fail with a NameError.
    print_recourse_report = None

# Names exported by `from <this module> import *`.
__all__ = ["FACTS", "print_recourse_report", "FACTS_bias_scan"]


[docs]
def FACTS_bias_scan(
    X: pd.DataFrame,
    clf: BaseEstimator,
    prot_attr: str,
    metric: str,
    categorical_features: Optional[List[str]] = None,
    freq_itemset_min_supp: float = 0.1,
    feature_weights: Optional[Dict[str, float]] = None,
    feats_allowed_to_change: Optional[List[str]] = None,
    feats_not_allowed_to_change: Optional[List[str]] = None,
    viewpoint: str = "macro",
    sort_strategy: str = "max-cost-diff-decr",
    top_count: int = 1,
    phi: float = 0.5,
    c: float = 0.5,
    verbose: bool = True,
    print_recourse_report: bool = False,
    show_subgroup_costs: bool = False,
    show_action_costs: bool = False,
    is_correctness_metric: bool = False,
):
    """Identify the subgroups with the most difficulty achieving recourse.

    FACTS is an efficient, model-agnostic, highly parameterizable, and
    explainable framework for evaluating subgroup fairness through
    counterfactual explanations [#FACTS23]_.

    Note:
        This function is a wrapper to run the FACTS framework from start to
        finish. Its purpose is to provide an API which is both closer to the
        `detectors` API and more succinct.

        For more options and greater control (including the option to cache
        some intermediate results and then apply more than one metric fast),
        consider using the :class:`FACTS` class.

    References:
        .. [#FACTS23] `L. Kavouras, K. Tsopelas, G. Giannopoulos,
           D. Sacharidis, E. Psaroudaki, N. Theologitis, D. Rontogiannis,
           D. Fotakis, I. Emiris, "Fairness Aware Counterfactuals for
           Subgroups", arXiv preprint, 2023.
           <https://arxiv.org/abs/2306.14978>`_

    Args:
        X (DataFrame): Dataset given as a :class:`pandas.DataFrame`. As in
            standard scikit-learn convention, it is expected to contain one
            instance per row and one feature / explanatory variable per
            column (labels not needed, we already have an ML model).

        clf (sklearn.base.BaseEstimator): A trained and ready to use
            classifier, implementing method `predict(X)`, where `X` is
            the matrix of features; predictions returned by `predict(X)`
            are either 0 or 1. In other words, fitted scikit-learn
            classifiers.

        prot_attr (str): the name of the column that represents the
            protected attribute.

        metric (str): required; one of the following choices

            - "equal-effectiveness"
            - "equal-choice-for-recourse"
            - "equal-effectiveness-within-budget"
            - "equal-cost-of-effectiveness"
            - "equal-mean-recourse"
            - "fair-tradeoff"

            For explanation of each of those metrics, refer either to the
            paper [#FACTS23]_ or the demo_FACTS notebook.

        categorical_features (list(str), optional): the list of categorical
            features. The default is to choose (dynamically, inside `fit`) the
            columns of the dataset with types "object" or "category".

        freq_itemset_min_supp (float, optional): minimum support for all the runs
            of the frequent itemset mining algorithm (specifically, `FP Growth <https://en.wikipedia.org/wiki/Association_rule_learning#FP-growth_algorithm>`_).
            We mine frequent itemsets to generate candidate subpopulation groups and candidate actions.
            For more information, see paper [#FACTS23]_.
            Defaults to 10%.

        feature_weights (dict(str, float), optional): the weights for each feature. Used in the calculation
            of the cost of a suggested change. Specifically, the term corresponding to each feature is
            multiplied by this weight.
            Defaults to None, which means a weight of 1 for all features.

        feats_allowed_to_change (list(str), optional): if provided, only
            allows these features to change value in the suggested recourses.
            Default: no frozen features.
            *Note*: providing both `feats_allowed_to_change` and
            `feats_not_allowed_to_change` is currently treated as an error.

        feats_not_allowed_to_change (list(str), optional): if provided,
            prevents these features from changing at all in any given
            recourse.
            Default: no frozen features.
            *Note*: providing both `feats_allowed_to_change` and
            `feats_not_allowed_to_change` is currently treated as an error.

        viewpoint (str, optional): "macro" or "micro". Refers to the
            notions of "macro viewpoint" and "micro viewpoint" defined
            in section 2.2 of the paper [#FACTS23]_.

            As a short explanation, consider a set of actions A and a
            subgroup (cohort / set of individuals) G. Metrics with the
            macro viewpoint interpretation are constrained to always apply
            one action from A to the entire G, while metrics with the micro
            interpretation are allowed to give each individual in G the
            min-cost action from A which changes the individual's class.

            Note that not all combinations of `metric` and `viewpoint` are
            valid, e.g. "Equal Choice for Recourse" only has a macro
            interpretation.

            Defaults to "macro".

        sort_strategy (str, optional): one of the following choices

            - `"max-cost-diff-decr"`: simply rank the groups in descending \
                order according to the unfairness metric.
            - `"max-cost-diff-decr-ignore-forall-subgroups-empty"`: ignore \
                groups for which we have no available actions whatsoever.
            - `"max-cost-diff-decr-ignore-exists-subgroup-empty"`: ignore \
                groups for which at least one protected subgroup has \
                no available actions.

            Defaults to "max-cost-diff-decr".

        top_count (int, optional): the number of subpopulation groups that
            the algorithm will keep.
            Defaults to 1, i.e. returns the most biased group.

        phi (float, optional): effectiveness threshold. Real number in [0, 1].
            Applicable for "equal-choice-for-recourse" and
            "equal-cost-of-effectiveness" metrics. For these two metrics, an
            action is considered to achieve recourse for a subpopulation group
            if at least `phi` % of the group's individuals achieve recourse.
            Defaults to 0.5.

        c (float, optional): cost budget. Real number. Applicable for
            "equal-effectiveness-within-budget" metric. Specifies the maximum
            cost that can be paid for an action (by the individual, by a
            central authority etc.)
            Defaults to 0.5.

        verbose (bool, optional): whether to print intermediate messages and
            progress bar. Defaults to True.

        print_recourse_report (bool, optional): whether to print a detailed
            and annotated report of the most biased groups to stdout. If False,
            the most biased groups are only computed and returned.
            Defaults to False.

        show_subgroup_costs (bool, optional): Whether to show the costs assigned
            to each protected subgroup.
            Defaults to False.

        show_action_costs (bool, optional): Whether to show the costs assigned
            to each specific action.
            Defaults to False.

        is_correctness_metric (bool, optional): if True, the metric is considered
            to quantify utility, i.e. the greater it is for a group, the
            more beneficial it is for the individuals of the group.
            Defaults to False.

    Raises:
        RuntimeError: if, after the scan, the detector exposes neither
            subgroup costs nor unfairness scores.

    Returns:
        list(tuple(dict(str, str), float)): the most biased groups as a list \
            of pairs. In each pair, the first element is the group description \
            as a dict. The second element is the value of the chosen unfairness \
            metric for this group: the stored unfairness score when the \
            detector provides one, otherwise the difference between the \
            largest and the smallest cost among the group's protected \
            subgroups.
    """
    # Build the "weight 1 for every feature" mapping per call. A defaultdict
    # used directly as the parameter default would be a single object shared
    # (and silently grown on every missing-key lookup) by all calls.
    if feature_weights is None:
        feature_weights = defaultdict(lambda: 1)

    detector = FACTS(
        clf=clf,
        prot_attr=prot_attr,
        categorical_features=categorical_features,
        freq_itemset_min_supp=freq_itemset_min_supp,
        feature_weights=feature_weights,
        feats_allowed_to_change=feats_allowed_to_change,
        feats_not_allowed_to_change=feats_not_allowed_to_change,
    )

    detector = detector.fit(X=X, verbose=verbose)

    detector.bias_scan(
        metric=metric,
        viewpoint=viewpoint,
        sort_strategy=sort_strategy,
        top_count=top_count,
        phi=phi,
        c=c,
    )

    # Inside this function the name `print_recourse_report` is the boolean
    # flag (it shadows the module-level function); the report itself is
    # produced through the detector's method.
    if print_recourse_report:
        detector.print_recourse_report(
            show_subgroup_costs=show_subgroup_costs,
            show_action_costs=show_action_costs,
            correctness_metric=is_correctness_metric,
        )

    top_rules = detector.top_rules
    # Restrict to the selected groups *before* scoring, so that groups which
    # are not returned anyway cost nothing and cannot make max()/min() fail.
    if detector.subgroup_costs is not None:
        scores = {
            sg: max(costs.values()) - min(costs.values())
            for sg, costs in detector.subgroup_costs.items()
            if sg in top_rules
        }
    elif detector.unfairness is not None:
        scores = {
            sg: score
            for sg, score in detector.unfairness.items()
            if sg in top_rules
        }
    else:
        # Explicit error rather than `assert`, which is stripped under -O.
        raise RuntimeError(
            "bias_scan produced neither subgroup costs nor unfairness scores."
        )

    return [(sg.to_dict(), score) for sg, score in scores.items()]



[docs]
class FACTS(BaseEstimator):
    """Fairness aware counterfactuals for subgroups (FACTS) detector.

    FACTS is an efficient, model-agnostic, highly parameterizable, and
    explainable framework for evaluating subgroup fairness through
    counterfactual explanations [#FACTS23]_.

    This class is a wrapper for the various methods exposed by the
    FACTS framework. Typical usage is `fit`, then `bias_scan` (possibly
    several times with different metrics), then `print_recourse_report`.

    References:
        .. [#FACTS23] `L. Kavouras, K. Tsopelas, G. Giannopoulos,
           D. Sacharidis, E. Psaroudaki, N. Theologitis, D. Rontogiannis,
           D. Fotakis, I. Emiris, "Fairness Aware Counterfactuals for
           Subgroups", arXiv preprint, 2023.
           <https://arxiv.org/abs/2306.14978>`_
    """

    def __init__(
        self,
        clf,
        prot_attr,
        categorical_features=None,
        freq_itemset_min_supp=0.1,
        feature_weights=None,
        feats_allowed_to_change=None,
        feats_not_allowed_to_change=None,
    ):
        """
        Args:
            clf (sklearn.base.BaseEstimator): A trained and ready to use
                classifier, implementing method `predict(X)`, where `X` is
                the matrix of features; predictions returned by `predict(X)`
                are either 0 or 1. In other words, fitted scikit-learn
                classifiers.
            prot_attr (str): the name of the column that represents the
                protected attribute.
            categorical_features (list(str), optional): the list of categorical
                features. The default is to choose (dynamically, inside `fit`) the
                columns of the dataset with types "object" or "category".
            freq_itemset_min_supp (float, optional): minimum support for all the runs
                of the frequent itemset mining algorithm (specifically, `FP Growth <https://en.wikipedia.org/wiki/Association_rule_learning#FP-growth_algorithm>`_).
                We mine frequent itemsets to generate candidate subpopulation groups and candidate actions.
                For more information, see paper [#FACTS23]_.
                Defaults to 10%.
            feature_weights (dict(str, float), optional): the weights for each feature. Used in the calculation
                of the cost of a suggested change. Specifically, the term corresponding to each feature is
                multiplied by this weight.
                Defaults to None, which `fit` interprets as a weight of 1
                for all features.
            feats_allowed_to_change (list(str), optional): if provided, only
                allows these features to change value in the suggested recourses.
                Default: no frozen features.
                *Note*: providing both `feats_allowed_to_change` and
                `feats_not_allowed_to_change` is currently treated as an error.
            feats_not_allowed_to_change (list(str), optional): if provided,
                prevents these features from changing at all in any given
                recourse.
                Default: no frozen features.
                *Note*: providing both `feats_allowed_to_change` and
                `feats_not_allowed_to_change` is currently treated as an error.
        """
        # Hyper-parameters are stored untouched; defaults are resolved in
        # `fit`. In particular `feature_weights` defaults to None rather than
        # to a defaultdict instance, which would be one mutable object shared
        # by every FACTS instance.
        self.clf = clf
        self.prot_attr = prot_attr
        self.freq_itemset_min_supp = freq_itemset_min_supp
        self.categorical_features = categorical_features
        self.feature_weights = feature_weights
        self.feats_allowed_to_change = feats_allowed_to_change
        self.feats_not_allowed_to_change = feats_not_allowed_to_change

    def fit(self, X: DataFrame, verbose: bool = True):
        """Calculates subpopulation groups, actions and respective effectiveness

        Stores the results in `self.rules_by_if` and
        `self.rules_with_cumulative`, a deep copy of `X` in `self.dataset`,
        and the categorical features actually used in
        `self.categorical_features_`.

        Args:
            X (DataFrame): Dataset given as a :class:`pandas.DataFrame`. As in
                standard scikit-learn convention, it is expected to contain one
                instance per row and one feature / explanatory variable per
                column (labels not needed, we already have an ML model).
            verbose (bool): whether to print intermediate messages and progress bar. Defaults to True.

        Raises:
            ValueError: `feats_allowed_to_change` and `feats_not_allowed_to_change`
                cannot be given simultaneously.

        Returns:
            FACTS: Returns self.
        """
        if (
            self.feats_allowed_to_change is not None
            and self.feats_not_allowed_to_change is not None
        ):
            raise ValueError("Please specify only feats_allowed_to_change or feats_not_allowed_to_change, not both.")

        # Column names in dataset order, without duplicates.
        all_feats = list(dict.fromkeys(X.columns.tolist()))

        # Resolve the categorical features into a local / fitted attribute
        # instead of overwriting the hyper-parameter: otherwise a second
        # `fit` on another dataset would silently reuse the list inferred
        # from the first one.
        if self.categorical_features is None:
            categorical_features = X.select_dtypes(include=["object", "category"]).columns.to_list()
        else:
            categorical_features = list(self.categorical_features)
        self.categorical_features_ = categorical_features

        # Features that no suggested action may modify. Built as ordered
        # lists (not sets) so the result does not depend on hash ordering.
        if self.feats_allowed_to_change is not None:
            allowed = set(self.feats_allowed_to_change)
            frozen_feats = [feat for feat in all_feats if feat not in allowed]
        elif self.feats_not_allowed_to_change is not None:
            frozen_feats = list(dict.fromkeys(self.feats_not_allowed_to_change))
        else:
            frozen_feats = []

        categorical_set = set(categorical_features)
        num_features = [feat for feat in all_feats if feat not in categorical_set]

        # A fresh "weight 1 for every feature" mapping per fit when the user
        # did not supply weights.
        feature_weights = self.feature_weights
        if feature_weights is None:
            feature_weights = defaultdict(lambda: 1)

        comparators = feature_change_builder(
            X=X,
            num_cols=num_features,
            cate_cols=categorical_features,
            ord_cols=[],
            feature_weights=feature_weights,
            num_normalization=False,
        )
        params = ParameterProxy(featureChanges=comparators)

        ifthens_coverage_correctness = valid_ifthens(
            X=X,
            model=self.clf,
            sensitive_attribute=self.prot_attr,
            freqitem_minsupp=self.freq_itemset_min_supp,
            drop_infeasible=False,
            feats_not_allowed_to_change=frozen_feats,
            verbose=verbose,
        )

        rules_by_if = rules2rulesbyif(ifthens_coverage_correctness)

        if verbose:
            print("Computing percentages of individuals flipped by any action with cost up to c, for every c", flush=True)
        # Used by `bias_scan` for the "micro" viewpoint.
        self.rules_with_cumulative = cum_corr_costs_all(
            rulesbyif=rules_by_if,
            X=X,
            model=self.clf,
            sensitive_attribute=self.prot_attr,
            params=params,
            verbose=verbose,
        )
        # Used by `bias_scan` for the "macro" viewpoint.
        self.rules_by_if = calc_costs(rules_by_if, params=params)

        # Private copy, so later changes to the caller's frame do not affect
        # the "fair-tradeoff" computation in `bias_scan`.
        self.dataset = X.copy(deep=True)

        return self

    def bias_scan(
        self,
        metric: str = "equal-effectiveness",
        viewpoint: str = "macro",
        sort_strategy: str = "max-cost-diff-decr",
        top_count: int = 10,
        filter_sequence: Optional[List[str]] = None,
        phi: float = 0.5,
        c: float = 0.5
    ):
        """Examines generated subgroups and calculates the `top_count` most
        unfair ones, with respect to the chosen metric.

        Stores the final groups in instance variable `self.top_rules` and the
        respective subgroup costs in `self.subgroup_costs` (or `self.unfairness`
        for the "fair-tradeoff" metric). Whichever of the two is not
        applicable is set to None. Must be called after `fit`.

        Args:
            metric (str, optional): one of the following choices

                - "equal-effectiveness"
                - "equal-choice-for-recourse"
                - "equal-effectiveness-within-budget"
                - "equal-cost-of-effectiveness"
                - "equal-mean-recourse"
                - "fair-tradeoff"

                Defaults to "equal-effectiveness".

                For explanation of each of those metrics, refer either to the
                paper [#FACTS23]_ or the demo_FACTS notebook.

            viewpoint (str, optional): "macro" or "micro". Refers to the
                notions of "macro viewpoint" and "micro viewpoint" defined
                in section 2.2 of the paper [#FACTS23]_.

                As a short explanation, consider a set of actions A and a
                subgroup (cohort / set of individuals) G. Metrics with the
                macro viewpoint interpretation are constrained to always apply
                one action from A to the entire G, while metrics with the micro
                interpretation are allowed to give each individual in G the
                min-cost action from A which changes the individual's class.

                Note that not all combinations of `metric` and `viewpoint` are
                valid, e.g. "Equal Choice for Recourse" only has a macro
                interpretation.

                Defaults to "macro".

            sort_strategy (str, optional): one of the following choices

                - `"max-cost-diff-decr"`: simply rank the groups in descending \
                    order according to the unfairness metric.
                - `"max-cost-diff-decr-ignore-forall-subgroups-empty"`: ignore \
                    groups for which we have no available actions whatsoever.
                - `"max-cost-diff-decr-ignore-exists-subgroup-empty"`: ignore \
                    groups for which at least one protected subgroup has \
                    no available actions.

                Not used for the "fair-tradeoff" metric.
                Defaults to "max-cost-diff-decr".

            top_count (int, optional): the number of subpopulation groups that
                the algorithm will keep.
                Defaults to 10.

            filter_sequence (List[str], optional): List of various filters
                applied on the groups and / or actions. Available filters are:

                - `"remove-contained"`: does not show groups which are subsumed \
                    by other shown groups. By "subsumed" we mean that the group \
                    is defined by extra feature values, but those values are \
                    not changed by any action.
                - `"remove-below-thr-corr"`: does not show actions which are \
                    below the given effectiveness threshold. Refer also to the \
                    documentation of parameter `phi` below.
                - `"remove-above-thr-cost"`: does not show action that cost more \
                    than the given cost budget. Refer also to the documentation \
                    of parameter `c` below.
                - `"keep-rules-until-thr-corr-reached"`:
                - `"remove-fair-rules"`: do not show groups which do not exhibit \
                    bias.
                - `"keep-only-min-change"`: for each group shown, show only the \
                    suggested actions that have minimum cost, ignore the others.

                Not used for the "fair-tradeoff" metric.
                Defaults to None, meaning no filters.

            phi (float, optional): effectiveness threshold. Real number in [0, 1].
                Applicable for "equal-choice-for-recourse" and
                "equal-cost-of-effectiveness" metrics. For these two metrics, an
                action is considered to achieve recourse for a subpopulation group
                if at least `phi` % of the group's individuals achieve recourse.
                Defaults to 0.5.

            c (float, optional): cost budget. Real number. Applicable for
                "equal-effectiveness-within-budget" metric. Specifies the maximum
                cost that can be paid for an action (by the individual, by a
                central authority etc.)
                Defaults to 0.5.

        Raises:
            ValueError: if `viewpoint` is neither "macro" nor "micro".
        """
        # Validate before touching any state, so a bad call leaves the
        # results of a previous scan intact.
        if viewpoint == "macro":
            rules = self.rules_by_if
        elif viewpoint == "micro":
            rules = self.rules_with_cumulative
        else:
            raise ValueError("viewpoint parameter can be either 'macro' or 'micro'")

        if filter_sequence is None:
            filter_sequence = []

        # Remembered so `print_recourse_report` can name the metric.
        self._metric = metric

        if metric == "fair-tradeoff":
            # Number of individuals with prediction 0, per value of the
            # protected attribute.
            protected_values = self.dataset[self.prot_attr]
            negatively_predicted = self.clf.predict(self.dataset) == 0
            pop_sizes = {
                sg: ((protected_values == sg) & negatively_predicted).sum()
                for sg in protected_values.unique()
            }
            self.top_rules, self.unfairness = select_rules_subset_KStest(
                rulesbyif=rules,
                affected_population_sizes=pop_sizes,
                top_count=top_count
            )
            self.subgroup_costs = None
        else:
            self.top_rules, self.subgroup_costs = select_rules_subset(
                rulesbyif=rules,
                metric=metric,
                sort_strategy=sort_strategy,
                top_count=top_count,
                filter_sequence=filter_sequence,
                cor_threshold=phi,
                cost_threshold=c
            )
            self.unfairness = None

    def print_recourse_report(
        self,
        population_sizes=None,
        missing_subgroup_val="N/A",
        show_subgroup_costs=False,
        show_action_costs=False,
        show_cumulative_plots=False,
        show_bias=None,
        show_unbiased_subgroups=True,
        correctness_metric=False,
    ):
        """Prints a nicely formatted report of the results (subpopulation groups
        and recourses) discovered by the `bias_scan` method.

        The name of the metric used in the last `bias_scan` call is forwarded
        to the report for all metrics except "fair-tradeoff".

        Args:
            population_sizes (dict(str, int), optional): Number of individuals that
                are given the negative prediction by the model, for each subgroup.
                If given, it is included in the report together with some
                coverage percentages.
            missing_subgroup_val (str, optional): Optionally specify a value of the
                protected attribute which denotes that it is missing and should not be
                included in the printed results.
                Defaults to "N/A".
            show_subgroup_costs (bool, optional): Whether to show the costs assigned
                to each protected subgroup. Not used for the "fair-tradeoff"
                metric.
                Defaults to False.
            show_action_costs (bool, optional): Whether to show the costs assigned
                to each specific action.
                Defaults to False.
            show_cumulative_plots (bool, optional): If true, shows, for each subgroup,
                a graph of the `effectiveness cumulative distribution`, as it is
                called in [#FACTS23]_.
                Defaults to False.
            show_bias (str, optional): Specify which value of the protected
                attribute corresponds to the subgroup against which we want to find
                unfairness. Mainly useful for when the protected attribute is not
                binary (e.g. race). Not used for the "fair-tradeoff" metric.
                Defaults to None.
            show_unbiased_subgroups (bool, optional): if False, the groups are
                first passed through the fair-rules filter, so that groups
                which do not exhibit bias are left out of the report.
                Defaults to True.
            correctness_metric (bool, optional): if True, the metric is considered
                to quantify utility, i.e. the greater it is for a group, the
                more beneficial it is for the individuals of the group.
                Not used for the "fair-tradeoff" metric.
                Defaults to False.

        Raises:
            RuntimeError: if costs for groups and subgroups are empty. Most
                likely the `bias_scan` method was not run.
        """
        # These attributes only exist once `bias_scan` has run; `getattr`
        # lets a premature call reach the explicit RuntimeError below rather
        # than fail with an opaque AttributeError.
        unfairness = getattr(self, "unfairness", None)
        subgroup_costs = getattr(self, "subgroup_costs", None)

        if unfairness is None and subgroup_costs is None:
            raise RuntimeError("Something went wrong. Either subgroup_costs or unfairness should exist. Did you call `bias_scan`?")

        if unfairness is not None:
            # "fair-tradeoff" results: one unfairness score per group.
            if show_unbiased_subgroups:
                rules_to_show = self.top_rules
            else:
                # The filter expects per-subgroup costs, so wrap each score
                # in a single-entry mapping.
                mock_subgroup_costs = {sg: {"dummy": score} for sg, score in unfairness.items()}
                rules_to_show = delete_fair_rules(self.top_rules, subgroup_costs=mock_subgroup_costs)
            print_recourse_report_KStest_cumulative(
                rules_to_show,
                population_sizes=population_sizes,
                missing_subgroup_val=missing_subgroup_val,
                unfairness=unfairness,
                show_then_costs=show_action_costs,
                show_cumulative_plots=show_cumulative_plots,
            )
            return

        # All other metrics: per-subgroup costs.
        if show_unbiased_subgroups:
            rules_to_show = self.top_rules
        else:
            rules_to_show = delete_fair_rules(self.top_rules, subgroup_costs=subgroup_costs)
        # This is the module-level report function, not this method.
        print_recourse_report(
            rules_to_show,
            population_sizes=population_sizes,
            missing_subgroup_val=missing_subgroup_val,
            subgroup_costs=subgroup_costs,
            show_subgroup_costs=show_subgroup_costs,
            show_then_costs=show_action_costs,
            show_cumulative_plots=show_cumulative_plots,
            show_bias=show_bias,
            correctness_metric=correctness_metric,
            metric_name=self._metric,
        )
Source code for humancompatible.explain.facts.__init__

Source code for humancompatible.explain.facts.__init__