Source code for humancompatible.explain.glance.utils.action

from typing import List, Any, Optional

import numpy as np
import numpy.typing as npt
import pandas as pd


def apply_action_pandas(
    X: pd.DataFrame,
    action: pd.Series,
    numerical_columns: List[str],
    categorical_columns: List[str],
    categorical_no_action_token: Any,
    numerical_no_action_token: Optional[Any] = None,
) -> pd.DataFrame:
    """Return a copy of `X` on which a single `action` was applied to every row.

    Numerical columns are shifted by the matching component of `action`,
    except when that component equals `numerical_no_action_token`.
    Categorical columns are overwritten, for all rows, with the matching
    component of `action`, except when that component equals
    `categorical_no_action_token`. Columns listed in neither list are copied
    unchanged. The result is cast back to the dtypes of `X`.

    Args:
        X (pd.DataFrame): matrix of observations; it is not modified.
        action (pd.Series): one entry per column of `X`; its index must match
            the columns of `X` element by element.
        numerical_columns (List[str]): numerical column names.
        categorical_columns (List[str]): categorical column names.
        categorical_no_action_token (Any): value meaning "leave this
            categorical column alone".
        numerical_no_action_token (Optional[Any]): value meaning "leave this
            numerical column alone". When None, the categorical token is
            used for numerical columns as well.

    Returns:
        pd.DataFrame: new observations resulting from the action application.

    Raises:
        AssertionError: if the columns of `X` and the index of `action` do
            not compare equal element by element.
    """
    assert (X.columns == action.index).all()

    if numerical_no_action_token is None:
        numerical_no_action_token = categorical_no_action_token

    result = X.copy(deep=True)

    # Numerical features: additive update, skipped for the no-action token.
    shifted = [
        name
        for name in numerical_columns
        if action[name] != numerical_no_action_token
    ]
    for name in shifted:
        result[name] = X[name] + action[name]

    # Categorical features: every row receives the same new value.
    overwritten = [
        name
        for name in categorical_columns
        if action[name] != categorical_no_action_token
    ]
    for name in overwritten:
        result[name] = action[name]

    # Restore the input dtypes after the column assignments above.
    return result.astype(X.dtypes)
def apply_action_numpy(
    X: npt.NDArray[np.number],
    action: npt.NDArray[np.number],
    numerical_columns: List[int],
    categorical_columns: List[int],
    categorical_no_action_token: np.number,
) -> npt.NDArray[np.number]:
    """Apply `action` to all rows of `X`.

    For numerical columns, add the respective component from `action`.
    For categorical columns, set the component of all rows to the value of
    `action`, unless it is equal to the `categorical_no_action_token`, in
    which case do nothing for this column.

    Note: input array should have a numeric dtype. Thus, categorical columns
    should be encoded by numbers (e.g. Ordinal Encoding).

    Args:
        X (npt.NDArray[np.number]): 2-D matrix of observations; it is not
            modified.
        action (npt.NDArray[np.number]): 1-D array with, for each column /
            feature, the action to be applied.
        numerical_columns (List[int]): numerical column indices.
        categorical_columns (List[int]): categorical column indices; may be
            empty.
        categorical_no_action_token (np.number): special value signifying
            no-action (i.e. equivalent to 0 for numerical columns).

    Returns:
        npt.NDArray[np.number]: new observations resulting from the action
        application.

    Raises:
        AssertionError: if `X` is not 2-D, `action` is not 1-D, or the length
            of `action` differs from the number of columns of `X`.
    """
    assert len(X.shape) == 2
    assert len(action.shape) == 1
    assert (
        X.shape[1] == action.shape[0]
    ), "action should have length equal to the number of columns"

    ret = X.copy()
    ret[:, numerical_columns] += action[numerical_columns]

    # Build the index array with an explicit integer dtype: an empty column
    # list would otherwise turn into an empty float64 array, which NumPy
    # refuses to use as an index.
    categorical_idx = np.asarray(categorical_columns, dtype=np.intp)

    # Keep only the categorical columns whose action is a real change.
    active_idx = categorical_idx[
        action[categorical_idx] != categorical_no_action_token
    ]
    ret[:, active_idx] = action[active_idx]

    return ret
def extract_actions_pandas(
    X: pd.DataFrame,
    cfs: pd.DataFrame,
    categorical_features: List[str],
    numerical_features: List[str],
    categorical_no_action_token: Any,
):
    """
    Derive, row by row, the actions that turn `X` into `cfs`.

    A categorical cell becomes the counterfactual value when it differs from
    the original one, and `categorical_no_action_token` when both agree.
    A numerical cell becomes the counterfactual value minus the original one.
    Columns listed in neither feature list keep the values they have in `X`.

    Parameters:
    ----------
    X : pd.DataFrame
        The original dataset; one row per instance, one column per feature.
        It is not modified.
    cfs : pd.DataFrame
        The counterfactual dataset, structured like `X`.
    categorical_features : List[str]
        Names of the categorical columns of `X` and `cfs`.
    numerical_features : List[str]
        Names of the numerical columns of `X` and `cfs`.
    categorical_no_action_token : Any
        Value written into categorical cells for which `X` and `cfs` agree.

    Returns:
    -------
    pd.DataFrame
        A frame built from a deep copy of `X` holding the extracted actions:
        - categorical features: the `cfs` value where it differs from `X`,
          otherwise `categorical_no_action_token`;
        - numerical features: `cfs` minus `X`.
    """
    extracted = X.copy(deep=True)

    for feature in categorical_features:
        unchanged = X[feature] == cfs[feature]
        changed = ~unchanged
        extracted.loc[unchanged, feature] = categorical_no_action_token
        extracted.loc[changed, feature] = cfs.loc[changed, feature]

    for feature in numerical_features:
        extracted[feature] = cfs[feature] - X[feature]

    return extracted
def apply_actions_pandas_rows(
    X: pd.DataFrame,
    actions: pd.DataFrame,
    numerical_columns: List[str],
    categorical_columns: List[str],
    categorical_no_action_token: object,
) -> pd.DataFrame:
    """
    Apply a per-row action frame to `X` and return the transformed copy.

    Numerical columns receive `X` plus the matching `actions` column.
    Categorical cells take the value from `actions` unless that value equals
    `categorical_no_action_token`, in which case the value from `X` is kept.

    Parameters:
    ----------
    X : pd.DataFrame
        The original dataset; one row per instance, one column per feature.
        It is not modified.
    actions : pd.DataFrame
        The actions to apply, one row per row of `X`.
        - Numerical columns: the amount to add to the feature in `X`.
        - Categorical columns: the new value, or
          `categorical_no_action_token`.
    numerical_columns : List[str]
        Names of the numerical columns of `X` and `actions`.
    categorical_columns : List[str]
        Names of the categorical columns of `X` and `actions`.
    categorical_no_action_token : object
        Value meaning that a categorical cell must be left as it is.

    Returns:
    -------
    pd.DataFrame
        A frame built from a deep copy of `X` with the actions applied:
        - numerical columns: original value plus the action;
        - categorical columns: the action value where one is given,
          otherwise the original value from `X`.
    """
    updated = X.copy(deep=True)

    for name in numerical_columns:
        updated[name] = X[name] + actions[name]

    for name in categorical_columns:
        keep_original = actions[name] == categorical_no_action_token
        take_action = ~keep_original
        # `.values` strips the index so the assignment is purely positional
        # within the selected rows.
        updated.loc[take_action, name] = actions.loc[take_action, name].values
        updated.loc[keep_original, name] = X.loc[keep_original, name].values

    return updated
def actions_mean_pandas(
    actions: pd.DataFrame,
    numerical_features: List[str],
    categorical_features: List[str],
    categorical_no_action_token: Any,
) -> pd.Series:
    """
    Summarise a frame of per-row actions into a single action.

    For numerical features, the result is the mean of the column. For
    categorical features, the result is the most frequent value among the
    entries that differ from `categorical_no_action_token`; when no such
    entry can be counted (every entry is the token, the column is empty, or
    the remaining entries are all missing), the token itself is returned.

    Parameters:
    ----------
    actions : pd.DataFrame
        A DataFrame where each row represents an instance, and each column
        represents an action for a feature (either numerical or categorical).
    numerical_features : List[str]
        List of columns in `actions` that are numerical features.
    categorical_features : List[str]
        List of columns in `actions` that are categorical features.
    categorical_no_action_token : Any
        A token or value that indicates no action is needed for categorical
        features.

    Returns:
    -------
    pd.Series
        An object-dtype Series indexed by the columns of `actions` where:
        - numerical features hold the mean of the respective column;
        - categorical features hold the most frequent real action of the
          respective column, or `categorical_no_action_token` if there is
          none;
        - columns listed in neither feature list are left unset (NaN).
    """
    ret = pd.Series(index=actions.columns, dtype="object")
    ret[numerical_features] = actions[numerical_features].mean()

    for col in categorical_features:
        column = actions[col]
        # Count only real actions; the no-action token must never win the
        # vote just because most rows need no change.
        real_action_counts = column[
            column != categorical_no_action_token
        ].value_counts()
        if real_action_counts.empty:
            ret[col] = categorical_no_action_token
        else:
            # value_counts sorts by frequency, most common first.
            ret[col] = real_action_counts.index[0]

    return ret