Source code for humancompatible.explain.facts.frequent_itemsets

import warnings
import numpy as np
import pandas as pd

from pandas import DataFrame
from typing import List, Tuple

from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import fpgrowth
## removes nasty mlxtend-caused warning: they set a global warning filter on
## Deprecation Warnings which then hits even when e.g. importing matplotlib.
if warnings.filters[0] == ("always", None, DeprecationWarning, None, 0):
    warnings.filters.pop(0)

from .predicate import Predicate


[docs] def preprocessDataset(data: DataFrame) -> DataFrame: """Preprocesses the input DataFrame by converting categorical columns to NumPy arrays and mapping each cell value with its column name. Args: data (DataFrame): The input DataFrame to be preprocessed. Returns: DataFrame: The preprocessed DataFrame. Raises: None """ d = data.copy() for col in d: if isinstance(d[col].dtype, pd.CategoricalDtype): d[col] = np.asarray(d[col]) d[col] = d[col].map(lambda x: (col, x)) return d
[docs] def fpgrowth_out_to_predicate_list( fpgrowth_out: DataFrame, ) -> Tuple[List[Predicate], List[float]]: """Converts the output of the FP Growth algorithm stored in a DataFrame to a list of Predicate objects and their corresponding support values. Args: fpgrowth_out (DataFrame): The DataFrame containing the output of the FP Growth algorithm. Returns: Tuple[List[Predicate], List[float]]: A tuple containing the list of Predicate objects and the list of corresponding support values. Raises: None Examples: >>> freq_itemsets = run_fpgrowth(preprocessDataset(df), min_support=0.03) >>> predicate_list = fpgrowth_out_to_predicate_list(freq_itemsets) """ predicate_set = [] for itemset in fpgrowth_out["itemsets"].to_numpy(): pred = {feature: value for feature, value in list(itemset)} pred = Predicate.from_dict(pred) predicate_set.append(pred) return predicate_set, fpgrowth_out["support"].to_numpy().tolist()
[docs] def run_fpgrowth(data: DataFrame, min_support: float = 0.001) -> DataFrame: """Runs the FP Growth algorithm on the input DataFrame to find frequent itemsets. Args: data (DataFrame): The input DataFrame. min_support (float, optional): The minimum support threshold for itemsets. Defaults to 0.001, i.e 0.1%. Returns: DataFrame: The DataFrame containing frequent itemsets sorted by support in descending order. Raises: None Examples: >>> freq_itemsets = runFPGrowth(preprocessDataset(df), min_support=0.03) """ sets = data.to_numpy().tolist() te = TransactionEncoder() sets_onehot = te.fit_transform(sets) df = DataFrame(sets_onehot, columns=te.columns_) # type: ignore frequent_itemsets = fpgrowth(df, min_support=min_support, use_colnames=True) return frequent_itemsets.sort_values(["support"], ascending=[False])