Source code for fairdiverse.search.fairness_evaluator

import numpy as np
import pandas as pd
import math
import os
from sklearn.metrics import pairwise


def evaluate_runs(args_eval, qids, eval_path, runs, split):
    """
    Evaluate the runs by calculating metrics and saving the results in CSV files.

    :param args_eval : dict
        A dictionary containing evaluation configurations, including metrics and other settings.
    :param qids : list
        A list of unique query identifiers (QIDs) to evaluate.
    :param eval_path : str
        Path where evaluation results will be saved.
    :param runs : int
        Number of runs to evaluate.
    :param split : str
        The data split to evaluate (e.g., "train", "test").
    :return : None
        This function saves the evaluation results as CSV files.
    """
    for qid in qids:
        for eval_measure in args_eval['metrics']:
            output_f = os.path.join(eval_path, "Eval_QID_" + str(qid) + '_' + eval_measure + ".csv")
            if not os.path.exists(output_f):
                # collect the per-run result files and average them
                res_all = []
                for r in range(runs):
                    file_name = 'Eval_QID_' + str(qid) + '_' + eval_measure + '.csv'
                    path = os.path.join(eval_path, str(r), split, eval_measure, file_name)
                    if os.path.exists(path):
                        df = pd.read_csv(path)
                        res_all.append(df)
                if not res_all:
                    # no run produced results for this query/metric; skip it
                    continue
                res_all = pd.concat(res_all)
                res_all = res_all.groupby(['rank', 'group', 'k']).mean().reset_index()
                res_all.to_csv(output_f)
                print("--- Save eval file in ", output_f, " --- \n")
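
# A minimal sketch of the on-disk layout evaluate_runs() expects, inferred
# from the path construction above (run index, then split, then metric):
#
#   <eval_path>/<run>/<split>/<metric>/Eval_QID_<qid>_<metric>.csv
#
# e.g. eval_demo/0/test/exposure/Eval_QID_1_exposure.csv for run 0 on the
# "test" split. The averaged file is written directly under <eval_path>.
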
def evaluate(data, query_col, s_attribute, eval_path, args_eval):
    """
    Evaluate the given data by calculating metrics for each query and sensitive attribute.

    :param data : pandas.DataFrame
        The data containing the query identifiers and the sensitive attribute.
    :param query_col : str
        The column name for the query identifiers in the data.
    :param s_attribute : str
        The sensitive attribute column name.
    :param eval_path : str
        Path where evaluation results will be saved.
    :param args_eval : dict
        A dictionary containing evaluation configurations, including metrics and other settings.
    :return : None
        This function saves the evaluation results as CSV files.
    """
    if not os.path.exists(eval_path):
        os.makedirs(eval_path)
    qids = data[query_col].unique()
    groups = data[s_attribute].unique()
    for eval_measure in args_eval['metrics']:
        if not os.path.exists(os.path.join(eval_path, eval_measure)):
            os.makedirs(os.path.join(eval_path, eval_measure))
        for qid in qids:
            data_qid = data[data[query_col] == qid]
            res_qid = evaluate_qid(data_qid, eval_measure, s_attribute, groups, args_eval)
            output_f = os.path.join(eval_path, eval_measure, "Eval_QID_" + str(qid) + "_" + eval_measure + ".csv")
            res_qid.to_csv(output_f)
            print("--- Save eval file in ", output_f, " --- \n")
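
# Usage sketch (illustrative only, not part of the module). It builds a toy
# DataFrame and calls evaluate() on it. The args_eval keys ('metrics',
# 'rankings', 'k_list') mirror how evaluate_qid reads them below; the column
# names "qid", "UID", "gender", and "score" are hypothetical.
def _demo_evaluate():
    toy = pd.DataFrame({
        "qid": [1] * 6,
        "UID": list(range(6)),
        "gender": ["f", "m", "f", "m", "m", "f"],
        "score": [0.9, 0.8, 0.7, 0.6, 0.5, 0.4],
    })
    args_eval = {
        "metrics": ["select_rate", "diversity", "exposure"],
        "rankings": ["score"],  # a score column (no "__"), so sorted descending
        "k_list": [2, 4],
    }
    evaluate(toy, query_col="qid", s_attribute="gender",
             eval_path="./eval_demo", args_eval=args_eval)
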
def evaluate_qid(df, eval_measure, s_attribute, sensitive_groups, args_eval):
    """
    Evaluate the data for a single query ID, calculating the specified metric.

    :param df : pandas.DataFrame
        The data for a single query ID.
    :param eval_measure : str
        The evaluation metric to calculate.
    :param s_attribute : str
        The sensitive attribute column name.
    :param sensitive_groups : list
        A list of sensitive attribute groups.
    :param args_eval : dict
        A dictionary containing evaluation configurations, including rankings and k list.
    :return : pandas.DataFrame
        A DataFrame containing the evaluation results.
    """
    EVAL_RANKINGS = args_eval['rankings']
    seti_quotas = get_quotas_count(df, s_attribute, sensitive_groups=sensitive_groups)
    res_df = pd.DataFrame(columns=["run", "rank", "k", "group", eval_measure])
    k_list = args_eval['k_list']
    for ranking in EVAL_RANKINGS:
        # data sorted by ranking
        ranking_df = get_sort_df(ranking, df, len(df))
        for ki in k_list:
            ki = int(ki)
            res_row = [1, ranking, ki]
            all_row = res_row + ["all"]
            # data sorted by ranking value at top-k
            top_ranking = ranking_df.head(ki)
            if 'individual' in eval_measure:
                if "__" in ranking:
                    yNN = compute_individual_fairness(ranking_df, ranking)
                    all_row.append(yNN)
                else:
                    # not applicable to the output of an LTR model
                    all_row.append(-1)
            # the remaining measures are only applicable per group,
            # so the "all" row gets a -1 placeholder
            if eval_measure == "select_rate":
                all_row.append(-1)
            if eval_measure == "diversity":
                all_row.append(-1)
            if eval_measure == "exposure":
                all_row.append(-1)
            if eval_measure == "igf":
                all_row.append(-1)
            res_df.loc[res_df.shape[0]] = all_row

            # group-level evaluation
            cur_quotas = get_quotas_count(top_ranking, s_attribute, sensitive_groups)
            for gi in sensitive_groups:
                gi_row = res_row + [gi]
                if eval_measure == "select_rate":
                    # selection rate of the group inside the top-k
                    if gi in cur_quotas and seti_quotas[gi] != 0:
                        # use at least one candidate's share of the top-k as the denominator
                        expected_share = max(seti_quotas[gi], 1 / ki)
                        gi_row.append(cur_quotas[gi] / expected_share)
                    else:
                        gi_row.append(0)
                if eval_measure == "diversity":
                    if gi in cur_quotas:
                        gi_row.append(cur_quotas[gi])
                    else:
                        gi_row.append(0)
                gi_top_ranking = top_ranking[top_ranking[s_attribute] == gi]
                gi_ranking_df = ranking_df[ranking_df[s_attribute] == gi]
                if eval_measure == "exposure":
                    gi_row.append(compute_cumulative_exposer(gi_top_ranking, ki))
                if eval_measure == "igf":
                    if not gi_top_ranking.shape[0]:
                        gi_row.append(-1)
                    else:
                        gi_row.append(compute_igf_ratio(list(gi_top_ranking["UID"]), gi_ranking_df, ranking))
                if 'individual' in eval_measure:
                    # not applicable at the group level
                    gi_row.append(-1)
                res_df.loc[res_df.shape[0]] = gi_row
    return res_df
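
# The frame returned above holds one row per (ranking, k, group), with the
# metric in the last column, e.g. (illustrative values only):
#
#   run  rank   k  group  exposure
#   1    score  2  all    -1
#   1    score  2  f      1.0
#   1    score  2  m      0.63
#
# "score" and the group labels come from the caller's data.
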
def compute_individual_fairness(data, ranking):
    """
    Compute the individual fairness score for the ranking.

    :param data : pandas.DataFrame
        The data to evaluate fairness.
    :param ranking : str
        The ranking column to use for evaluation.
    :return : float
        The individual fairness score between 0 and 1.
    """
    feature_columns = [col for col in data if 'X' in col and '_' not in col]
    distances_data = pairwise.euclidean_distances(data[feature_columns].to_numpy(),
                                                  data[feature_columns].to_numpy())
    exposers = data[ranking].apply(lambda x: 1 / math.log2(x + 1))
    distances_exposer = pairwise.euclidean_distances(exposers.to_numpy().reshape(-1, 1),
                                                     exposers.to_numpy().reshape(-1, 1))
    yNN = 1 - np.mean(np.abs(distances_data - distances_exposer))
    return yNN
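
# Sketch of compute_individual_fairness on toy data. Feature columns are
# selected above as those containing 'X' and no underscore, so "X1"/"X2"
# here are hypothetical names that match that convention; "fair__rank"
# holds 1-based rank positions.
def _demo_individual_fairness():
    demo = pd.DataFrame({
        "X1": [0.1, 0.2, 0.9],
        "X2": [0.0, 0.1, 0.8],
        "fair__rank": [1, 2, 3],
    })
    # yNN approaches 1 when items with similar features get similar exposure
    return compute_individual_fairness(demo, "fair__rank")
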
def compute_cumulative_exposer(df_top, ki):
    """
    Compute the cumulative exposure for the top-k items in the ranking.

    :param df_top : pandas.DataFrame
        The top-k items in the ranking.
    :param ki : int
        The number of top-k items.
    :return : float
        The cumulative exposure score.
    """
    if len(df_top) == 0:
        return -1
    # df_top holds at most ki rows, so ranks run from 1 to len(df_top);
    # copy to avoid mutating the caller's slice
    df_top = df_top.copy()
    df_top["rank"] = list(range(1, len(df_top) + 1))
    exposer_top_k = sum(df_top['rank'].apply(lambda x: 1 / math.log2(x + 1)))
    return exposer_top_k
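
# Worked example: the group's rows are re-indexed 1..n and discounted by
# position, so a group with 3 items in the top-k gets
#   1/log2(2) + 1/log2(3) + 1/log2(4) = 1.0 + 0.631 + 0.5 ≈ 2.131
# (a DCG-style position discount; an empty group returns -1).
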
def compute_igf_ratio(top_k_IDS, _orig_df, _orig_sort_col):
    """
    Compute the IGF (Item Group Fairness) ratio for the top-k items.

    :param top_k_IDS : list
        A list of IDs representing the top-k items.
    :param _orig_df : pandas.DataFrame
        The original DataFrame containing all items.
    :param _orig_sort_col : str
        The column name to sort the original DataFrame by.
    :return : float
        The IGF ratio.
    """
    accepted_candidates = _orig_df["UID"].isin(top_k_IDS)
    rejected_candidates = ~accepted_candidates
    if sum(rejected_candidates) == 0:
        return 1
    _lowest_accepted_score = min(_orig_df[accepted_candidates][_orig_sort_col])
    _highest_rejected_score = max(_orig_df[rejected_candidates][_orig_sort_col])
    if _highest_rejected_score == 0:
        return 1
    cur_res = _lowest_accepted_score / _highest_rejected_score
    return min(cur_res, 1)
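
# Sketch of compute_igf_ratio: the score of the weakest accepted candidate
# divided by the score of the strongest rejected one, capped at 1. The "UID"
# column follows the module's convention; "score" is a hypothetical column.
def _demo_igf_ratio():
    pool = pd.DataFrame({"UID": [1, 2, 3, 4],
                         "score": [0.9, 0.7, 0.8, 0.4]})
    # accept UIDs 1 and 2 -> lowest accepted 0.7, highest rejected 0.8
    return compute_igf_ratio([1, 2], pool, "score")  # 0.7 / 0.8 = 0.875
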
def get_quotas_count(_df, s_attribute, sensitive_groups):
    """
    Calculate the quota count for each sensitive group in the data.

    :param _df : pandas.DataFrame
        The data to calculate the quotas for.
    :param s_attribute : str
        The sensitive attribute column name.
    :param sensitive_groups : list
        A list of sensitive attribute groups.
    :return : dict
        A dictionary with the sensitive group names as keys and their corresponding quota counts as values.
    """
    res_dict = {}
    for s in sensitive_groups:
        mask = _df[s_attribute] == s
        res_dict[s] = sum(mask) / len(_df)
    return res_dict
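
# Example: group shares over a pool. With 4 rows of which 3 are "f",
# get_quotas_count returns {"f": 0.75, "m": 0.25}; groups absent from the
# frame get 0.0. The column name "gender" is hypothetical.
def _demo_quotas():
    pool = pd.DataFrame({"gender": ["f", "f", "f", "m"]})
    return get_quotas_count(pool, "gender", ["f", "m"])  # {'f': 0.75, 'm': 0.25}
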
def get_sort_df(_sort_col, _df, _k):
    """
    Sort the DataFrame by the specified column and return the top-k rows.

    :param _sort_col : str
        The column to sort the data by.
    :param _df : pandas.DataFrame
        The DataFrame to be sorted.
    :param _k : int
        The number of top items to return.
    :return : pandas.DataFrame
        The top-k sorted rows from the DataFrame.
    """
    _df[_sort_col] = _df[_sort_col].astype(float)
    _k = int(_k)
    # columns with "__" hold rank positions (lower is better);
    # otherwise the column holds scores (higher is better)
    if "__" in _sort_col:
        sort_df = _df.sort_values(by=_sort_col, ascending=True).head(_k)
    else:
        sort_df = _df.sort_values(by=_sort_col, ascending=False).head(_k)
    return sort_df
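
# Example: "__" in the column name marks a rank column (lower is better),
# anything else is treated as a score (higher is better). Both column
# names here are hypothetical.
def _demo_sort():
    pool = pd.DataFrame({"score": [0.2, 0.9, 0.5],
                         "fair__rank": [3, 1, 2]})
    top_by_score = get_sort_df("score", pool, 2)      # rows with scores 0.9, 0.5
    top_by_rank = get_sort_df("fair__rank", pool, 2)  # rows with ranks 1, 2
    return top_by_score, top_by_rank
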