import numpy as np
import pandas as pd
import math
import os
from sklearn.metrics import pairwise
def evaluate_runs(args_eval, qids, eval_path, runs, split):
"""
    Aggregate per-run evaluation results by averaging them across runs and save the averaged results as CSV files.
:param args_eval : dict
A dictionary containing evaluation configurations, including metrics and other settings.
:param qids : list
A list of unique query identifiers (QIDs) to evaluate.
:param eval_path : str
Path where evaluation results will be saved.
:param runs : int
        Number of runs to average over.
:param split : str
The data split to evaluate (e.g., "train", "test").
:return : None
This function saves the evaluation results as CSV files.
"""
for qid in qids:
for eval_measure in args_eval['metrics']:
output_f = os.path.join(eval_path, "Eval_QID_" + str(qid) + '_' + eval_measure + ".csv")
if not os.path.exists(output_f):
res_all = []
for r in range(runs):
file_name = 'Eval_QID_' + str(qid) + '_' + eval_measure + '.csv'
path = os.path.join(eval_path, str(r), split, eval_measure, file_name)
if os.path.exists(path):
df = pd.read_csv(path)
res_all.append(df)
                if not res_all:
                    # no per-run files were found for this query and metric
                    continue
                res_all = pd.concat(res_all)
res_all = res_all.groupby(['rank', 'group', 'k']).mean().reset_index()
res_all.to_csv(output_f)
print("--- Save eval file in ", output_f, " --- \n")
def evaluate(data, query_col, s_attribute, eval_path, args_eval):
"""
Evaluate the given data by calculating metrics for each query and sensitive attribute.
:param data : pandas.DataFrame
The data containing the query identifiers and sensitive attribute.
:param query_col : str
The column name for the query identifiers in the data.
:param s_attribute : str
The sensitive attribute column name.
:param eval_path : str
Path where evaluation results will be saved.
:param args_eval : dict
A dictionary containing evaluation configurations, including metrics and other settings.
:return : None
This function saves the evaluation results as CSV files.
"""
if not os.path.exists(eval_path):
os.makedirs(eval_path)
qids = data[query_col].unique()
groups = data[s_attribute].unique()
for eval_measure in args_eval['metrics']:
if not os.path.exists(os.path.join(eval_path, eval_measure)):
os.makedirs(os.path.join(eval_path, eval_measure))
for qid in qids:
data_qid = data[data[query_col] == qid]
res_qid = evaluate_qid(data_qid, eval_measure, s_attribute, groups, args_eval)
output_f = os.path.join(eval_path, eval_measure, "Eval_QID_" + str(qid) + "_" + eval_measure + ".csv")
res_qid.to_csv(output_f)
print("--- Save eval file in ", output_f, " --- \n")
def evaluate_qid(df, eval_measure, s_attribute, sensitive_groups, args_eval):
"""
Evaluate the data for a single query ID, calculating the specified metrics.
:param df : pandas.DataFrame
The data for a single query ID.
:param eval_measure : str
The evaluation metric to calculate.
:param s_attribute : str
The sensitive attribute column name.
:param sensitive_groups : list
A list of sensitive attribute groups.
:param args_eval : dict
A dictionary containing evaluation configurations, including rankings and k list.
:return : pandas.DataFrame
A DataFrame containing the evaluation results.
"""
EVAL_RANKINGS = args_eval['rankings']
seti_quotas = get_quotas_count(df, s_attribute, sensitive_groups=sensitive_groups)
res_df = pd.DataFrame(columns=["run", "rank", "k", "group", eval_measure])
k_list = args_eval['k_list']
for ranking in EVAL_RANKINGS:
# data sorted by ranking
ranking_df = get_sort_df(ranking, df, len(df))
for ki in k_list:
ki = int(ki)
res_row = [1, ranking, ki]
all_row = res_row + ["all"]
# data sorted by ranking value at top-k
top_ranking = ranking_df.head(ki)
if 'individual' in eval_measure:
if "__" in ranking:
yNN = compute_individual_fairness(ranking_df, ranking)
all_row.append(yNN)
else:
# not applicable on the output of an LTR model
all_row.append(-1)
            if eval_measure in ("select_rate", "diversity", "exposure", "igf"):
                # these metrics are reported per group below; the "all" row gets a placeholder
                all_row.append(-1)
res_df.loc[res_df.shape[0]] = all_row
# group-level evaluation
cur_quotas = get_quotas_count(top_ranking, s_attribute, sensitive_groups)
for gi in sensitive_groups:
gi_row = res_row + [gi]
                if eval_measure == "select_rate":
                    # selection rate: the group's share of the top-k relative to its overall share
                    if gi in cur_quotas and seti_quotas[gi] != 0:
                        # floor the overall share at 1 / ki so that at least one candidate
                        # from the group is expected in the top-k
                        expected_share = max(seti_quotas[gi], 1 / ki)
                        gi_row.append(cur_quotas[gi] / expected_share)
                    else:
                        gi_row.append(0)
if eval_measure == "diversity":
if gi in cur_quotas:
gi_row.append(cur_quotas[gi])
else:
gi_row.append(0)
gi_top_ranking = top_ranking[top_ranking[s_attribute] == gi]
gi_ranking_df = ranking_df[ranking_df[s_attribute] == gi]
if eval_measure == "exposure":
gi_row.append(compute_cumulative_exposer(gi_top_ranking, ki))
                if eval_measure == "igf":
                    if gi_top_ranking.shape[0] == 0:
                        # no members of the group appear in the top-k
                        gi_row.append(-1)
                    else:
                        gi_row.append(compute_igf_ratio(list(gi_top_ranking["UID"]), gi_ranking_df, ranking))
if 'individual' in eval_measure:
# not applicable to group
gi_row.append(-1)
res_df.loc[res_df.shape[0]] = gi_row
return res_df
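
# Single-query sketch (illustrative): calling evaluate_qid directly on toy data; the column
# names and config keys are assumptions consistent with the evaluate() sketch above.
def _example_evaluate_qid():
    demo = pd.DataFrame({
        'UID': [10, 11, 12, 13],
        'gender': ['F', 'M', 'F', 'M'],
        'Y': [0.9, 0.8, 0.4, 0.3],
    })
    args_eval = {'rankings': ['Y'], 'k_list': [2]}
    # returns one "all" row plus one row per group with the select_rate at k=2
    return evaluate_qid(demo, 'select_rate', 'gender', ['F', 'M'], args_eval)
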
def compute_individual_fairness(data, ranking):
"""
Compute the individual fairness score for the ranking.
:param data : pandas.DataFrame
The data to evaluate fairness.
:param ranking : str
The ranking column to use for evaluation.
:return : float
The individual fairness score between 0 and 1.
"""
    # feature columns are named like 'X1', 'X2', ... (contain 'X' and no underscore)
    feature_columns = [col for col in data if 'X' in col and '_' not in col]
    distances_data = pairwise.euclidean_distances(data[feature_columns].to_numpy(),
                                                  data[feature_columns].to_numpy())
    # exposure of each item, derived from its rank position
    exposures = data[ranking].apply(lambda x: 1 / math.log2(x + 1))
    distances_exposure = pairwise.euclidean_distances(exposures.to_numpy().reshape(-1, 1),
                                                      exposures.to_numpy().reshape(-1, 1))
    # yNN: similar items (small feature distance) should receive similar exposure
    yNN = 1 - np.mean(np.abs(distances_data - distances_exposure))
    return yNN
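
# Worked sketch (illustrative): individual fairness on toy data; feature columns must be
# named like 'X1', 'X2', and the ranking column ('rank__model' here, an assumption) must
# hold 1-based rank positions.
def _example_individual_fairness():
    demo = pd.DataFrame({
        'X1': [0.1, 0.2, 0.3],
        'X2': [0.0, 0.1, 0.2],
        'rank__model': [1, 2, 3],
    })
    # a value close to 1 means similar items receive similar exposure
    return compute_individual_fairness(demo, 'rank__model')
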
def compute_cumulative_exposer(df_top, ki):
"""
Compute the cumulative exposure for the top-k items in the ranking.
:param df_top : pandas.DataFrame
The top-k items in the ranking.
:param ki : int
The number of top-k items.
:return : float
The cumulative exposure score.
"""
    if len(df_top) == 0:
        return -1
    # head(ki) guarantees at most ki rows, so the positions are simply 1..len(df_top)
    df_top = df_top.copy()  # work on a copy to avoid mutating a slice of the ranking
    df_top["rank"] = list(range(1, len(df_top) + 1))
    exposure_top_k = sum(df_top['rank'].apply(lambda x: 1 / math.log2(x + 1)))
    return exposure_top_k
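
# Worked sketch (illustrative): with three items in the top-3, the cumulative exposure is
# 1/log2(2) + 1/log2(3) + 1/log2(4), about 2.131; the 'UID' values are arbitrary placeholders.
def _example_cumulative_exposure():
    demo = pd.DataFrame({'UID': [10, 11, 12]})
    return compute_cumulative_exposer(demo, ki=3)
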
def compute_igf_ratio(top_k_IDS, _orig_df, _orig_sort_col):
"""
    Compute the IGF (in-group fairness) ratio for the top-k items.
:param top_k_IDS : list
A list of IDs representing the top-k items.
:param _orig_df : pandas.DataFrame
The original DataFrame containing all items.
:param _orig_sort_col : str
The column name to sort the original DataFrame by.
:return : float
The IGF ratio.
"""
accepted_candidates = _orig_df["UID"].isin(top_k_IDS)
_lowest_accepted_score = min(_orig_df[accepted_candidates][_orig_sort_col])
rejected_candidates = ~accepted_candidates
if sum(rejected_candidates) == 0:
return 1
_highest_rejected_score = max(_orig_df[rejected_candidates][_orig_sort_col])
if _highest_rejected_score == 0:
return 1
    # the IGF ratio is the lowest accepted score over the highest rejected score, capped at 1
    cur_res = _lowest_accepted_score / _highest_rejected_score
    return min(cur_res, 1)
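
# Worked sketch (illustrative): toy scores where the top-k skips a stronger candidate; the
# lowest accepted score is 0.6 and the highest rejected score is 0.8, so the IGF ratio is
# 0.6 / 0.8 = 0.75. The column names are assumptions.
def _example_igf_ratio():
    demo = pd.DataFrame({'UID': [1, 2, 3, 4], 'score': [0.9, 0.8, 0.6, 0.5]})
    return compute_igf_ratio([1, 3], demo, 'score')
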
def get_quotas_count(_df, s_attribute, sensitive_groups):
"""
    Calculate the share of items belonging to each sensitive group in the data.
:param _df : pandas.DataFrame
The data to calculate the quotas for.
:param s_attribute : str
The sensitive attribute column name.
:param sensitive_groups : list
A list of sensitive attribute groups.
:return : dict
        A dictionary mapping each sensitive group name to its proportion of the rows in the data.
"""
res_dict = {}
for s in sensitive_groups:
mask = _df[s_attribute] == s
res_dict[s] = sum(mask) / len(_df)
return res_dict
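
# Worked sketch (illustrative): group shares on toy data; with 2 of 5 rows labelled 'F' and
# 3 labelled 'M', the result is {'F': 0.4, 'M': 0.6}. The column name is an assumption.
def _example_quotas():
    demo = pd.DataFrame({'gender': ['F', 'F', 'M', 'M', 'M']})
    return get_quotas_count(demo, 'gender', ['F', 'M'])
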
def get_sort_df(_sort_col, _df, _k):
"""
Sort the DataFrame by the specified column and return the top-k rows.
:param _sort_col : str
The column to sort the data by.
:param _df : pandas.DataFrame
The DataFrame to be sorted.
:param _k : int
The number of top items to return.
:return : pandas.DataFrame
The top-k sorted rows from the DataFrame.
"""
    _df[_sort_col] = _df[_sort_col].astype(float)
    _k = int(_k)
    if "__" in _sort_col:
        # "__" columns hold rank positions, so lower values come first
        sort_df = _df.sort_values(by=_sort_col, ascending=True).head(_k)
    else:
        # otherwise the column holds model scores, so higher values come first
        sort_df = _df.sort_values(by=_sort_col, ascending=False).head(_k)
    return sort_df
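
# Worked sketch (illustrative): 'score' has no "__", so higher values rank first and the
# top-2 rows are those with scores 0.9 and 0.5; a column named like 'score__fair' holding
# rank positions would instead be sorted ascending. The column names are assumptions.
def _example_sort():
    demo = pd.DataFrame({'UID': [1, 2, 3], 'score': [0.2, 0.9, 0.5]})
    return get_sort_df('score', demo, 2)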