import math
import os

import numpy as np
import pandas as pd
from sklearn.metrics import pairwise


def evaluate_runs(args_eval, qids, eval_path, runs, split):
    """
    Evaluate the runs by calculating metrics and saving the results in CSV files.

    :param args_eval: dict
        A dictionary containing evaluation configurations, including metrics and other settings.
    :param qids: list
        A list of unique query identifiers (QIDs) to evaluate.
    :param eval_path: str
        Path where evaluation results will be saved.
    :param runs: int
        Number of runs to evaluate.
    :param split: str
        The data split to evaluate (e.g., "train", "test").
    :return: None
        This function saves the evaluation results as CSV files.
    """
    for qid in qids:
        for eval_measure in args_eval['metrics']:
            output_f = os.path.join(eval_path, "Eval_QID_" + str(qid) + '_' + eval_measure + ".csv")
            if not os.path.exists(output_f):
                res_all = []
                for r in range(runs):
                    file_name = 'Eval_QID_' + str(qid) + '_' + eval_measure + '.csv'
                    path = os.path.join(eval_path, str(r), split, eval_measure, file_name)
                    if os.path.exists(path):
                        df = pd.read_csv(path)
                        res_all.append(df)
                if not res_all:
                    # no per-run result files found for this QID and metric; skip
                    continue
                # average the per-run results, grouped by rank, group and k
                res_all = pd.concat(res_all)
                res_all = res_all.groupby(['rank', 'group', 'k']).mean().reset_index()
                res_all.to_csv(output_f)
                print("--- Save eval file in ", output_f, " --- \n")
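# A minimal usage sketch of evaluate_runs, assuming a hypothetical layout in
# which per-run results live under eval_path/<run>/<split>/<metric>/ as
# produced by evaluate(); "eval_results" and the settings below are made up.
def _demo_evaluate_runs():
    _args = {"metrics": ["diversity"]}
    evaluate_runs(_args, qids=[1], eval_path="eval_results", runs=3, split="test")
    # averages eval_results/<r>/test/diversity/Eval_QID_1_diversity.csv over
    # runs r = 0..2 and writes eval_results/Eval_QID_1_diversity.csv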
def evaluate(data, query_col, s_attribute, eval_path, args_eval):
    """
    Evaluate the given data by calculating metrics for each query and sensitive attribute.

    :param data: pandas.DataFrame
        The data containing the query identifiers and sensitive attribute.
    :param query_col: str
        The column name for the query identifiers in the data.
    :param s_attribute: str
        The sensitive attribute column name.
    :param eval_path: str
        Path where evaluation results will be saved.
    :param args_eval: dict
        A dictionary containing evaluation configurations, including metrics and other settings.
    :return: None
        This function saves the evaluation results as CSV files.
    """
    if not os.path.exists(eval_path):
        os.makedirs(eval_path)
    qids = data[query_col].unique()
    groups = data[s_attribute].unique()
    for eval_measure in args_eval['metrics']:
        if not os.path.exists(os.path.join(eval_path, eval_measure)):
            os.makedirs(os.path.join(eval_path, eval_measure))
        for qid in qids:
            data_qid = data[data[query_col] == qid]
            res_qid = evaluate_qid(data_qid, eval_measure, s_attribute, groups, args_eval)
            output_f = os.path.join(eval_path, eval_measure, "Eval_QID_" + str(qid) + "_" + eval_measure + ".csv")
            res_qid.to_csv(output_f)
            print("--- Save eval file in ", output_f, " --- \n")
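# A minimal usage sketch of evaluate, with hypothetical column names ("QID",
# "Gender", "score") and settings; args_eval must also carry the 'rankings'
# and 'k_list' keys that evaluate_qid reads.
def _demo_evaluate():
    _demo = pd.DataFrame({
        "QID": [1, 1, 1, 1],
        "UID": [1, 2, 3, 4],
        "Gender": ["F", "M", "F", "M"],
        "score": [0.9, 0.8, 0.6, 0.3],
    })
    _args = {"metrics": ["diversity"], "rankings": ["score"], "k_list": [2]}
    evaluate(_demo, "QID", "Gender", "eval_results", _args)
    # -> eval_results/diversity/Eval_QID_1_diversity.csv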
def evaluate_qid(df, eval_measure, s_attribute, sensitive_groups, args_eval):
    """
    Evaluate the data for a single query ID, calculating the specified metrics.

    :param df: pandas.DataFrame
        The data for a single query ID.
    :param eval_measure: str
        The evaluation metric to calculate.
    :param s_attribute: str
        The sensitive attribute column name.
    :param sensitive_groups: list
        A list of sensitive attribute groups.
    :param args_eval: dict
        A dictionary containing evaluation configurations, including the rankings and the k list.
    :return: pandas.DataFrame
        A DataFrame containing the evaluation results.
    """
    EVAL_RANKINGS = args_eval['rankings']
    seti_quotas = get_quotas_count(df, s_attribute, sensitive_groups=sensitive_groups)
    res_df = pd.DataFrame(columns=["run", "rank", "k", "group", eval_measure])
    k_list = args_eval['k_list']
    for ranking in EVAL_RANKINGS:
        # data sorted by ranking
        ranking_df = get_sort_df(ranking, df, len(df))
        for ki in k_list:
            ki = int(ki)
            res_row = [1, ranking, ki]
            all_row = res_row + ["all"]
            # data sorted by ranking value at top-k
            top_ranking = ranking_df.head(ki)
            if 'individual' in eval_measure:
                if "__" in ranking:
                    yNN = compute_individual_fairness(ranking_df, ranking)
                    all_row.append(yNN)
                else:
                    # not applicable on the output of an LTR model
                    all_row.append(-1)
            if eval_measure == "select_rate":
                # applicable per group
                all_row.append(-1)
            if eval_measure == "diversity":
                # applicable per group
                all_row.append(-1)
            if eval_measure == "exposure":
                # applicable per group
                all_row.append(-1)
            if eval_measure == "igf":
                # applicable per group
                all_row.append(-1)
            res_df.loc[res_df.shape[0]] = all_row
            # group-level evaluation
            cur_quotas = get_quotas_count(top_ranking, s_attribute, sensitive_groups)
            for gi in sensitive_groups:
                gi_row = res_row + [gi]
                if eval_measure == "select_rate":
                    # selection rate to rank inside top-k
                    if gi in cur_quotas and seti_quotas[gi] != 0:
                        if seti_quotas[gi] < 1 / ki:
                            # at least one candidate in top-k
                            seti_quotas[gi] = 1 / ki
                        gi_row.append(cur_quotas[gi] / seti_quotas[gi])
                    else:
                        gi_row.append(0)
                if eval_measure == "diversity":
                    if gi in cur_quotas:
                        gi_row.append(cur_quotas[gi])
                    else:
                        gi_row.append(0)
                gi_top_ranking = top_ranking[top_ranking[s_attribute] == gi]
                gi_ranking_df = ranking_df[ranking_df[s_attribute] == gi]
                if eval_measure == "exposure":
                    gi_row.append(compute_cumulative_exposer(gi_top_ranking, ki))
                if eval_measure == "igf":
                    if gi_ranking_df is not None:
                        if not gi_top_ranking.shape[0]:
                            gi_row.append(-1)
                        else:
                            gi_row.append(compute_igf_ratio(list(gi_top_ranking["UID"]), gi_ranking_df, ranking))
                if 'individual' in eval_measure:
                    # not applicable to group
                    gi_row.append(-1)
                res_df.loc[res_df.shape[0]] = gi_row
    return res_df
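# A minimal usage sketch of evaluate_qid on hypothetical single-query data:
# 'rankings' lists the columns to sort by (a plain score column here) and
# 'k_list' the cutoffs; the result holds one "all" row plus one row per group.
def _demo_evaluate_qid():
    _demo = pd.DataFrame({
        "UID": [1, 2, 3, 4],
        "Gender": ["F", "M", "F", "M"],
        "score": [0.9, 0.8, 0.6, 0.3],
    })
    _args = {"rankings": ["score"], "k_list": [2]}
    res = evaluate_qid(_demo, "diversity", "Gender", ["F", "M"], _args)
    print(res)  # diversity of "F" and "M" within the top 2, plus the "all" row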
def compute_individual_fairness(data, ranking):
    """
    Compute the individual fairness score for the ranking.

    :param data: pandas.DataFrame
        The data to evaluate fairness.
    :param ranking: str
        The ranking column to use for evaluation.
    :return: float
        The individual fairness score between 0 and 1.
    """
    feature_columns = [col for col in data if 'X' in col and '_' not in col]
    distances_data = pairwise.euclidean_distances(data[feature_columns].to_numpy(),
                                                  data[feature_columns].to_numpy())
    exposers = data[ranking].apply(lambda x: 1 / math.log2(x + 1))
    distances_exposer = pairwise.euclidean_distances(exposers.to_numpy().reshape(-1, 1),
                                                     exposers.to_numpy().reshape(-1, 1))
    yNN = 1 - np.mean(np.abs(distances_data - distances_exposer))
    return yNN
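# A minimal sketch of compute_individual_fairness on hypothetical data: the
# feature column must contain "X" and no underscore ("X0" below), and the
# ranking column holds rank positions, which are converted to exposures.
def _demo_individual_fairness():
    _demo = pd.DataFrame({"X0": [0.1, 0.2, 0.3], "rank__fair": [1, 2, 3]})
    print(compute_individual_fairness(_demo, "rank__fair"))  # ~0.87 here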
def compute_cumulative_exposer(df_top, ki):
    """
    Compute the cumulative exposure for the top-k items in the ranking.

    :param df_top: pandas.DataFrame
        The top-k items in the ranking.
    :param ki: int
        The number of top-k items.
    :return: float
        The cumulative exposure score.
    """
    if len(df_top) == 0:
        return -1
    # assign rank positions 1..k (or fewer if the group fills less of the top-k)
    if len(df_top) < ki:
        df_top["rank"] = list(range(1, len(df_top) + 1))
    else:
        df_top["rank"] = list(range(1, ki + 1))
    exposer_top_k = sum(df_top['rank'].apply(lambda x: 1 / math.log2(x + 1)))
    return exposer_top_k
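# A minimal sketch of compute_cumulative_exposer on hypothetical data: each of
# the k positions contributes 1 / log2(rank + 1), so early ranks dominate.
def _demo_cumulative_exposer():
    _demo = pd.DataFrame({"UID": [1, 2, 3]})
    print(compute_cumulative_exposer(_demo, 3))
    # 1/log2(2) + 1/log2(3) + 1/log2(4) ~= 2.13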
def compute_igf_ratio(top_k_IDS, _orig_df, _orig_sort_col):
    """
    Compute the IGF (Item Group Fairness) ratio for the top-k items.

    :param top_k_IDS: list
        A list of IDs representing the top-k items.
    :param _orig_df: pandas.DataFrame
        The original DataFrame containing all items.
    :param _orig_sort_col: str
        The column name to sort the original DataFrame by.
    :return: float
        The IGF ratio.
    """
    accepted_candidates = _orig_df["UID"].isin(top_k_IDS)
    _lowest_accepted_score = min(_orig_df[accepted_candidates][_orig_sort_col])
    rejected_candidates = ~accepted_candidates
    if sum(rejected_candidates) == 0:
        return 1
    _highest_rejected_score = max(_orig_df[rejected_candidates][_orig_sort_col])
    if _highest_rejected_score == 0:
        return 1
    cur_res = _lowest_accepted_score / _highest_rejected_score
    # cap at 1: the ratio only signals unfairness when the lowest accepted
    # score falls below the highest rejected score
    if cur_res > 1:
        return 1
    return cur_res
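# A minimal sketch of compute_igf_ratio on hypothetical data: selecting UIDs 1
# and 3 puts a 0.6 score in the top-k while a 0.8 score is rejected, giving
# 0.6 / 0.8 = 0.75; a ratio above 1 would be capped to 1.
def _demo_igf_ratio():
    _demo = pd.DataFrame({"UID": [1, 2, 3, 4], "score": [0.9, 0.8, 0.6, 0.3]})
    print(compute_igf_ratio([1, 3], _demo, "score"))  # 0.75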
def get_quotas_count(_df, s_attribute, sensitive_groups):
    """
    Calculate the quota count for each sensitive group in the data.

    :param _df: pandas.DataFrame
        The data to calculate the quotas for.
    :param s_attribute: str
        The sensitive attribute column name.
    :param sensitive_groups: list
        A list of sensitive attribute groups.
    :return: dict
        A dictionary with the sensitive group names as keys and their corresponding quota counts as values.
    """
    res_dict = {}
    for s in sensitive_groups:
        mask = _df[s_attribute] == s
        res_dict[s] = sum(mask) / len(_df)
    return res_dict
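# A minimal sketch of get_quotas_count on hypothetical data: quotas are the
# per-group share of rows, so they sum to 1 over the listed groups.
def _demo_quotas_count():
    _demo = pd.DataFrame({"Gender": ["F", "F", "M", "F"]})
    print(get_quotas_count(_demo, "Gender", ["F", "M"]))  # {'F': 0.75, 'M': 0.25}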
def get_sort_df(_sort_col, _df, _k):
    """
    Sort the DataFrame by the specified column and return the top-k rows.

    :param _sort_col: str
        The column to sort the data by.
    :param _df: pandas.DataFrame
        The DataFrame to be sorted.
    :param _k: int
        The number of top items to return.
    :return: pandas.DataFrame
        The top-k sorted rows from the DataFrame.
    """
    _df[_sort_col] = _df[_sort_col].apply(lambda x: float(x))
    _k = int(_k)
    if "__" in _sort_col:
        # "__" columns hold rank positions, so smaller values come first
        sort_df = _df.sort_values(by=_sort_col, ascending=True).head(_k)
    else:
        # otherwise the column holds scores, so larger values come first
        sort_df = _df.sort_values(by=_sort_col, ascending=False).head(_k)
    return sort_df
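# A minimal sketch of get_sort_df on hypothetical data: a plain score column
# is sorted descending, while a "__"-named rank column would sort ascending.
def _demo_sort_df():
    _demo = pd.DataFrame({"UID": [1, 2, 3], "score": [0.2, 0.9, 0.5]})
    print(get_sort_df("score", _demo, 2))  # UIDs 2 and 3 (scores 0.9 and 0.5)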