"""Learning fair representations is a pre-processing technique that finds a
latent representation which encodes the data well but obfuscates information
about protected attributes [2]_.
References:
.. [2] R. Zemel, Y. Wu, K. Swersky, T. Pitassi, and C. Dwork, "Learning
Fair Representations." International Conference on Machine Learning,
2013.
Based on code from https://github.com/zjelveh/learning-fair-representations
"""
import numpy as np
import pandas as pd
import scipy.optimize as optim
import os
from .modules.LFR.loss import LFR_optimisation as LFR_func
from .modules.probabilistic_mapping_helpers import compute_X_hat
from .utils import process_data_input, process_data_output, save_model_data, load_model_data
from .fair_model import PreprocessingFairnessIntervention
class LFR(PreprocessingFairnessIntervention):
"""
Learning Fair Representations (LFR) fairness intervention.
This class applies the LFR approach to modify the dataset such that fairness constraints
are met while preserving as much utility as possible.
"""
    def __init__(self, configs, dataset):
        """
        Initialize the LFR model with the given configurations and dataset.

        :param configs: dict
            Configuration dictionary containing model parameters.
        :param dataset: str
            The dataset to be processed.
        """
        super().__init__(configs, dataset)
    def fit(self, X_train, run):
        """
        Train the LFR fairness model on the given training dataset.

        This method optimizes a fairness objective by learning fair
        representations of the data using constrained optimization.

        :param X_train: pandas.DataFrame or numpy.ndarray
            The training dataset. The last non-sensitive column is assumed
            to be the target variable.
        :param run: str
            The identifier for the training run.
        :return: self
            The trained LFR model.
        """
        if not os.path.exists(os.path.join(self.model_path, run)):
            X_train, group_weights, sensitive_groups, sensitive_column_indices, nonsensitive_column_indices = (
                process_data_input(X_train, self.configs, self.dataset))
            if self.configs["seed"] is not None:
                np.random.seed(self.configs["seed"])
            # The last non-sensitive column of X_train is assumed to be Y_train.
            Y_train = X_train[:, nonsensitive_column_indices[-1]]
            features_dim = X_train.shape[1]
            # Initialize the LFR optimization parameters: k prototype weights
            # drawn uniformly from [0, 1), followed by k prototypes of
            # dimension features_dim.
            parameters_initialization = np.random.uniform(
                size=int(self.configs["k"] + features_dim * self.configs["k"]))
            bnd = [(0, 1)] * self.configs["k"] + [(None, None)] * features_dim * self.configs["k"]
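            # The flat parameter vector packs the k prototype weights w first,
            # followed by the k * features_dim flattened prototype coordinates;
            # the bounds above confine the weights to [0, 1] and leave the
            # prototype coordinates unconstrained.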
            # Reset the step counter that LFR_optimisation keeps as a function
            # attribute, then minimize it with L-BFGS-B and keep the optimal
            # parameter vector (the first element of the returned tuple).
            LFR_func.steps = 0
            self.opt_params = optim.fmin_l_bfgs_b(
                LFR_func, x0=parameters_initialization, epsilon=1e-5,
                args=(X_train, Y_train, sensitive_groups, sensitive_column_indices,
                      self.configs["k"], self.configs["A_x"], self.configs["A_y"],
                      self.configs["A_z"], group_weights, self.configs["biggest_gap"],
                      os.path.join(self.model_path, run)),
                bounds=bnd, approx_grad=True, maxfun=self.configs["maxfun"],
                maxiter=self.configs["maxiter"], disp=False)[0]
            # Unpack the learned prototype weights and prototype locations.
            self.w = self.opt_params[:self.configs["k"]]
            self.prototypes = self.opt_params[self.configs["k"]:].reshape((self.configs["k"], features_dim))
            save_model_data(self, os.path.join(self.model_path, run))
        else:
            self.opt_params = load_model_data(os.path.join(self.model_path, run))
            # Mirror the unpacking done after training so a loaded model
            # exposes the same attributes.
            self.w = self.opt_params[:self.configs["k"]]
            self.prototypes = self.opt_params[self.configs["k"]:].reshape((self.configs["k"], -1))
        return self
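
# A minimal usage sketch (not part of the module's API): the config keys shown
# mirror those read in fit() above, but their values and the dataset name are
# illustrative assumptions, not defaults shipped with this package.
#
# if __name__ == "__main__":
#     configs = {
#         "seed": 42,            # RNG seed for the parameter initialization
#         "k": 10,               # number of prototypes
#         "A_x": 0.01,           # weight of the reconstruction loss L_x
#         "A_y": 1.0,            # weight of the prediction loss L_y
#         "A_z": 50.0,           # weight of the group-fairness loss L_z
#         "biggest_gap": False,  # illustrative value; semantics defined in the loss module
#         "maxfun": 5000,        # L-BFGS-B budget: max function evaluations
#         "maxiter": 5000,       # L-BFGS-B budget: max iterations
#     }
#     model = LFR(configs, "adult")    # "adult" is a hypothetical dataset name
#     model.fit(X_train, run="run_0")  # X_train: DataFrame or ndarray as in fit()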