# Source code for fairdiverse.recommendation.llm_rec

import os.path
from .utils.group_utils import Init_Group_AdjcentMatrix, get_iid2text
import pandas as pd
from .llm import Grounder, Prompt_Constructer, LLM_caller
from .evaluator import LLM_Evaluator
import os
import yaml
import json
from datetime import datetime

class LLMRecommender(object):
    """LLM-based recommender workflow.

    Builds prompts from a processed test-ranking file, queries an LLM,
    grounds the free-text responses back onto item ids, evaluates the
    grounded ranking, and dumps the config plus evaluation results to a
    dated log directory.
    """

    def __init__(self, train_config):
        """Initialize In-processing and base LLMs-models.

        :param train_config: Your custom config files (dict). Must contain
            at least the keys ``'dataset'`` and ``'llm_type'``.
        """
        self.dataset = train_config['dataset']
        self.llm_type = train_config['llm_type']
        # Kept whole so it can override file-based configs in load_configs().
        self.config = train_config

    def load_configs(self, dir):
        """Loads and merges configuration files for the model, dataset, and evaluation.

        This function loads multiple YAML configuration files, including the
        process configuration, dataset-specific settings, model configurations,
        and evaluation parameters. All configurations are merged, with the
        highest priority given to the class's own `config` attribute.

        :param dir: The directory where the main process configuration file is located.
        :return: A dictionary containing the merged configuration from all files.
        """
        print("start to load config...")
        with open(os.path.join(dir, "process_config.yaml"), 'r') as f:
            config = yaml.safe_load(f)

        with open(os.path.join("recommendation", "properties", "dataset",
                               f"{self.dataset}.yaml"), 'r') as f:
            config.update(yaml.safe_load(f))

        print("start to load model...")
        with open(os.path.join("recommendation", "properties", "models.yaml"), 'r') as f:
            model_config = yaml.safe_load(f)
        with open(os.path.join("recommendation", "properties", "models",
                               "LLM.yaml"), 'r') as f:
            model_config.update(yaml.safe_load(f))
        config.update(model_config)

        with open(os.path.join("recommendation", "properties", "evaluation.yaml"), 'r') as f:
            config.update(yaml.safe_load(f))

        # train_config passed at construction time has the highest priority.
        config.update(self.config)

        print("your loading config is:")
        print(config)
        return config

    def recommend(self):
        """Training LLMs-based in-processing and base model main workflow.

        Reads ``<dataset>.test.ranking`` (tab-separated), constructs prompts,
        queries the LLM, grounds the responses, evaluates them, and writes
        ``config.yaml`` and ``test_result.json`` to a dated log directory.
        """
        dataset_dir = os.path.join("recommendation", "processed_dataset", self.dataset)
        dataset_file_name = self.dataset + '.test.ranking'
        input_file = pd.read_csv(os.path.join(dataset_dir, dataset_file_name), delimiter='\t')

        iid2text, iid2pid = get_iid2text(self.dataset), Init_Group_AdjcentMatrix(self.dataset)
        config = self.load_configs(dataset_dir)

        prompt_constructer = Prompt_Constructer(config)
        prompt_dataset = prompt_constructer.construct_prompt(input_file, iid2text, iid2pid)

        LLM = LLM_caller(config)
        results_list = LLM.get_response(prompt_dataset)
        LLM.clear()

        grounder = Grounder(config)
        grounding_result = grounder.grounding(results_list, id2title=iid2text)

        evaluator = LLM_Evaluator(config)
        eval_result = evaluator.llm_eval(grounding_result, iid2pid)

        # Dump the logs and eval result. NOTE: year-month-day without zero
        # padding (e.g. 2024-3-7), kept for backward-compatible dir names.
        today = datetime.today()
        today_str = f"{today.year}-{today.month}-{today.day}"
        log_dir = os.path.join("recommendation", "log", f"{today_str}_{config['log_name']}")
        # exist_ok avoids the check-then-create race of the exists() idiom.
        os.makedirs(log_dir, exist_ok=True)

        print("training complete! start to save the config and model...")
        print(f" config files are dump in {log_dir}")
        with open(os.path.join(log_dir, "config.yaml"), 'w') as f:
            yaml.dump(config, f)
        with open(os.path.join(log_dir, 'test_result.json'), 'w') as f:
            json.dump(eval_result, f)
        print(f"dump in {log_dir}")