Commit bec285c0 authored by Benoit Favre

first set of scripts for using llama LM

parent 3f6d6764
@@ -6,7 +6,11 @@ Install:
pip install -r requirements.txt
```
Installing bitsandbytes for llama models is a bit more [involved](https://gitlab.lis-lab.fr/cluster/wiki/-/wikis/Compiling%20bitsandbytes%20for%20int8%20inference).

See RESULTS for the exact-match results on the dev set.
See runs.sh for how to generate runs.
Note that external APIs require API keys. Please rename api_keys.template.py to api_keys.py and set the keys you need inside.
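For reference, a minimal sketch of what api_keys.py could look like after renaming the template; the variable name below is a placeholder, since api_keys.template.py itself is not part of this diff:

```python
# api_keys.py -- hypothetical sketch only; the real variable names are those
# defined in api_keys.template.py, which is not shown in this commit.
OPENAI_API_KEY = 'sk-...'   # placeholder; set only the keys for the APIs you use
```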
@@ -14,12 +14,20 @@ lm_templates_en = [
letters = 'abcdefghijklmnopqrstuvwxyz'

def linearize_instance(instance, include_correct_answers=False, add_left_parenthesis=False):
    result = instance['question'] + '\n' + '\n'.join('(%s) %s.' % (k, v) for k, v in instance['answers'].items())
    if include_correct_answers:
        result += '\nRéponse(s) : ' + ' '.join('(%s)' % a for a in instance['correct_answers'])
    else:
        result += '\nRéponse(s) :' + (' (' if add_left_parenthesis else '')
    return result
def get_prompt(prompt, instance, few_shots=[]):
    shots = [linearize_instance(shot, include_correct_answers=True) for shot in few_shots]
    return prompt % ('\n\n'.join(shots + [linearize_instance(instance)]),)
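As a quick illustration (with a made-up instance, not taken from the DEFT data), linearize_instance turns a question dict into the textual MCQ block that get_prompt then embeds in the prompt template:

```python
from deft import linearize_instance

# Hypothetical instance for illustration only; the field names follow the code above.
instance = {
    'question': 'Quelle est la capitale de la France ?',
    'answers': {'a': 'Paris', 'b': 'Lyon', 'c': 'Marseille'},
    'correct_answers': ['a'],
}
print(linearize_instance(instance, include_correct_answers=True))
# Quelle est la capitale de la France ?
# (a) Paris.
# (b) Lyon.
# (c) Marseille.
# Réponse(s) : (a)
```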
@@ -33,10 +41,17 @@ def extract_answer(answer, num_answers=5):
    selected = [x.replace(')', '').replace('(', '') for x in selected]
    return list(sorted(set([letter.lower() for letter in selected])))
def hamming(preds, refs):
    # fraction of predicted letters that appear in the reference, normalized by the
    # size of the union of predictions and references (a Jaccard-style score)
    num_correct = sum(1 for p in preds if p in refs)
    union_size = len(set(preds + refs))
    return num_correct / union_size
def run_inference(generator, corpus_path, template):
    with open(corpus_path) as fp:
@@ -56,7 +71,7 @@ def run_inference(generator, corpus_path, template):
        print(answer, instance['correct_answers'])
        if set(answer) == set(instance['correct_answers']):
            num_exact_correct += 1
        num_hamming_correct += hamming(answer, instance['correct_answers'])
        num_hamming += len(instance['answers'])
        results.append(instance['id'] + ';' + '|'.join(list(sorted(answer))))
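The redefined hamming() no longer takes the number of answer options; it scores a prediction as the number of correct letters over the size of the union of predicted and reference letters. A quick sanity check, assuming deft.py is importable:

```python
from deft import hamming

print(hamming(['a', 'c'], ['a', 'b']))   # 0.333... -> only 'a' is correct, union is {a, b, c}
print(hamming(['a', 'b'], ['a', 'b']))   # 1.0 -> exact match
print(hamming([], ['a']))                # 0.0 -> nothing predicted
```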
# adapted from https://github.com/tloen/alpaca-lora/
import os
import uuid
import json
@@ -7,120 +8,126 @@ import transformers
from transformers import LlamaForCausalLM, LlamaTokenizer
from peft import prepare_model_for_int8_training, LoraConfig, get_peft_model

from deft import linearize_instance

# reasonable batch size with 80GB GPU
# 7B -> 24
# 13B -> 12
# 30B -> 6
# 65B -> 1

def main(LLAMA_VARIANT : int = 65,
         MICRO_BATCH_SIZE : int = 1,
         BATCH_SIZE : int = 24,
         EPOCHS : int = 1,
         LEARNING_RATE : float = 3e-4,  # the karpathy constant
         CUTOFF_LEN : int = 256,
         WARMUP_RATIO : float = 0.05,
         IS_INT8 : bool = True,
         LORA_R : int = 4,
         LORA_ALPHA : int = 16,
         LORA_DROPOUT : float = 0.05,
         valid_steps : int = 20,
         PROMPT : str = '''Ceci est une question de QCM de l\'examen de pharmacie. Réponds avec la ou les lettres correspondant à la bonne réponse.\n\n%s''',
         output_dir : str = 'deft_models',
         train_json : str = '../../json/train.json',
         dev_json : str = '../../json/dev.json',
         ):
    #PROMPT = '''Ceci est une question de QCM de l\'examen de pharmacie. Réponds avec la ou les lettres correspondant à la bonne réponse.\n\n## Question 1\n\n%s'''
    GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
    BACKBONE = "decapoda-research/llama-%db-hf" % LLAMA_VARIANT
    OUTPUT_PATH = "%s/deft_%s_lora_%s" % (output_dir, BACKBONE.split('/')[-1], str(uuid.uuid4()))
    print(OUTPUT_PATH)
    os.makedirs(OUTPUT_PATH, exist_ok=True)

    model = LlamaForCausalLM.from_pretrained(
        BACKBONE,
        load_in_8bit=IS_INT8,
        device_map="auto",
    )
    tokenizer = LlamaTokenizer.from_pretrained(BACKBONE, add_eos_token=True)

    if IS_INT8:
        model = prepare_model_for_int8_training(model)

    config = LoraConfig(
        r=LORA_R,
        lora_alpha=LORA_ALPHA,
        target_modules=["q_proj", "v_proj"],
        lora_dropout=LORA_DROPOUT,
        bias="none",
        task_type="CAUSAL_LM",
    )
    model = get_peft_model(model, config)
    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    # use the train_json/dev_json arguments instead of hard-coded paths
    data = load_dataset("json", data_files=train_json)
    data_valid = load_dataset("json", data_files=dev_json)
    data['validation'] = data_valid['train']

    def generate_prompt(data_point):
        return PROMPT % linearize_instance(data_point, include_correct_answers=True)

    print(generate_prompt(data['train'][0]))

    data = data.shuffle().map(
        lambda data_point: tokenizer(
            generate_prompt(data_point),
            truncation=True,
            max_length=CUTOFF_LEN,
            padding="max_length",
        )
    )

    trainer = transformers.Trainer(
        model=model,
        train_dataset=data["train"],
        eval_dataset=data["validation"],
        args=transformers.TrainingArguments(
            do_eval=True,
            evaluation_strategy='steps',
            eval_steps=valid_steps,
            per_device_train_batch_size=MICRO_BATCH_SIZE,
            gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
            warmup_ratio=WARMUP_RATIO,
            num_train_epochs=EPOCHS,
            learning_rate=LEARNING_RATE,
            fp16=True,
            logging_steps=1,
            output_dir=OUTPUT_PATH,
            save_total_limit=3,
        ),
        data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
    )
    model.config.use_cache = False
    trainer.train(resume_from_checkpoint=False)

    model.save_pretrained(OUTPUT_PATH)

    with open("%s/training_config.json" % OUTPUT_PATH, 'w') as fp:
        fp.write(json.dumps({
            'backbone': BACKBONE,
            'micro_batch_size': MICRO_BATCH_SIZE,
            'batch_size': BATCH_SIZE,
            'epochs': EPOCHS,
            'learning_rate': LEARNING_RATE,
            'cutoff_len': CUTOFF_LEN,
            'warmup_ratio': WARMUP_RATIO,
            'llama_variant': LLAMA_VARIANT,
            'is_int8': IS_INT8,
            'lora_r': LORA_R,
            'lora_alpha': LORA_ALPHA,
            'lora_dropout': LORA_DROPOUT,
            'prompt': PROMPT,
            'output_path': OUTPUT_PATH
        }, indent=4))

    print(OUTPUT_PATH)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
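Since the training entry point is exposed through fire.Fire(main), every keyword argument of main() becomes a command-line flag. A sketch of how it could be invoked, assuming the script is saved as finetune_llama.py (the actual filename is not visible in this diff):

```python
# Hypothetical usage; the script's filename is not shown in the commit, so adjust the import.
# Command-line equivalent with fire:
#   python finetune_llama.py --LLAMA_VARIANT=7 --BATCH_SIZE=24 --output_dir=deft_models
from finetune_llama import main

main(LLAMA_VARIANT=7,   # 7B fits a batch of 24 on an 80GB GPU, per the comment in the script
     BATCH_SIZE=24,
     output_dir='deft_models')
```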
from peft import PeftModel
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
import transformers
import torch
import sys
import json
from argparse import Namespace
from deft import linearize_instance, extract_answer, hamming
# stop inference once a given token (for example '\n') has been generated in every sequence of the batch
class TokenStopper(transformers.StoppingCriteria):
    def __init__(self, token, prompt_lengths):
        # id of the stop token (last piece of its encoding) and length of each prompt in tokens
        self.token = tokenizer.encode(token)[-1]
        self.prompt_lengths = prompt_lengths
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        # only stop once every sequence contains the token after its prompt
        for sequence, length in zip(input_ids, self.prompt_lengths):
            generated = sequence[length:]
            if (generated == self.token).sum() == 0:
                return False
        return True
def run_batch(batch, eval_stats, output_fp):
    prompts = [config.prompt % linearize_instance(instance, add_left_parenthesis=True) for instance in batch]
    inputs = tokenizer(
        prompts,
        return_tensors="pt",
        padding=True,
    ).to('cuda')
    lengths = [len(tokenizer.encode(prompt)) for prompt in prompts]
    generation_config = GenerationConfig(
        temperature=0.1,
        top_p=0.75,
        top_k=40,
        num_beams=4,
    )
    with torch.no_grad():
        generation_output = model.generate(
            **inputs, max_new_tokens=128,
            generation_config=generation_config,
            #stopping_criteria=[TokenStopper('\n', [len(sequence) for sequence in inputs['input_ids']])]
        )
    for instance, prompt, length, sequence in zip(batch, prompts, lengths, generation_output):
        generated = tokenizer.decode(sequence, skip_special_tokens=True)
        print(generated)
        # keep only the first line generated after the prompt and map it back to answer letters
        answer = extract_answer(generated[len(prompt):].split('\n')[0])
        print(answer, instance['correct_answers'])
        output_fp.write(instance['id'] + ';' + '|'.join(sorted(answer)).lower() + '\n')
        eval_stats['num_emr'] += 1
        if set(answer) == set(instance['correct_answers']):
            eval_stats['num_emr_correct'] += 1
        eval_stats['num_hamming_sum'] += hamming(answer, instance['correct_answers'])
        eval_stats['num_hamming'] += len(instance['answers'])
def main(model_path : str, output_path : str, corpus_path : str = '../../json/dev.json', adapted : bool = True, batch_size : int = 1):
    global config
    with open('%s/training_config.json' % model_path) as fp:
        config = Namespace(**json.loads(fp.read()))
    #config.prompt = "This is a MCQ from the biology exam in French. Answer with the correct set of letters.\n\n%s"

    global tokenizer
    tokenizer = LlamaTokenizer.from_pretrained(config.backbone, padding_side='left')
    tokenizer.pad_token_id = 0  # unk. we want this to be different from the eos token

    global model
    model = LlamaForCausalLM.from_pretrained(
        config.backbone,
        load_in_8bit=config.is_int8,
        torch_dtype=torch.float16,
        device_map="auto",
    )
    if adapted:
        # device_map={"": 0} fixes the device = None error, see https://github.com/huggingface/peft/issues/115
        model = PeftModel.from_pretrained(model, model_path, torch_dtype=torch.float16, device_map={"": 0})

    with open(corpus_path) as fp:
        corpus = json.loads(fp.read())

    eval_stats = {
        'num_emr': 0,
        'num_emr_correct': 0,
        'num_hamming_sum': 0,
        'num_hamming': 0,
    }
    with open(output_path, 'w') as output_fp:
        for i in range(0, len(corpus), batch_size):
            run_batch(corpus[i: i + batch_size], eval_stats, output_fp)

    print('EXACT MATCH:', config.output_path, eval_stats['num_emr_correct'] / eval_stats['num_emr'])
    print('HAMMING DIST:', config.output_path, eval_stats['num_hamming_sum'] / eval_stats['num_hamming'])

if __name__ == '__main__':
    import fire
    fire.Fire(main)
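As with training, the inference entry point is wrapped with fire.Fire(main), so the arguments of main() map to command-line flags. A sketch of a programmatic call, assuming the script is saved as run_llama.py and that a LoRA run directory was produced under deft_models/ (both names are placeholders, not confirmed by this diff):

```python
# Hypothetical usage; adjust the module name and the run directory to your checkout.
# Command-line equivalent with fire:
#   python run_llama.py --model_path=deft_models/deft_llama-7b-hf_lora_<uuid> --output_path=dev_run.txt
from run_llama import main

main(model_path='deft_models/deft_llama-7b-hf_lora_1234',  # placeholder run directory
     output_path='dev_run.txt',
     corpus_path='../../json/dev.json',
     batch_size=4)
```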