Commit ec5b25ad authored by Benoit Favre's avatar Benoit Favre

initial commit

System for generating DEFT 2023 outputs from LLMs
=================================================
Install:
```
pip install -r requirements.txt
```
See RESULTS for the exact-match results on the dev set.
See runs.sh for how to generate runs.
Note that external APIs require API keys: rename api_keys.template.py to api_keys.py and set the keys you need inside.
RESULTS 0 → 100644
Results on the dev set:

prompt_0:
model                          exact match
bloomz-560m                    0.0737
en/bloomz-560m                 0.1442
bloomz-3b                      0.1442
en/bloomz-3b                   0.1153 ??
bloomz-7b1                     0.1602
bloomz-7b1-mt                  0.1762
flan-t5-xxl                    0.1794
flan-ul2                       0.1570
tk-instruct-3b-def             0.1346
tk-instruct-11b-def            0.1826
en/tk-instruct-11b-def         0.1442
opt-iml-1.3b                   0.0673
opt-iml-30b                    0.1442

(int8)
llama_7B                       0.0576
llama_7B+alpaca_fr             0.1185
llama_7B+alpaca                0.1217
llama_7B+alpaca-native         0.1153
llama_7B+deft                  0.1378
llama_13B                      0.0769
llama_13B+alpaca               0.1474
llama_13B+deft                 0.1730
llama_30B                      0.1442
llama_30B+alpaca               0.1923
llama_30B+deft                 0.2467
llama_65B                      0.1730
llama_65B+deft                 0.3044

(fp16)
llama_30B                      0.1891
llama_65B                      0.2179
openai/code-cushman-001        0.1121
openai/code-davinci-002        0.3108
ai21/j1-jumbo                  0.0833

Other prompts:
code-cushman-001               0.1346
code-davinci-002_run2          0.3205
code-davinci-002               0.2435
gpt-3.5-turbo-0301.run2        0.4551
gpt-3.5-turbo-0301             0.4038
text-curie-001                 0.1217
text-davinci-003               0.2884
cohere_command-xlarge-beta     0.1057
FrenchMedMCQA: A French Multiple-Choice Question Answering Dataset for Medical domain
https://hal.science/hal-03824241v2/preview/LOUHI_2022___QA_22.pdf#page=2
                    w/o Context      Wiki-BM25        HAL-BM25         Wiki-MiniLMv2    HAL-MiniLMv2
Architecture        Hamming  EMR     Hamming  EMR     Hamming  EMR     Hamming  EMR     Hamming  EMR
BioBERT_V1.1        36.19    15.43   38.72    16.72   33.33    14.14   35.13    16.23   34.27    13.98
PubMedBERT          33.98    14.14   34.00    13.98   35.66    15.59   33.87    14.79   35.44    14.79
CamemBERT-base      36.24    16.55   34.19    14.46   34.78    15.43   34.66    14.79   34.61    14.95
XLM-RoBERTa-base    37.92    17.20   31.26    11.89   35.84    16.07   32.47    14.63   33.00    14.95
BART-base           31.93    15.91   34.98    18.64   33.80    17.68   29.65    12.86   34.65    18.32
api_keys.template.py 0 → 100644
# This file should be modified with your API keys and renamed api_keys.py
# Only the keys for the services you actually use are required
DEEPL_TOKEN = '<your-deepl-api-key>'
OPENAI_TOKEN = '<your-openai-api-key>'
COHERE_TOKEN = '<your-cohere-api-key>'
AI21_TOKEN = '<your-ai21-api-key>'
deft.py 0 → 100644
import re
import json

# Prompt templates; %s is filled with the (optionally few-shot) linearized question
lm_templates = [
    '''Ceci est une question de QCM de l\'examen de pharmacie. Réponds avec la ou les lettres correspondant à la bonne réponse.\n\n%s\n\nRéponse : (''',
    '''Corrigé du QCM de pharma.\n%s\nRéponse(s) : (''',
    '''Alice est une excellente pharmacienne. Elle répond aux questions de Pierre qui est interne en pharmacie.\nPierre : ma question est la suivante : %s\n Alice : je connais la bonne réponse et c'est (''',
    '''Correction du QCM de l\'examen de pharmacie. %s\nRéponse(s) : (''',
    '''Alice est une intelligence artificielle experte en pharmacie. Elle répond aux questions de Bob avec précision.\nBob: %s\n Alice: (''',
]

lm_templates_en = [
    '''This is a multiple choice question from the pharma exam. Reply with the letter or the letters corresponding to the correct answer.\n\n%s\n\nAnswer : (''',
]

letters = 'abcdefghijklmnopqrstuvwxyz'


def linearize_instance(instance, include_correct_answers=False):
    # Render an instance as the question followed by "(a) ..." answer choices
    result = instance['question'] + '\n' + '\n'.join('(%s) %s.' % (k, v) for k, v in instance['answers'].items())
    if include_correct_answers:
        result += '\nRéponse(s) : ' + ' '.join('(%s)' % a for a in instance['correct_answers'])
    return result


def get_prompt(prompt, instance, few_shots=[]):
    # Fill the template with optional few-shot examples followed by the target instance
    shots = [linearize_instance(shot, include_correct_answers=True) for shot in few_shots]
    return prompt % ('\n\n'.join(shots + [linearize_instance(instance)]),)


def extract_answer(answer, num_answers=5):
    # Recover answer letters from the generated text, preferring "(a)"/"a)" patterns
    # and falling back to bare letters within the valid range
    answer = re.sub('Ceci est une question de QCM.*', '', answer).strip().lower()
    selected = re.findall(r'^[a-%s]\)|\([a-%s]\)' % (letters[num_answers - 1], letters[num_answers - 1]), answer)
    if len(selected) == 0:
        selected = re.findall(r'(\b[a-%s]\b)' % letters[num_answers - 1], answer)
    else:
        selected = [x.replace(')', '').replace('(', '') for x in selected]
    return list(sorted(set([letter.lower() for letter in selected])))


def hamming(a, b, num):
    # Count the answer letters whose selected/unselected status agrees between a and b
    A = [c.upper() if c in a else c for c in letters[:num]]
    B = [c.upper() if c in b else c for c in letters[:num]]
    return [x == y for x, y in zip(A, B)].count(True)


def run_inference(generator, corpus_path, template):
    with open(corpus_path) as fp:
        dev_corpus = json.loads(fp.read())
    num_exact_correct = 0
    num_hamming_correct = 0
    num_hamming = 0
    results = []
    for instance in dev_corpus:
        prompt = get_prompt(template, instance)
        print(prompt)
        generated = generator(prompt)
        print(generated)
        answer = extract_answer(generated, len(instance['answers']))
        print(answer, instance['correct_answers'])
        if set(answer) == set(instance['correct_answers']):
            num_exact_correct += 1
        num_hamming_correct += hamming(answer, instance['correct_answers'], len(instance['answers']))
        num_hamming += len(instance['answers'])
        results.append(instance['id'] + ';' + '|'.join(list(sorted(answer))))
    print('EXACT MATCH:', num_exact_correct / len(dev_corpus))
    print('HAMMING DIST:', num_hamming_correct / num_hamming)
    return results


def template_from_id(desc):
    # "en/N" selects the Nth English template, a bare "N" the Nth French one
    if desc.startswith('en'):
        return lm_templates_en[int(desc[3:])]
    else:
        return lm_templates[int(desc)]


def write_results(results, output_path):
    with open(output_path, 'w') as fp:
        fp.write('\n'.join(results) + '\n')
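To make the corpus format and the helpers above concrete, here is a small worked example; the instance content is invented, but the field names (`question`, `answers`, `correct_answers`, `id`) are exactly the ones the functions read:

```
import deft

# one made-up dev instance in the format deft.py expects
instance = {
    'id': 'demo-1',
    'question': 'Parmi les propositions suivantes, lesquelles sont exactes ?',
    'answers': {'a': 'réponse a', 'b': 'réponse b', 'c': 'réponse c', 'd': 'réponse d', 'e': 'réponse e'},
    'correct_answers': ['a', 'c'],
}

# zero-shot prompt built from the first French template
print(deft.get_prompt(deft.lm_templates[0], instance))

# parse a typical model continuation back into answer letters
print(deft.extract_answer('a) et (c) sont correctes', num_answers=5))  # ['a', 'c']

# Hamming score: number of letters whose selected/unselected status agrees
print(deft.hamming(['a', 'c'], ['a', 'b'], 5))  # 3 (agreement on a, d and e)
```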
requirements.txt 0 → 100644
protobuf==3.20
accelerate>=0.17.0
bitsandbytes>=0.37.0
fire
torch>=1.13.1
transformers>=4.26.1
ai21>=1.0.2
cohere>=3.10.0
openai>=0.27.2
backoff>=2.2.1
from api_keys import DEEPL_TOKEN, OPENAI_TOKEN, COHERE_TOKEN, AI21_TOKEN

import openai
import backoff

openai.api_key = OPENAI_TOKEN
openai_lm_models = ['text-davinci-003', 'text-davinci-002', 'code-davinci-002', 'code-cushman-001', 'text-curie-001', 'text-babbage-001', 'text-ada-001', 'davinci', 'curie', 'babbage', 'ada']
openai_chat_models = ['gpt-3.5-turbo-0301', 'gpt-3.5-turbo']

# retry with exponential backoff on rate limits and transient API errors
@backoff.on_exception(backoff.expo, (openai.error.RateLimitError, openai.error.APIError))
def openai_complete(prompt, model='text-davinci-003'):
    result = openai.Completion.create(model=model, prompt=prompt, temperature=0, max_tokens=32)
    return result['choices'][0]['text']

@backoff.on_exception(backoff.expo, (openai.error.RateLimitError, openai.error.APIError))
def openai_chat(prompt, model='gpt-3.5-turbo'):
    result = openai.ChatCompletion.create(model=model, messages=[{"role": "user", "content": prompt}], temperature=0, max_tokens=32)
    return result['choices'][0]['message']['content']

import cohere, requests, time

cohere_client = cohere.Client(COHERE_TOKEN)
cohere_models = ['command-xlarge-beta', 'command-xlarge-nightly', 'xlarge', 'medium', 'command-medium-beta', 'command-medium-nightly']

@backoff.on_exception(backoff.expo, requests.exceptions.RetryError)
def cohere_complete(prompt, model='command-xlarge-beta'):
    response = cohere_client.generate(model=model, prompt=prompt, max_tokens=32, temperature=1, k=0, p=0.75, stop_sequences=[], return_likelihoods='NONE')
    time.sleep(20)  # max 3 queries per minute (free account)
    return response.generations[0].text

import ai21

ai21.api_key = AI21_TOKEN
ai21_models = ['j1-jumbo', 'j1-grande', 'j1-grande-instruct', 'j1-large']

def ai21_complete(prompt, model='j1-jumbo'):
    result = ai21.Completion.execute(model=model, prompt=prompt, maxTokens=32, temperature=0.5, numResults=1, topP=0.1)
    return result['completions'][0]['data']['text']

def main(result_path: str, corpus_path: str, model: str = 'openai/gpt-3.5-turbo-0301', template_num: int = 0):
    # the model is given as "<api>/<model-name>", e.g. "openai/gpt-3.5-turbo-0301"
    api, llm = model.split('/', 1)
    def generate(input_string):
        if api == 'openai':
            if llm in openai_chat_models:
                return openai_chat(input_string, llm)
            else:
                return openai_complete(input_string, llm)
        elif api == 'cohere':
            return cohere_complete(input_string, llm)
        elif api == 'ai21':
            return ai21_complete(input_string, llm)
    import deft
    results = deft.run_inference(generate, corpus_path, deft.lm_templates[template_num])
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
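Before spending API credits, the whole scoring loop can be dry-run with a stub generator. This is only a sketch: the corpus instance and the stub's fixed answer "(b)" are invented, and the corpus is written to a temporary file because `deft.run_inference` takes a path:

```
import json, tempfile
import deft

# a single made-up instance with the fields run_inference reads
corpus = [{
    'id': 'demo-1',
    'question': 'Parmi les propositions suivantes, laquelle est exacte ?',
    'answers': {'a': 'proposition a', 'b': 'proposition b', 'c': 'proposition c'},
    'correct_answers': ['b'],
}]

with tempfile.NamedTemporaryFile('w', suffix='.json', delete=False) as fp:
    json.dump(corpus, fp)
    corpus_path = fp.name

# stub generator: ignores the prompt and always answers "(b)"
def fake_generator(prompt):
    return '(b)'

results = deft.run_inference(fake_generator, corpus_path, deft.lm_templates[0])
print(results)  # ['demo-1;b'], with EXACT MATCH 1.0 printed by run_inference
```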
#https://huggingface.co/bigscience/bloomz-7b1-mt
from transformers import AutoModelForCausalLM, AutoTokenizer

def main(result_path: str, corpus_path: str, model: str = 'bloomz-7b1-mt', template_id: str = '0'):
    checkpoint = 'bigscience/' + model
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    # load in 8-bit to fit the larger BLOOMZ checkpoints on a single GPU
    llm = AutoModelForCausalLM.from_pretrained(checkpoint, device_map="auto", load_in_8bit=True)
    def generate(input_string):
        inputs = tokenizer(input_string, return_tensors="pt").input_ids.to("cuda")
        outputs = llm.generate(inputs, max_new_tokens=32)
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # causal LMs echo the prompt, so keep only the continuation
        return generated[len(input_string):]
    import deft
    results = deft.run_inference(generate, corpus_path, deft.template_from_id(template_id))
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
#https://huggingface.co/docs/transformers/model_doc/flan-t5
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

def main(result_path: str, corpus_path: str, model: str = 'flan-t5-xxl', template_id: str = '0'):
    llm = AutoModelForSeq2SeqLM.from_pretrained("google/" + model).to('cuda')
    tokenizer = AutoTokenizer.from_pretrained("google/" + model)
    def generate(input_string):
        inputs = tokenizer(input_string, return_tensors="pt").input_ids.to("cuda")
        outputs = llm.generate(inputs, max_length=32)
        # seq2seq models generate only the answer, no prompt stripping needed
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    import deft
    results = deft.run_inference(generate, corpus_path, deft.template_from_id(template_id))
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
#https://huggingface.co/google/flan-ul2
# pip install accelerate transformers bitsandbytes
from transformers import T5ForConditionalGeneration, AutoTokenizer
import torch

# flan-ul2 is loaded once at module level, in 8-bit, spread over available GPUs
model = T5ForConditionalGeneration.from_pretrained("google/flan-ul2", device_map="auto", load_in_8bit=True)
tokenizer = AutoTokenizer.from_pretrained("google/flan-ul2")

def generate_flan_ul2(input_string):
    inputs = tokenizer(input_string, return_tensors="pt").input_ids.to("cuda")
    outputs = model.generate(inputs, max_length=32)
    return tokenizer.decode(outputs[0])

def main(result_path: str, corpus_path: str, template_num: int = 0):
    import deft
    results = deft.run_inference(generate_flan_ul2, corpus_path, deft.lm_templates[template_num])
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
#https://huggingface.co/facebook/opt-iml-30b
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

def main(result_path: str, corpus_path: str, model: str = 'opt-iml-30b', template_num: int = 0):
    checkpoint = 'facebook/' + model
    llm = AutoModelForCausalLM.from_pretrained(checkpoint, torch_dtype=torch.float16).cuda()
    tokenizer = AutoTokenizer.from_pretrained(checkpoint, use_fast=False)
    def generate(input_string):
        inputs = tokenizer(input_string, return_tensors="pt").input_ids.to("cuda")
        outputs = llm.generate(inputs, max_new_tokens=32)
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        # strip the echoed prompt, keep only the continuation
        return generated[len(input_string):]
    import deft
    results = deft.run_inference(generate, corpus_path, deft.lm_templates[template_num])
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
#https://huggingface.co/allenai/tk-instruct-large-def-pos
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

def main(result_path: str, corpus_path: str, model: str = 'tk-instruct-3b-def', template_id: str = "0"):
    tokenizer = AutoTokenizer.from_pretrained('allenai/' + model)
    model = AutoModelForSeq2SeqLM.from_pretrained('allenai/' + model).to('cuda')
    def generate(input_string):
        inputs = tokenizer(input_string, return_tensors="pt").input_ids.to("cuda")
        outputs = model.generate(inputs, max_length=32)
        generated = tokenizer.decode(outputs[0], skip_special_tokens=True)
        return generated
    import deft
    results = deft.run_inference(generate, corpus_path, deft.template_from_id(template_id))
    deft.write_results(results, result_path)

if __name__ == '__main__':
    import fire
    fire.Fire(main)
import sys
import json
from tqdm import tqdm

# Translate a French DEFT-style corpus to English with NLLB-200 (fra_Latn -> eng_Latn)
input_filename = sys.argv[1]
output_filename = sys.argv[2]

with open(input_filename) as fp:
    data = json.loads(fp.read())

from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
#facebook/nllb-200-distilled-600M
tokenizer = AutoTokenizer.from_pretrained("facebook/nllb-200-distilled-1.3B", src_lang="fra_Latn")
model = AutoModelForSeq2SeqLM.from_pretrained("facebook/nllb-200-distilled-1.3B").to('cuda')

batch_size = 20
translated = []

def add_result(i, field, value):
    # store a value for instance i; "parent/child" field names go into a nested dict
    while len(translated) <= i:
        translated.append({})
    if '/' in field:
        k1, k2 = field.split('/')
        if k1 not in translated[i]:
            translated[i][k1] = {}
        translated[i][k1][k2] = value
    else:
        translated[i][field] = value

def run_translation(batch, batch_info):
    inputs = tokenizer(batch, return_tensors="pt", padding=True).to('cuda')
    # cap the output length at twice the (padded) input length
    translated_tokens = model.generate(**inputs, forced_bos_token_id=tokenizer.lang_code_to_id["eng_Latn"], max_length=inputs['input_ids'].shape[1] * 2)
    detokenized = tokenizer.batch_decode(translated_tokens, skip_special_tokens=True)
    for (i, field), translation in zip(batch_info, detokenized):
        add_result(i, field, translation)

batch = []
batch_info = []
for i, instance in enumerate(tqdm(data, desc='translating', total=len(data))):
    for k, v in instance.items():
        if k not in ['question', 'answer']:
            # copy the field verbatim (the 'answers' dict is later overwritten key by key)
            add_result(i, k, v)
        else:
            # k == 'question': queue the question and every answer choice for translation
            add_result(i, k, None)
            batch.append(instance['question'])
            batch_info.append((i, 'question'))
            for k, v in instance['answers'].items():
                batch.append(v)
                batch_info.append((i, 'answers/' + k))
            if len(batch) >= batch_size:
                run_translation(batch, batch_info)
                batch = []
                batch_info = []
else:
    # for/else: flush whatever is left in the last partial batch
    if len(batch) > 0:
        run_translation(batch, batch_info)

with open(output_filename, 'w') as fp:
    fp.write(json.dumps(translated, indent=4))
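For reference, the `'answers/<letter>'` convention used above produces one nested record per instance. This standalone sketch (with `add_result` copied from the script so it runs on its own, and made-up values) shows the shape of a translated entry:

```
translated = []

def add_result(i, field, value):
    # grow the list until index i exists, then store the value,
    # splitting "parent/child" field names into a nested dict
    while len(translated) <= i:
        translated.append({})
    if '/' in field:
        k1, k2 = field.split('/')
        if k1 not in translated[i]:
            translated[i][k1] = {}
        translated[i][k1][k2] = value
    else:
        translated[i][field] = value

add_result(0, 'id', 'demo-1')
add_result(0, 'question', 'Which of the following statements is correct?')
add_result(0, 'answers/a', 'statement a')
add_result(0, 'answers/b', 'statement b')
print(translated)
# [{'id': 'demo-1', 'question': 'Which of the following statements is correct?',
#   'answers': {'a': 'statement a', 'b': 'statement b'}}]
```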