Something went wrong on our end
Select Git revision
bert-minimal.py 731 B
#!/usr/bin/env python3
import torch
from transformers import AutoModel, AutoTokenizer
# Minimal example: tokenize one French sentence with the CamemBERT
# checkpoint named below and read the per-token vectors from the model's
# 'last_hidden_state' output.
name = 'almanach/camembert-base'

# Alternative example sentences (uncomment one to try it instead):
#sent = "Des poids lourds et engins en feu \
# dans une entreprise en Vendée ."
#sent = "La gare routière attend toujours ses illuminations ."
sent = "Quelle surprise ! Arturo a la covid"

# Load the pretrained tokenizer and model identified by `name`.
tok = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)

# The sentence is split on whitespace and handed over as a list of words
# (is_split_into_words=True); word_ids() below is then expected to relate
# each subword token to the index of the word it came from
# (see the Transformers BatchEncoding docs).
tok_sent = tok(sent.split(), is_split_into_words=True,
               return_tensors='pt')
# Index 0 selects the first row of input_ids -- presumably the only one,
# since a single sentence is encoded.
tok_ids = tok_sent['input_ids'][0]
decoded = tok.convert_ids_to_tokens(tok_ids)
print(decoded)
print(tok_sent.word_ids())

# Inference only (no training): disable gradient tracking for the forward
# pass. The statement below must be indented to belong to the `with` block.
with torch.no_grad():
    embeds = model(**tok_sent)['last_hidden_state'][0]
print(embeds.shape)