#!/usr/bin/env python3

import torch
from transformers import AutoModel, AutoTokenizer

# Hugging Face checkpoint used for both the tokenizer and the encoder.
name = 'almanach/camembert-base'

# Space-separated French sentence to embed.
# Other sentences that can be swapped in:
#   "Des poids lourds et engins en feu dans une entreprise en Vendée ."
#   "Quelle surprise ! Arturo a la covid"
sent = "La gare routière attend toujours ses illuminations ."

tok = AutoTokenizer.from_pretrained(name)
model = AutoModel.from_pretrained(name)

# Hand the tokenizer a list of words (not a raw string) so that
# word_ids() below can report which word each sub-word piece belongs to.
words = sent.split()
tok_sent = tok(words, is_split_into_words=True, return_tensors='pt')

# Only one sentence was encoded, so take the first row of the id tensor.
tok_ids = tok_sent['input_ids'][0]
decoded = tok.convert_ids_to_tokens(tok_ids)
print(decoded)
print(tok_sent.word_ids())

# Forward pass only: gradient tracking is disabled inside this context.
with torch.no_grad():
    outputs = model(**tok_sent)

# Per-token hidden states of the single encoded sentence.
embeds = outputs['last_hidden_state'][0]
print(embeds.shape)