Skip to content
Snippets Groups Projects
Select Git revision
  • b70459881ee680ab5772871a5ceaa0db6366055e
  • master default protected
  • erased
  • states
  • negatives
  • temp
  • negativeExamples
  • Rl
8 results

readMCD.py

Blame
  • bert-minimal.py 731 B
    #!/usr/bin/env python3
    
    import torch
    from transformers import AutoModel, AutoTokenizer
    
    # Hugging Face checkpoint used for both the tokenizer and the encoder.
    name = 'almanach/camembert-base'

    # Example input. Other sentences that were tried with this script:
    #   "Des poids lourds et engins en feu dans une entreprise en Vendée ."
    #   "La gare routière attend toujours ses illuminations ."
    sent = "Quelle surprise ! Arturo a la covid"

    tok = AutoTokenizer.from_pretrained(name)
    model = AutoModel.from_pretrained(name)

    # The sentence is split on whitespace first and handed over as a list of
    # words (is_split_into_words=True); tensors come back in PyTorch format.
    words = sent.split()
    tok_sent = tok(words, is_split_into_words=True, return_tensors='pt')

    # Show the tokens of the first (and only) sequence in the batch ...
    tok_ids = tok_sent['input_ids'][0]
    decoded = tok.convert_ids_to_tokens(tok_ids)
    print(decoded)
    # ... followed by the word index reported for each of those tokens.
    print(tok_sent.word_ids())

    # Forward pass with gradient tracking disabled (inference only).
    with torch.no_grad():
        outputs = model(**tok_sent)
        embeds = outputs['last_hidden_state'][0]
    print(embeds.shape)