Skip to content
Snippets Groups Projects
Select Git revision
  • 30-perform-data-augmentation
  • develop default
  • 29-add-apply_func_on_time_window-time-version
  • main protected
  • v0.2.0-alpha
  • v0.1.0-alpha
6 results

VERSION

Blame
  • bert-minimal.py 731 B
    #!/usr/bin/env python3
    
    import torch
    from transformers import AutoModel, AutoTokenizer
    
    # Hugging Face model id shared by the tokenizer and the encoder.
    model_id = 'almanach/camembert-base'
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    encoder = AutoModel.from_pretrained(model_id)

    # Sentence to embed. Other sentences that can be swapped in:
    #   "Des poids lourds et engins en feu dans une entreprise en Vendée ."
    #   "La gare routière attend toujours ses illuminations ."
    sentence = "Quelle surprise ! Arturo a la covid"
    words = sentence.split()

    # The sentence is already split on whitespace, so the tokenizer is told
    # that each list item is one word.
    encoding = tokenizer(words, is_split_into_words=True, return_tensors='pt')

    # Show the tokens of the first (and only) sequence in the batch, followed
    # by the token-to-word alignment reported by word_ids().
    first_ids = encoding['input_ids'][0]
    print(tokenizer.convert_ids_to_tokens(first_ids))
    print(encoding.word_ids())

    # Inference only: run the forward pass without gradient tracking.
    with torch.no_grad():
        outputs = encoder(**encoding)

    # Keep the hidden states of the first sequence and report their shape.
    embeddings = outputs['last_hidden_state'][0]
    print(embeddings.shape)