Skip to content
Snippets Groups Projects
Commit edd42965 authored by Carlos Ramisch
Browse files

Transfer theoretical course (CM) code snippets and examples to this repo

parent 127f0b3b
No related branches found
No related tags found
No related merge requests found
#!/usr/bin/env python3
from lib.conllulib import CoNLLUReader

# Round-trip demo: CoNLL-U "parseme:ne" named-entity column <-> BIO tags.
test="""# global.columns = ID FORM parseme:ne
1 Le 1:PROD
2 Petit 1
3 Prince 1
4 de *
5 Saint-Exupéry 2:PERS
6 est *
7 entré *
8 à *
9 l' *
10 École 3:ORG
11 Jules-Romains 3"""

# Encode each sentence's NE column as a BIO tag sequence.
for sentence in CoNLLUReader.readConlluStr(test):
    print(CoNLLUReader.to_bio(sentence))
# ['B-PROD', 'I-PROD', 'I-PROD', 'O', 'B-PERS', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG']

# Decode BIO sequences back to the numbered NE representation, once with a
# well-formed sequence and once with an ill-formed one, in both styles.
well_formed = ["B-PERS", "I-PERS", "I-PERS", "O", "B-LOC", "I-LOC"]
ill_formed = ["I-PERS", "B-PERS", "I-PERS", "O", "I-LOC"]
print(CoNLLUReader.from_bio(well_formed, bio_style='bio'))
# ['1:PERS', '1', '1', '*', '2:LOC', '2']
print(CoNLLUReader.from_bio(well_formed, bio_style='io'))
# WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
# WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
# ['1:PERS', '1', '1', '*', '2:LOC', '2']
print(CoNLLUReader.from_bio(ill_formed, bio_style='bio'))
# WARNING: Invalid I-initial tag I-PERS converted to B
# WARNING: Invalid I-initial tag I-LOC converted to B
# ['1:PERS', '2:PERS', '2', '*', '3:LOC']
print(CoNLLUReader.from_bio(ill_formed, bio_style='io'))
# WARNING: Got B tag in spite of 'io' bio_style: interpreted as I
# ['1:PERS', '1', '1', '*', '2:LOC']
#!/usr/bin/env python
################################################################################
### CM1 exercise to get familiar with conllu format and library
import conllu, sys

# Collect the length (in tokens) of every sentence and the length (in
# characters) of every word form from the CoNLL-U file given as argv[1].
slens, wlens = [], []
# FIX: use a context manager so the input file is closed deterministically
# (the original opened it and never closed the handle).
with open(sys.argv[1], 'r', encoding='UTF-8') as conllufile:
    for sent in conllu.parse_incr(conllufile):
        slens.append(len(sent))
        wlens.extend([len(token['form']) for token in sent])
print("Avg sent len={:.2f}".format(sum(slens)/len(slens)))
print("Avg word len={:.2f}".format(sum(wlens)/len(wlens)))

# Show both length distributions as side-by-side histograms.
import matplotlib.pyplot as plt
f,(a1,a2) = plt.subplots(1,2)
a1.hist(slens,bins=20)
a1.set_title("Sentence length")
a2.hist(wlens,bins=20)
a2.set_title("Word length")
plt.show()
################################################################################
### CM3 exercise to manipulate morphological features
from collections import Counter, defaultdict

# feats_pos_dict[upos][feat_key]   -> nb of tokens of that UPOS carrying feat_key
# feats_dict[feat_key][feat_value] -> nb of occurrences of each value of feat_key
feats_pos_dict = defaultdict(lambda: Counter())
feats_dict = defaultdict(lambda: Counter())
no_feat = total_words = total_features = 0
# FIX: context manager closes the file (the original left the handle open);
# also dropped the unused `slens, wlens = [], []` line copy-pasted from CM1.
with open(sys.argv[1], 'r', encoding='UTF-8') as conllufile:
    for sent in conllu.parse_incr(conllufile):
        for w in sent :
            total_words += 1
            if w["feats"] :
                for (key,value) in w["feats"].items():
                    feats_pos_dict[w["upos"]][key] += 1
                    feats_dict[key][value] += 1
                    total_features += 1
            else:
                no_feat += 1
print(f"Number of feature keys: {len(feats_dict)}")
for feat in feats_dict:
    print(f" {feat}: {list(feats_dict[feat])}")
# Cross-table: one column per UPOS, one row per feature key.
print(' ' * 9 + '|' + '|'.join([f"{pos:5}" for pos in feats_pos_dict])+'|')
for feat in feats_dict:
    print(f"{feat:9}|" + '|'.join([f"{feats_pos_dict[pos][feat] if feats_pos_dict[pos][feat] else ' ':5}" for pos in feats_pos_dict]) + '|')
print(f"Words with no feature: {no_feat}/{total_words} ({no_feat/total_words*100:.2f}%)")
print(f"Average features per word: {total_features/total_words:.2f}")
#!/usr/bin/env python
################################################################################
# defaultdict examples
from collections import defaultdict

# int() produces 0, so any absent key starts counting from zero.
word_count = defaultdict(int)
sentence = "the man and the dog and the tree"
for token in sentence.split():
    word_count[token] += 1
print(dict(word_count))
# {'the': 3, 'man': 1, 'and': 2, 'dog': 1, 'tree': 1}
# Note: merely *reading* an absent key inserts it with the default value.
print(word_count["hi"])
# 0
print(dict(word_count))
# {'the': 3, 'man': 1, 'and': 2, 'dog': 1, 'tree': 1, 'hi': 0}
################################################################################
# Embedding layer examples
from torch import LongTensor
import torch.nn as nn

# Vocabulary of 20 indices, 4-dim vectors; index 0 is the padding entry.
embed = nn.Embedding(20, 4, padding_idx=0)
print(embed.weight[:2])
# tensor([[ 0.0000, 0.0000, 0.0000, 0.0000],
# [-0.9336, -0.0982, 0.2726, 1.9872], grad_fn=<...>)
vectors = embed(LongTensor([3,4]))
print(vectors.shape)
# torch.Size([2, 4])
vectors = embed(LongTensor([3, 4, 3, 0]))
print(vectors.shape)
# torch.Size([4, 4])
vectors = embed(LongTensor([[3, 4, 3], [0, 1, 1]]))  # batched
print(vectors.shape)
# torch.Size([2, 3, 4])
################################################################################
# RNN unit examples
# GRU over 4-dim inputs with a 10-dim hidden state; tensors are (batch, time, feat).
gru = nn.GRU(4, 10, batch_first=True)
emb = nn.Embedding(20, 4, padding_idx=0)
indices = LongTensor([[3, 4, 3, 2, 5],   # B=2 (batch)
                      [1, 12, 1, 0, 0]]) # L=5 (timesteps)
x = emb(indices)
print(x.shape)
# torch.Size([2, 5, 4])
# A GRU returns (all timestep outputs, final hidden state).
y = gru(x)
print(y[0].shape)
# torch.Size([2, 5, 10])
print(y[1].shape)
# torch.Size([1, 2, 10]) # notice batch is dim1
################################################################################
# TensorDataset/DataLoader examples
from torch.utils.data import TensorDataset, DataLoader
import torch

x = torch.rand(7,3)                       # 7 samples, 3 features each
y = LongTensor([[i] for i in range(7)])   # one integer "label" per sample
tds = TensorDataset(x,y)
print(len(tds))
# 7
print([(e.shape, e.dtype) for e in tds[5]])
# [(torch.Size([3]), torch.float32), (torch.Size([1]), torch.int64)]
# Batches pair up 2 samples at a time, in order.
dl = DataLoader(tds, batch_size=2)
first_batch = next(iter(dl))
print(first_batch[0].shape)
#torch.Size([2, 3])
print(first_batch[1].shape)
#torch.Size([2, 1])
for (batch_x, batch_y) in dl:
    print(batch_x.shape, batch_y.shape)
################################################################################
This diff is collapsed.
#!/usr/bin/env python3
# Call counters shared by the two Fibonacci implementations below.
n_appels_rec = n_appels_dynprog = 0

def fiboRec(n):
    """Naive exponential-time recursive Fibonacci.

    Every invocation bumps the global counter n_appels_rec so the number
    of recursive calls can be compared against the dynamic-programming
    version.  Returns fib(n), with fib(0) = 0 and fib(1) = 1.
    """
    global n_appels_rec
    n_appels_rec += 1
    return n if n <= 1 else fiboRec(n-1) + fiboRec(n-2)
################################################################################
def fiboDynProg(n):
    """Bottom-up (dynamic programming) Fibonacci: returns fib(n) for n >= 0.

    Each table update bumps the global counter n_appels_dynprog (plus 2 for
    the base cases) so the linear cost can be compared with fiboRec.
    """
    global n_appels_dynprog
    fib = [0] * (n+1)
    # FIX: the original assigned fib[1] = 1 unconditionally, which raised
    # IndexError for n == 0 (fib then has a single cell).  Guard the base case.
    if n >= 1:
        fib[1] = 1
    n_appels_dynprog += 2
    for i in range(2,n+1):
        n_appels_dynprog += 1
        fib[i] = fib[i-1] + fib[i-2]
    return fib[n]
# Compare both implementations for n = 1..19: identical Fibonacci values,
# wildly different call counts (exponential vs linear).
for n in range(1,20):
    n_appels_rec = n_appels_dynprog = 0  # reset counters for this n
    resultDynProg = fiboDynProg(n)
    resultRec = fiboRec(n)
    print(f"n={n:2} fib_rec(n)={resultRec:4}, n_appels_rec={n_appels_rec:5} fib_dp(n)={resultDynProg:4}, n_appels_dp={n_appels_dynprog:5}")
../lib
\ No newline at end of file
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment