Commit 2b70480f authored by Tania Bladier

debug

parent cba95aff
1 merge request: !1 debugged pytorch implementation
@@ -13,7 +13,6 @@ dev_cff="./out/dev_${lang}.cff"
dev_word_limit="5000"
test_conll="../data/test_${lang}.conllu"
#test_conll="../data/test_${lang}_5sent.conllu"
test_mcf="./out/test_${lang}_pgle.mcf"
test_mcf_hyp="./out/test_${lang}_hyp.mcf"
@@ -39,8 +39,6 @@ train_fr_file = sys.argv[7]
dev_fr_file = sys.argv[8]
##########################################################################
#dev_fr_file = '/home/taniabladier/Programming/AMU/tbp/expe/out/dev_fr1.cff'
#train_fr_file = '/home/taniabladier/Programming/AMU/tbp/expe/out/train_fr1.cff'
n_classes, maxlen, n_symbols, symbol_to_idx, idx_to_symbol, class_to_idx, idx_to_class, _ = make_pytorch_dicts(dev_fr_file, train_fr_file)
##########################################################################
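For orientation: make_pytorch_dicts is defined in pytorch_utils and is not shown in this diff. Judging from the names it unpacks into, it presumably builds symbol and class vocabularies from the two .cff files along these lines (a hypothetical sketch, not the project's actual implementation):

# Hypothetical sketch of the lookup tables unpacked above. The real
# make_pytorch_dicts lives in pytorch_utils; vocabulary collection and any
# special symbols are assumptions here.
symbols = ['NULL', 'ROOT', 'EOS']                    # placeholder vocabulary
symbol_to_idx = {s: i for i, s in enumerate(symbols)}
idx_to_symbol = {i: s for s, i in symbol_to_idx.items()}
n_symbols = len(symbol_to_idx)                       # used as the LSTM input size below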
@@ -102,16 +100,16 @@ class SimpleLSTM(nn.Module):
input_size = n_symbols
input_size = 133 #n_symbols
hidden_size = 128
output_size = n_classes
output_size = 75 #n_classes
print('input output', input_size, output_size)
model = SimpleLSTM(input_size, hidden_size, output_size)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
#model_file = '/home/taniabladier/Programming/AMU/tbp/expe/out/fr.pytorch'
#mcf_file = '/home/taniabladier/Programming/AMU/tbp/expe/out/test_fr_pgle.mcf'
checkpoint = torch.load(model_file, map_location=torch.device('cpu'), weights_only=False)
model.load_state_dict(checkpoint['model_state_dict'])
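Only the load side of the checkpoint appears in this hunk. The save side is presumably the usual state-dict pattern (an assumption; the training script's save call is not part of this diff):

# Assumed counterpart of the torch.load(...) above: training presumably stores
# the weights in a dict under 'model_state_dict', which load_state_dict consumes.
torch.save({'model_state_dict': model.state_dict()}, model_file)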
@@ -130,8 +128,8 @@ model.eval();
#model = load_model(model_file)
inputSize = featModel.getInputSize()
outputSize = moves.getNb()
#inputSize = featModel.getInputSize()
#outputSize = moves.getNb()
c = Config(mcf_file, mcd, dicos)
@@ -154,7 +152,7 @@ while c.getBuffer().readNextSentence() and numWords < wordsLimit :
###############
inputVector = ' '.join(str(x) for x in inputVector)
inputVector = encode_x_batch([inputVector], symbol_to_idx, n_symbols)
inputVector = encode_x_batch([inputVector], symbol_to_idx, 133) #n_symbols)
inputVector = torch.from_numpy(inputVector).float().to(device)
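encode_x_batch comes from pytorch_utils and is not shown here. Given that the LSTM input size equals n_symbols (hardcoded to 133 in this commit), it plausibly one-hot encodes each feature symbol per timestep. A hypothetical stand-in, with padding and unknown-symbol handling as assumptions:

import numpy as np

def encode_x_batch_sketch(batch, symbol_to_idx, n_symbols):
    # Hypothetical stand-in for encode_x_batch: one-hot encode the
    # space-separated symbols of each item into (batch, seq_len, n_symbols).
    seqs = [item.split() for item in batch]
    max_len = max(len(s) for s in seqs)
    out = np.zeros((len(seqs), max_len, n_symbols), dtype=np.float32)
    for b, seq in enumerate(seqs):
        for t, sym in enumerate(seq):
            out[b, t, symbol_to_idx.get(sym, 0)] = 1.0  # unknowns -> index 0 (assumption)
    return out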
@@ -209,10 +207,6 @@ while c.getBuffer().readNextSentence() and numWords < wordsLimit :
for i in range(1, c.getBuffer().getLength()):
    w = c.getBuffer().getWord(i)
    w.affiche(mcd)
#print('')
#print('5555\t', w.getFeat("GOV"), end='\t')
#print('\n5566\t', w.getFeat("LABEL"))
#print('\n5566\t', w.getFeat("POS"))
numSent += 1
# if numSent % 10 == 0:
@@ -5,7 +5,7 @@ import torch.nn as nn
import torch.nn.functional as F
from pytorch_utils import *
from plot_lib import *
import os
"""## 1. Reading Data Files"""
@@ -67,17 +67,9 @@ n_classes, maxlen, n_symbols, symbol_to_idx, idx_to_symbol, \
train_items_list, train_labels_list, train_inputSize, train_outputSize = readFile_cff(cffTrainFileName)
dev_items_list, dev_labels_list, dev_inputSize, dev_outputSize = readFile_cff(cffDevFileName)
#print(len(train_items_list))
#print(train_items_list[:3])
#print(len(dev_items_list))
train_data_gen = preprocess_data(train_items_list[:800000], train_labels_list[:800000], batch_size, symbol_to_idx, class_to_idx, train_inputSize, train_outputSize)#, n_symbols, n_classes)
dev_data_gen = preprocess_data(dev_items_list[:200000], dev_labels_list[:200000], batch_size, symbol_to_idx, class_to_idx, train_inputSize, train_outputSize)#, n_symbols, n_classes)
#train_data_gen = preprocess_data(train_items_list[:800], train_labels_list[:800])
#dev_data_gen = preprocess_data(dev_items_list[:200], dev_labels_list[:200])
#print(len(train_items_list))
"""## 2. Defining the Model"""
@@ -87,9 +79,9 @@ dev_data_gen = preprocess_data(dev_items_list[:200000], dev_labels_list[:200000
torch.manual_seed(1)
# Setup the RNN and training settings
input_size = train_inputSize #n_symbols
input_size = 133 #train_inputSize #n_symbols
hidden_size = 128
output_size = train_outputSize #n_classes
output_size = 75 #train_outputSize #n_classes
class SimpleMLP(nn.Module):
    def __init__(self, input_size, output_size):
@@ -157,44 +149,6 @@ class SimpleLSTM(nn.Module):
c = torch.cat(c_list)
return h, c
class BiLSTM(nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super().__init__()
        #self.hidden_size = 64
        self.lstm = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        self.linear = nn.Linear(hidden_size * 4, output_size)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.out = nn.Linear(output_size, 1)

    def forward(self, x):
        h_lstm = self.lstm(x)[0]
        avg_pool = torch.mean(h_lstm, 1)
        max_pool, _ = torch.max(h_lstm, 1)
        #print("avg_pool", avg_pool.size())
        #print("max_pool", max_pool.size())
        conc = torch.cat((avg_pool, max_pool), 1)
        conc = self.relu(self.linear(conc))
        conc = self.dropout(conc)
        out = self.out(conc)
        return out

    def get_states_across_time(self, x):
        h_c = None
        h_list, c_list = list(), list()
        with torch.no_grad():
            for t in range(x.size(1)):  # was out.size(1); 'out' is undefined in this scope
                h_c = self.lstm(x[:, [t], :], h_c)[1]
                h_list.append(h_c[0])
                c_list.append(h_c[1])
        h = torch.cat(h_list)
        c = torch.cat(c_list)
        return h, c
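The hidden_size*4 in the linear layer is the one non-obvious size in this class: the bidirectional LSTM doubles hidden_size, and concatenating the average and max pools doubles it again. A quick shape check using the sizes hardcoded in this commit (batch and sequence length are arbitrary illustration values):

model = BiLSTM(input_size=133, hidden_size=128, output_size=75)
x = torch.randn(4, 10, 133)    # (batch, seq_len, input_size)
h_lstm = model.lstm(x)[0]      # (4, 10, 256): two directions * hidden_size
pooled = torch.cat((h_lstm.mean(1), h_lstm.max(1)[0]), 1)  # (4, 512) = hidden_size*4
print(model(x).shape)          # torch.Size([4, 1]), from the final out layer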
"""## 3. Defining the Training Loop"""
@@ -359,7 +313,7 @@ def train_and_test(model, train_data_gen, test_data_gen, criterion, optimizer, m
ax.set_xlabel('epoch', fontsize=12)
ax.set_ylabel(metric, fontsize=12)
ax.legend(['Train', 'Test'], loc='best')
plt.savefig('./expe/out/loss_accuracy.png')
plt.savefig(os.path.abspath('..') + '/expe/out/loss_accuracy.png')
#plt.show()
return model
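The new savefig path resolves against the parent of the current working directory, so it assumes the script is launched from one level below the repository root. An equivalent, slightly more portable spelling (same assumption):

plt.savefig(os.path.join(os.path.abspath('..'), 'expe', 'out', 'loss_accuracy.png'))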
@@ -425,28 +379,10 @@ max_epochs = 30
# Train the model
model = train_and_test(model, train_data_gen, dev_data_gen, criterion, optimizer, max_epochs)
for parameter_group in list(model.parameters()):
    print(parameter_group.size())
""" 6b. BiLSTM
#for parameter_group in list(model.parameters()):
# print(parameter_group.size())
"""
"""
model = BiLSTM(input_size, hidden_size, output_size)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.RMSprop(model.parameters(), lr=0.001)
max_epochs = 30
# Train the model
model = train_and_test(model, train_data_gen, dev_data_gen, criterion, optimizer, max_epochs)
for parameter_group in list(model.parameters()):
print(parameter_group.size())
"""
"""## 7. Model Evaluation"""
import collections
@@ -527,6 +463,7 @@ def evaluate_model(model, seed=9001, verbose=False):
evaluate_model(model)
""" Visualize Model """
# Get hidden (H) and cell (C) batch state given a batch input (X)