# NOTE(review): the three lines below are Git/GitLab web-UI residue accidentally
# captured when this file was copied; they are not Python code and are preserved
# here as comments so the module remains parseable.
# "...Multiview/Fusion/Methods/LateFusion.py" did not exist on "c8524de96eb156eec0d1ad855aa3ad920cf8ef3a"
# Select Git revision
# metric.py 1.73 KiB
"""
Basic sketch of the `metric` script.
The script should be capable of performing two different tasks.
Given two corpora $C_1$ and $C_2$:
1. Calculate individual "scores" based on a custom metric.
2. Calculate a similarity score between the two corpora based on this metric.
The metric will be calculated based on the distributions of various linguistic
features, all of which should be obtainable given a corpus' CoNLL-U file.
In order to test the metric, we first apply it to pre-existing corpora,
with the long-term goal of applying it to the task of generated text
evaluation.
Some information on possible evaluation metrics for generated text can be
found in the README file of this repository.
"""
# Placeholder paths to the two CoNLL-U corpus files to compare.
# Fill these in (or replace with CLI arguments) before running the script.
FILEPATH_1: str = ""
FILEPATH_2: str = ""
def read_conllu(filepath: str):
    """
    Read a corpus (text) from a .conllu file.

    Parameters
    ----------
    filepath : str
        Path to the CoNLL-U file to parse.

    Raises
    ------
    NotImplementedError
        Always, until the parser is implemented (the `conllu` package
        linked below is the intended backend).

    See also:
    - [CoNLL-U Format](https://universaldependencies.org/format.html)
    - [Python package](https://pypi.org/project/conllu/)
    """
    # Fix: the format-spec link above previously ended with a stray `]`
    # instead of `)`, breaking the Markdown link.
    raise NotImplementedError()
def calculate_abs_score(corpus):
    """
    Compute the standalone "score" of a single CoNLL-U corpus.

    The score is meant to be derived from the distributions of various
    linguistic features extracted from the corpus. Not yet implemented.
    """
    raise NotImplementedError
def calculate_rel_score(corpus_1, corpus_2):
    """
    Compute a "relative score" for two CoNLL-U corpora.

    The result is intended to be a similarity metric between the two
    texts, obtained by comparing the distributions of various linguistic
    features. Not yet implemented.
    """
    raise NotImplementedError
def main():
    """
    Entry point: read both corpora, score each one individually, then
    compute and report their pairwise similarity.
    """
    corpus_1 = read_conllu(FILEPATH_1)
    corpus_2 = read_conllu(FILEPATH_2)

    # Individual (absolute) score of each corpus.
    score_1 = calculate_abs_score(corpus_1)
    score_2 = calculate_abs_score(corpus_2)

    # Pairwise (relative) similarity between the two corpora.
    similarity = calculate_rel_score(corpus_1, corpus_2)

    # Fix: the three results above were previously computed and then
    # silently discarded, so the script produced no observable output.
    print(f"Corpus 1 score: {score_1}")
    print(f"Corpus 2 score: {score_2}")
    print(f"Similarity:     {similarity}")


if __name__ == "__main__":
    main()