Skip to content

test_corpuspath

Test the naming scheme of corpus files.

name(module, lang, extension, goallang)

Produce a path to a corpus file.

Parameters:

Name Type Description Default
module str

module of the corpus file

required
lang str

language of the corpus file

required
extension str

extension of the corpus file

required
goallang str

goal language of a tmx file; only used as a path component when module ends with "tmx"

required

Returns:

Type Description
str

path to the corpus file

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_corpuspath.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
def name(module, lang, extension, goallang):
    """Build the expected path of a corpus file below HERE.

    The path always ends in ``subdir/subsubdir/filename.html`` followed by
    the given extension.

    Args:
        module (str): module of the corpus file; "orig" selects the
            ``corpus-<lang>-orig`` directory and adds no module component
        lang (str): language of the corpus file
        extension (str): extension appended to ``filename.html``
        goallang (str): goal language, only added as a path component
            when module ends with "tmx"

    Returns:
        (Path): path to the corpus file
    """
    is_orig = module == "orig"

    # Empty segments are kept on purpose: joining "" leaves the path as is.
    segments = [
        f"corpus-{lang}-orig" if is_orig else f"corpus-{lang}",
        "" if is_orig else f"{module}",
        f"{goallang}" if module.endswith("tmx") else "",
        f"subdir/subsubdir/filename.html{extension}",
    ]
    return Path(HERE).joinpath(*segments)

test_path_to_orig(filename)

Check that the corpus file naming scheme works as it should.

Parameters:

Name Type Description Default
filename str

the filename to check

required

Raises:

Type Description
AssertionError

is raised if the result is not what is expected

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_corpuspath.py
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
@pytest.mark.parametrize(
    "filename",
    [
        # Each triple is (module, extension, goallang); the language is
        # always "sme".
        name(module, "sme", extension, goallang)
        for module, extension, goallang in (
            ("orig", "", ""),
            ("orig", ".xsl", ""),
            ("orig", ".log", ""),
            ("correct-no-gs/converted", ".xml", ""),
            ("goldstandard/converted", ".xml", ""),
            ("stable/converted", ".xml", ""),
            ("stable/tmx", ".tmx", "nob"),
            ("analysed", ".xml", ""),
            ("converted", ".xml", ""),
            ("korp_mono", ".xml", ""),
            ("korp_tmx", ".tmx", "nob"),
            ("tmx", ".tmx", "nob"),
        )
    ],
)
def test_path_to_orig(filename):
    """Verify that every corpus file variant maps back to the same original.

    Args:
        filename: a corpus file path produced by name()

    Raises:
        AssertionError: if the ``orig`` attribute of the object returned by
            corpuspath.make_corpus_path differs from the expected
            original path
    """
    actual = corpuspath.make_corpus_path(filename).orig
    expected = name(module="orig", lang="sme", extension="", goallang="")
    assert actual == expected