Skip to content

counter

This file contains functions to count corpus files and to list the files that lack a converted or analysed version.

count_files(path)

Count files in the given language.

Source code in /home/anders/projects/CorpusTools/corpustools/counter.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def count_files(path):
    """Count original, converted and analysed files found under path.

    Args:
        path (str): the path handed to ConverterManager.collect_files.

    Returns:
        (tuple): a five-tuple consisting of
            - the number of files collected by the converter manager,
            - the number of those files that have a converted version,
            - the number that have an xfst analysis dated today,
            - the number that have an hfst analysis dated today,
            - a dict of sets: key "con" holds the original names of files
              lacking a converted version, key "ana" holds the converted
              names of files lacking at least one of today's analyses.
    """
    today = date.today()

    cm = convertermanager.ConverterManager(False, False)
    cm.collect_files([path])
    counter = defaultdict(int)
    lacking_files = defaultdict(set)
    for f in cm.files:
        c = converter.Converter(f)
        # Checked once per file; the answer is needed both for the "con"
        # count and for deciding whether a missing analysis is reportable.
        is_converted = os.path.exists(c.names.converted)
        if is_converted:
            counter["con"] += 1
        else:
            lacking_files["con"].add(c.names.orig)

        for fst in ["xfst", "hfst"]:
            # Only analyses placed in today's dated directory are counted.
            todays_analysed = f"/analysed.{today}/{fst}/"
            if os.path.exists(c.names.analysed.replace("/analysed/", todays_analysed)):
                counter[fst] += 1
            elif is_converted:
                # The branch belongs to the existence check above, not to
                # the for loop: a for/else without break always runs, which
                # would mark every converted file as lacking analysis.
                # Unconverted files are already reported under "con".
                lacking_files["ana"].add(c.names.converted)
    return (
        len(cm.files),
        counter["con"],
        counter["xfst"],
        counter["hfst"],
        lacking_files,
    )

parse_options()

Parse the commandline options.

Returns:

Type Description
argparse.Namespace

the parsed commandline arguments

Source code in /home/anders/projects/CorpusTools/corpustools/counter.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
def parse_options():
    """Build the command line parser and parse the given arguments.

    The parser inherits the options of argparse_version.parser and adds
    the boolean flag --listfiles.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    option_parser = argparse.ArgumentParser(
        description="Count corpus files. List them if called for.",
        parents=[argparse_version.parser],
    )
    option_parser.add_argument(
        "--listfiles",
        help="List lacking converted and analysed files.",
        action="store_true",
    )

    return option_parser.parse_args()