This file contains functions to count converted and analysed corpus files.
Count files in the given language.
Source code in /home/anders/projects/CorpusTools/corpustools/counter.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
def count_files(path):
    """Count converted and analysed files among the corpus files under path.

    Args:
        path: where to look for corpus files; it is handed on to
            ConverterManager.collect_files.

    Returns:
        (tuple): a five-element tuple consisting of
            the number of collected files,
            the number of collected files whose converted file exists,
            the number of collected files that have today's xfst analysis,
            the number of collected files that have today's hfst analysis,
            a dict of sets where the key "con" holds the original names of
            the files lacking a converted file, and the key "ana" holds the
            converted names of the files that are converted, but lack at
            least one of today's analyses.
    """
    today = date.today()
    cm = convertermanager.ConverterManager(False, False)
    cm.collect_files([path])

    counter = defaultdict(int)
    lacking_files = defaultdict(set)

    for f in cm.files:
        names = converter.Converter(f).names
        # Look for the converted file once per corpus file; the answer is
        # needed for the "con" bookkeeping and for each fst below.
        is_converted = os.path.exists(names.converted)

        if is_converted:
            counter["con"] += 1
        else:
            lacking_files["con"].add(names.orig)

        for fst in ["xfst", "hfst"]:
            # Today's analyses are looked for in a dated, per-fst variant of
            # the analysed directory, e.g. /analysed.2024-01-31/xfst/
            todays_analysed = f"/analysed.{today}/{fst}/"
            if os.path.exists(names.analysed.replace("/analysed/", todays_analysed)):
                counter[fst] += 1
            elif is_converted:
                # Files without a converted version are already listed
                # under "con", so they are not repeated under "ana".
                lacking_files["ana"].add(names.converted)

    return (
        len(cm.files),
        counter["con"],
        counter["xfst"],
        counter["hfst"],
        lacking_files,
    )
|
Parse the commandline options.
Returns:

| Type | Description |
| --- | --- |
| argparse.Namespace | the parsed commandline arguments |
Source code in /home/anders/projects/CorpusTools/corpustools/counter.py
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
def parse_options():
    """Set up the commandline parser of the counter and run it.

    The parser inherits the options defined on argparse_version.parser and
    adds the --listfiles flag. Since parse_args is called without arguments,
    the options are read from sys.argv.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    option_parser = argparse.ArgumentParser(
        description="Count corpus files. List them if called for.",
        parents=[argparse_version.parser],
    )
    option_parser.add_argument(
        "--listfiles",
        help="List lacking converted and analysed files.",
        action="store_true",
    )

    return option_parser.parse_args()
|