Normalise the files in the given directory.
main()
Normalise filenames.
Source code in corpustools/normalise_filenames.py
def main():
    """Normalise filenames.

    Parses the command line and runs normalise() once for every
    directory given there.
    """
    args = normalise_parse_args()
    for target_dir in args.target_dirs:
        normalise(Path(target_dir))
|
Normalise the filenames in the corpuses.
Parameters:
| Name |
Type |
Description |
Default |
target_dir
|
Path
|
directory where filenames should be normalised
|
required
|
Source code in corpustools/normalise_filenames.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def normalise(target_dir: Path):
    """Normalise the filenames in the corpuses.

    Walks ``target_dir`` and, for every file whose name differs from what
    ``normalise_filename`` returns for it, renames the file and the paths
    that belong to it (see ``_move_corpus_file``).

    Files ending in ``.xsl`` are never treated as originals; they are moved
    together with the file they belong to.

    Args:
        target_dir (Path): directory where filenames should be normalised
    """
    print(f"Normalising names in {target_dir}")

    for root, _, filenames in target_dir.walk():
        # Substring test on the complete directory path: every directory
        # whose path contains ".git" anywhere is left untouched.
        if ".git" in str(root):
            continue

        for filename in filenames:
            if filename.endswith(".xsl"):
                continue

            path = root / filename
            # Compute the normalised name once and reuse it for both the
            # "does it change?" test and the rename target.
            new_name = normalise_filename(path.name)
            if new_name == path.name:
                continue

            normalised_path = make_corpus_path(str(path.with_name(new_name)))
            orig_corpus_path = make_corpus_path(str(path))
            _move_corpus_file(orig_corpus_path, normalised_path)


def _move_corpus_file(orig_corpus_path, normalised_path):
    """Rename one corpus file and everything that refers to it.

    Renames ``orig``, then ``xsl`` and ``converted`` when they exist. For
    every existing parallel file the parallel-text entry for this file's
    language is set to the new file name and the metadata is written back;
    the tmx file for that language pair is renamed when it exists.

    Args:
        orig_corpus_path: corpus path object of the file under its old name
        normalised_path: corpus path object of the file under its new name
    """
    print(f"\t\tmove {orig_corpus_path.orig} -> {normalised_path.orig}")
    orig_corpus_path.orig.rename(normalised_path.orig)

    if orig_corpus_path.xsl.exists():
        orig_corpus_path.xsl.rename(normalised_path.xsl)

    if orig_corpus_path.converted.exists():
        orig_corpus_path.converted.rename(normalised_path.converted)

    # NOTE(review): parallels() is consulted after the xsl file has been
    # moved -- confirm it does not need to read the old metadata file.
    for parallel_path in orig_corpus_path.parallels():
        if parallel_path is None or not parallel_path.exists():
            continue

        parallel_corpuspath = make_corpus_path(str(parallel_path))
        parallel_corpuspath.metadata.set_parallel_text(
            normalised_path.lang, normalised_path.orig.name
        )
        parallel_corpuspath.metadata.write_file()

        old_tmx = orig_corpus_path.tmx(parallel_corpuspath.lang)
        if old_tmx.exists():
            old_tmx.rename(normalised_path.tmx(parallel_corpuspath.lang))
|
Parse the commandline options.
Returns:
| Type |
Description |
Namespace
|
the parsed commandline arguments
|
Source code in corpustools/normalise_filenames.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
def normalise_parse_args():
    """Parse the commandline options.

    Accepts one or more positional directories, stored as ``target_dirs``,
    on top of the options inherited from ``argparse_version.parser``.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    description = (
        "Program to normalise names in given directories. "
        "The filenames are downcased, non ascii characters are replaced "
        "by ascii ones and some unwanted characters are removed."
    )
    parser = argparse.ArgumentParser(
        description=description,
        parents=[argparse_version.parser],
    )
    parser.add_argument(
        "target_dirs",
        help="The directory/ies where filenames should be normalised.",
        nargs="+",
    )
    return parser.parse_args()
|