Skip to content

update_metadata

Update metadata files in given directories.

find_xsl_files(directories)

Find all .xsl files in the given directories, searching them recursively.

Parameters:

Name Type Description Default
directories list[str]

paths to directories

required

Yields:

Type Description
str

path to an .xsl file

Source code in /home/anders/projects/CorpusTools/corpustools/update_metadata.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
def find_xsl_files(directories):
    """Walk the given directories and yield every .xsl file below them.

    Each directory is traversed recursively with os.walk, and a file is
    reported when its name ends with ".xsl".

    Args:
        directories (list[str]): paths to directories

    Yields:
        (str): path to an .xsl file
    """
    for top in directories:
        for dirpath, _subdirs, filenames in os.walk(top):
            # Join each matching name with the directory it was found in.
            yield from (
                os.path.join(dirpath, filename)
                for filename in filenames
                if filename.endswith(".xsl")
            )

main()

Update metadata files.

Source code in /home/anders/projects/CorpusTools/corpustools/update_metadata.py
128
129
130
131
132
133
134
135
136
137
def main():
    """Update every metadata file found below the directories given on the commandline.

    Stops at the first file that fails to update: the file's path and the
    error are printed, and the program exits with status 4.
    """
    options = parse_options()
    for metadata_path in find_xsl_files(options.directories):
        try:
            update_xsl_file(metadata_path)
        except (AttributeError, UserWarning, xslsetter.XsltError) as failure:
            # Report which file failed and why, each on its own line.
            print(metadata_path, failure, sep="\n")
            raise SystemExit(4)

parse_options()

Parse the commandline options.

Returns:

Type Description
argparse.Namespace

the parsed commandline arguments

Source code in /home/anders/projects/CorpusTools/corpustools/update_metadata.py
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
def parse_options():
    """Build the commandline parser and parse the arguments.

    The parser takes one or more positional directory arguments and
    inherits from argparse_version.parser.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    description = (
        "Update metadata files to look like XSL-template.xsl, "
        "but with original content. This script exists because the "
        "XSL-template is updated with new variables and documentation. "
        "This script will propagate these changes to existing "
        "metadata files."
    )
    option_parser = argparse.ArgumentParser(
        parents=[argparse_version.parser], description=description
    )
    option_parser.add_argument(
        "directories",
        nargs="+",
        help="Directories where metadata files should be updated.",
    )

    return option_parser.parse_args()

update_xsl_file(filename)

Update the xsl file with XSL-template.xsl.

Parameters:

Name Type Description Default
filename str

path to a metadata file.

required
Source code in /home/anders/projects/CorpusTools/corpustools/update_metadata.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
def update_xsl_file(filename):
    """Update the xsl file with XSL-template.xsl.

    A fresh copy of the template (TEMPLATE_PATH) is loaded and filled with
    the languages, set variables, parallel texts and xsl templates of the
    original metadata file. The resulting tree then replaces the original
    handler's tree, and the original handler's write_file() is called
    (presumably writing back to ``filename`` -- confirm in xslsetter).

    Args:
        filename (str): path to a metadata file.
    """
    # Variable names that are never copied from the original file.
    # NOTE(review): these look like obsolete variable names -- confirm.
    avoid_names = frozenset(
        (
            "danlang",
            "englang",
            "finlang",
            "fkvlang",
            "gerlang",
            "isllang",
            "kallang",
            "nnolang",
            "noblang",
            "smalang",
            "smelang",
            "smjlang",
            "swelang",
            "kpvlang",
            "ruslang",
            "multilingual",
            "columns",
            "parallel_texts",
            "lower",
        )
    )
    mlang_prefix = "mlang_"
    para_prefix = "para_"

    orig_metadata = xslsetter.MetadataHandler(filename)
    template_metadata = xslsetter.MetadataHandler(TEMPLATE_PATH)

    for language in orig_metadata.mlangs:
        template_metadata.set_mlang(language)

    for name, value in orig_metadata.get_set_variables():
        if name in avoid_names:
            continue

        if name.startswith(mlang_prefix):
            # Strip only the leading prefix; str.replace would also remove
            # any later occurrence of the same substring in the name.
            template_metadata.set_mlang(name[len(mlang_prefix):])
        elif name.startswith(para_prefix):
            template_metadata.set_parallel_text(name[len(para_prefix):], value)
        elif name == "excluded":
            # The value of "excluded" is stored under "skip_pages" instead.
            template_metadata.set_variable("skip_pages", value)
        else:
            template_metadata.set_variable(name, value)

    for language, location in orig_metadata.get_parallel_texts().items():
        template_metadata.set_parallel_text(language, location)

    # Carry the original file's xsl templates over to the new tree.
    template_element = template_metadata.tree.getroot()
    for template in orig_metadata.xsl_templates:
        template_element.append(template)

    orig_metadata.tree = template_metadata.tree
    orig_metadata.write_file()