Skip to content

tmx_cat

main()

Concatenate the TMX files in the given directory into a single file, `concatenated.tmx`.

Source code in corpustools/tmx_cat.py
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
def main():
    """Build one TMX document from a directory of TMX files and save it.

    The directory comes from the command line (``tmx_dir``). A ``tmx`` root
    element is created, given a header by ``make_header`` and filled in by
    ``make_body``; the result is serialised as pretty-printed UTF-8 and
    written to ``concatenated.tmx`` in the current working directory.
    """
    options = parse_options()

    root = etree.Element("tmx")
    # The header is attached to the root first and then handed to make_body
    # together with the root and the input directory.
    make_body(root, make_header(root), options.tmx_dir)

    serialized = etree.tostring(root, pretty_print=True, encoding="UTF-8")

    output_path = Path("concatenated.tmx")
    output_path.write_bytes(serialized)

    print(f"Concatenated TMX file written to: {output_path}")

make_header(tmx_root)

Create TMX header element.

Source code in corpustools/tmx_cat.py
24
25
26
27
28
29
30
31
32
def make_header(tmx_root: etree.Element) -> etree.Element:
    """Append a ``header`` child to *tmx_root* and return it.

    Args:
        tmx_root: Element that receives the new ``header`` sub-element.

    Returns:
        The newly created ``header`` element, carrying the fixed
        ``creationtool``, ``segtype``, ``o-tmf``, ``adminlang`` and
        ``datatype`` attributes.
    """
    # Kept as an ordered sequence so attributes are set in a fixed order.
    header_attributes = (
        ("creationtool", "tmx_cat"),
        ("segtype", "sentence"),
        ("o-tmf", "OmegaT TMX"),
        ("adminlang", "en-us"),
        ("datatype", "plaintext"),
    )

    header = etree.SubElement(tmx_root, "header")
    for attribute, value in header_attributes:
        header.set(attribute, value)
    return header

parse_options()

Parse the given options.

Source code in corpustools/tmx_cat.py
10
11
12
13
14
15
16
17
18
19
20
21
def parse_options() -> argparse.Namespace:
    """Parse the command-line arguments of the TMX concatenation tool.

    Returns:
        The parsed arguments. ``tmx_dir`` holds the directory containing the
        TMX files to concatenate, as given on the command line.
    """
    parser = argparse.ArgumentParser(
        parents=[argparse_version.parser],
        # Shown by --help; the previous text ("Analyse files in parallel.")
        # did not describe this tool.
        description="Concatenate the TMX files in a directory into one TMX file.",
    )

    parser.add_argument(
        "tmx_dir",
        help="directory containing the TMX files to concatenate",
    )

    return parser.parse_args()