Skip to content

tmx

Classes and functions to make and handle Translation Memory eXchange files.

add_filename_id(filename)

Add the tmx filename as a prop element in the header.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
42
43
44
45
46
47
48
def add_filename_id(filename):
    """Build the prop element that records the tmx filename.

    Args:
        filename (str): the filename stored as the text of the element

    Returns:
        (etree.Element): a prop element whose type attribute is "x-filename"
    """
    filename_prop = etree.Element("prop", type="x-filename")
    filename_prop.text = filename
    return filename_prop

main()

Convert the tmx files given on the command line to html.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
136
137
138
139
140
141
def main():
    """Convert the tmx files given on the command line to html."""
    options = parse_options()

    # Every file with the .tmx suffix found among the given sources is
    # handed to tmx2html in turn.
    tmx_files = corpuspath.collect_files(options.sources, suffix=".tmx")
    for tmx_file in tmx_files:
        tmx2html(tmx_file)

make_tmx(file1_name, file1_lang, file2_lang, sentence_pairs)

Make tmx file based on the output of the aligner.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
def make_tmx(file1_name, file1_lang, file2_lang, sentence_pairs):
    """Build a tmx element tree from aligned sentences.

    Args:
        file1_name: filename recorded in the tmx header
        file1_lang: language of the first sentence in each translation unit,
            also used as the source language of the header
        file2_lang: language of the second sentence in each translation unit
        sentence_pairs: unpacked into zip(), so it is expected to hold two
            parallel sequences of sentences

    Returns:
        (etree.Element): the tmx root element, containing a header and a body
    """
    root = etree.Element("tmx")
    root.append(make_tmx_header(file1_name, file1_lang))

    # One tu element per aligned pair, in the order the pairs are given.
    body = etree.SubElement(root, "body")
    body.extend(
        make_tu(sentence1, file1_lang, sentence2, file2_lang)
        for sentence1, sentence2 in zip(*sentence_pairs)
    )

    return root

make_tmx_header(filename, lang)

Make a tmx header based on the lang variable.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
51
52
53
54
55
56
57
58
59
60
61
62
63
64
def make_tmx_header(filename, lang):
    """Build the tmx header element.

    Args:
        filename: filename stored in a prop child element of the header
        lang: value of the srclang attribute

    Returns:
        (etree.Element): the header element
    """
    header = etree.Element("header")

    # Attributes are set one by one so that their order is fixed.
    header_attributes = (
        ("segtype", "sentence"),
        ("o-tmf", "OmegaT TMX"),
        ("adminlang", "en-US"),
        ("srclang", lang),
        ("datatype", "plaintext"),
    )
    for name, value in header_attributes:
        header.set(name, value)

    header.append(add_filename_id(filename))

    return header

make_tu(line1, file1_lang, line2, file2_lang)

Make a tmx tu element based on line1 and line2 as input.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
32
33
34
35
36
37
38
39
def make_tu(line1, file1_lang, line2, file2_lang):
    """Build a tmx tu element holding one tuv element per input line.

    Args:
        line1: sentence of the first tuv element
        file1_lang: language of line1
        line2: sentence of the second tuv element
        file2_lang: language of line2

    Returns:
        (etree.Element): the tu element
    """
    tu = etree.Element("tu")
    for sentence, lang in ((line1, file1_lang), (line2, file2_lang)):
        tu.append(make_tuv(sentence, lang))

    return tu

make_tuv(line, lang)

Make a tuv element given an input line and a lang variable.

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
84
85
86
87
88
89
90
91
92
def make_tuv(line, lang):
    """Build a tuv element wrapping a single sentence.

    Args:
        line (str): the sentence; surrounding whitespace is stripped
        lang: value of the xml:lang attribute of the tuv element

    Returns:
        (etree.Element): a tuv element with one seg child holding the sentence
    """
    tuv = etree.Element("tuv")
    tuv.set("{http://www.w3.org/XML/1998/namespace}lang", lang)
    etree.SubElement(tuv, "seg").text = line.strip()

    return tuv

parse_options()

Parse the commandline options.

Returns:

Type Description
argparse.Namespace

the parsed commandline arguments

Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
def parse_options():
    """Read the commandline options.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    argument_parser = argparse.ArgumentParser(
        description="Convert tmx files to html",
        parents=[argparse_version.parser],
    )
    argument_parser.add_argument(
        "sources",
        nargs="+",
        help="Files or directories to search for tmx files",
    )

    return argument_parser.parse_args()

tmx2html(filename)

Turn a tmx file into an html file.

Parameters:

Name Type Description Default
filename str

name of a tmx file

required
Source code in /home/anders/projects/CorpusTools/corpustools/tmx.py
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def tmx2html(filename):
    """Turn a tmx file into an html file.

    The html file is written next to the tmx file, with ".html" appended to
    the name of the tmx file.

    Args:
        filename (str | pathlib.Path): name of a tmx file
    """
    # Imported here so this function does not rely on a module-level import.
    from pathlib import Path

    tmx = etree.parse(filename)
    tmx2html_transformer = etree.XSLT(
        etree.parse(os.path.join(HERE, "xslt/tmx2html.xsl"))
    )

    # with_name() and write_bytes() are path methods, so a plain string is
    # wrapped in a Path first; any other object is used as it is.
    tmx_path = Path(filename) if isinstance(filename, str) else filename

    html_name = tmx_path.with_name(tmx_path.name + ".html")
    html_name.write_bytes(
        etree.tostring(
            tmx2html_transformer(tmx),
            pretty_print=True,
            encoding="utf-8",
            xml_declaration=True,
        )
    )
    print(f"Wrote {html_name}")