Skip to content

test_docxconverter

Test conversion of docx files.

TestDocxConverter

Bases: XMLTester

Test docx conversion.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_docxconverter.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
class TestDocxConverter(XMLTester):
    """Check that docx documents are turned into the intermediate XML."""

    def test_convert2intermediate(self):
        """Convert the doc-test.docx fixture and compare with the expected XML."""
        docx_path = os.path.join(
            HERE, "converter_data/fakecorpus/orig/sme/riddu/doc-test.docx"
        )
        converted = htmlcontentconverter.convert2intermediate(docx_path)

        # The fragments are joined with no separator; the leading spaces
        # inside each fragment are the only whitespace between elements.
        expected_xml = "".join(
            [
                "<document>",
                "    <header>",
                "        <title/>",
                "    </header>",
                "    <body>",
                "        <p>–Mun lean njeallje jagi boaris.</p>",
                "        <p>Nu beaivvádat.</p>",
                "        <p>oahppat guovttejuvlla nalde sykkelastit.</p>",
                "        <p>njeallje suorpma boaris.</p>",
                "        <p>Olggobealde Áššu</p>",
                "        <p>Lea go dus meahccebiila ?</p>",
                "        <p>–Mii lea suohttaseamos geassebargu dus ?</p>",
                "        <p>Suohkana bearašásodagaid juohkin</p>",
                "        <p>Sámi kulturfestivála 1998</p>",
                "    </body>",
                "</document>",
            ]
        )

        self.assertXmlEqual(converted, etree.fromstring(expected_xml))

test_convert2intermediate()

Test conversion of a docx file.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_docxconverter.py
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
def test_convert2intermediate(self):
    """Convert the doc-test.docx fixture and compare with the expected XML."""
    docx_path = os.path.join(
        HERE, "converter_data/fakecorpus/orig/sme/riddu/doc-test.docx"
    )
    converted = htmlcontentconverter.convert2intermediate(docx_path)

    # The fragments are joined with no separator; the leading spaces
    # inside each fragment are the only whitespace between elements.
    expected_xml = "".join(
        [
            "<document>",
            "    <header>",
            "        <title/>",
            "    </header>",
            "    <body>",
            "        <p>–Mun lean njeallje jagi boaris.</p>",
            "        <p>Nu beaivvádat.</p>",
            "        <p>oahppat guovttejuvlla nalde sykkelastit.</p>",
            "        <p>njeallje suorpma boaris.</p>",
            "        <p>Olggobealde Áššu</p>",
            "        <p>Lea go dus meahccebiila ?</p>",
            "        <p>–Mii lea suohttaseamos geassebargu dus ?</p>",
            "        <p>Suohkana bearašásodagaid juohkin</p>",
            "        <p>Sámi kulturfestivála 1998</p>",
            "    </body>",
            "</document>",
        ]
    )

    self.assertXmlEqual(converted, etree.fromstring(expected_xml))