Skip to content

test_analyser

TestAnalyser

Bases: unittest.TestCase

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_analyser.py
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
class TestAnalyser(unittest.TestCase):
    """Tests for analyser.Analyser run against the bundled sme fixture file."""

    def setUp(self):
        """Create an Analyser for "sme"/"xfst" and attach the fixture xml file.

        The analyser is pointed at the ``giella_shared`` directory below
        ``HERE`` and its ``xml_file`` is set to the converted ``smefile.xml``
        found under ``parallelize_data``.
        """
        self.a = analyser.Analyser(
            "sme", "xfst", giella_prefix=os.path.join(HERE, "giella_shared")
        )
        self.a.xml_file = corpusxmlfile.CorpusXMLFile(
            os.path.join(
                HERE,
                "parallelize_data/converted/sme/facta/skuvlahistorja2/",
                "smefile.xml",
            )
        )

    def assertXmlEqual(self, got, want):
        """Check if two stringified xml snippets are equal.

        Args:
            got: the xml text that was produced.
            want: the xml text that was expected.

        Raises:
            AssertionError: if the lxml output checker reports a mismatch;
                the message is the checker's textual difference report.
        """
        checker = doctestcompare.LXMLOutputChecker()
        # The trailing 0 means "no doctest option flags".
        if not checker.check_output(want, got, 0):
            # Keep the message as text: passing bytes to AssertionError makes
            # the failure show up as an unreadable b'...' repr on Python 3.
            message = checker.output_difference(
                doctest.Example("", want), got, 0
            )
            raise AssertionError(message)

    def test_raise_on_None_file(self):
        """Constructing an Analyser with None arguments must raise TypeError."""
        with self.assertRaises(TypeError):
            analyser.Analyser("sme", "xfst", None, None, None, None)

    def test_sme_ccat_output(self):
        """Test if the ccat output is what we expect it to be."""
        got = self.a.ccat()
        want = (
            "Muhto gaskkohagaid, ja erenoamážit dalle go lei buolaš, "
            "de aggregáhta billánii. ¶\n"
        )

        self.assertEqual(got, want)

    def test_analysisXml(self):
        """Check if the xml is what it is supposed to be.

        Runs dependency_analysis() and compares the serialised etree of the
        xml file with the expected document, character for character.
        """
        self.a.dependency_analysis()
        got = self.a.xml_file.etree
        want = (
            '<document xml:lang="sme" id="no_id">\n'
            "  <header>\n"
            "    <title>Internáhtta sosiálalaš giliguovddážin</title>\n"
            '    <genre code="facta"/>\n'
            "    <author>\n"
            '      <person firstname="Abba" lastname="Abbamar" sex="m" '
            'born="1900" nationality="nor"/>\n'
            "    </author>\n"
            "    <translator>\n"
            '      <person firstname="Ibba" lastname="Ibbamar" sex="unknown" '
            'born="" nationality=""/>\n'
            "    </translator>\n"
            '    <translated_from xml:lang="nob"/>\n'
            "    <year>2005</year>\n"
            "    <publChannel>\n"
            "      <publication>\n"
            "        <publisher>Almmuheaddji OS</publisher>\n"
            "      </publication>\n"
            "    </publChannel>\n"
            "    <wordcount>10</wordcount>\n"
            "    <availability>\n"
            "      <free/>\n"
            "    </availability>\n"
            '    <submitter name="Børre Gaup" '
            'email="boerre.gaup@samediggi.no"/>\n'
            "    <multilingual>\n"
            '      <language xml:lang="nob"/>\n'
            "    </multilingual>\n"
            "    <origFileName>aarseth_s.htm</origFileName>\n"
            "    <metadata>\n"
            "      <uncomplete/>\n"
            "    </metadata>\n"
            "    <version>XSLtemplate  1.9 ; file-specific xsl  "
            "$Revision: 1.3 $; common.xsl  $Revision$; </version>\n"
            "  </header>\n"
            '  <body><dependency><![CDATA["<Muhto>"\n'
            '\t"muhto" CC @CVP #1->1\n"<gaskkohagaid>"\n'
            '\t"gaskkohagaid" Adv @ADVL> #2->12\n"<,>"\n'
            '\t"," CLB #3->4\n"<ja>"\n\t"ja" CC @CNP #4->2\n"<erenoamážit>"\n'
            '\t"erenoamážit" Adv @ADVL> #5->12\n"<dalle_go>"\n'
            '\t"dalle_go" CS @CVP #6->7\n"<lei>"\n'
            '\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12\n"<buolaš>"\n'
            '\t"buolaš" N Sg Nom @<SPRED #8->7\n"<,>"\n'
            '\t"," CLB #9->6\n"<de>"\n'
            '\t"de" Adv @ADVL> #10->12\n"<aggregáhta>"\n'
            '\t"aggregáhta" N Sg Nom @SUBJ> #11->12\n"<billánii>"\n'
            '\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0\n"<.>"\n'
            '\t"." CLB #13->12\n\n"<¶>"\n'
            '\t"¶" CLB #1->1\n\n]]></dependency></body></document>'
        )
        # Lift unittest's diff truncation so a mismatch shows the whole text.
        self.maxDiff = None
        self.assertEqual(etree.tostring(got, encoding="unicode"), want)

assertXmlEqual(got, want)

Check if two stringified xml snippets are equal.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_analyser.py
47
48
49
50
51
52
53
54
def assertXmlEqual(self, got, want):
    """Check if two stringified xml snippets are equal.

    Args:
        got: the xml text that was produced.
        want: the xml text that was expected.

    Raises:
        AssertionError: if the lxml output checker reports a mismatch; the
            message is the checker's textual difference report.
    """
    checker = doctestcompare.LXMLOutputChecker()
    # The trailing 0 means "no doctest option flags".
    if not checker.check_output(want, got, 0):
        # Keep the message as text: passing bytes to AssertionError makes the
        # failure show up as an unreadable b'...' repr on Python 3.
        message = checker.output_difference(doctest.Example("", want), got, 0)
        raise AssertionError(message)

test_analysisXml()

Check if the xml is what it is supposed to be.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_analyser.py
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
def test_analysisXml(self):
    """Verify the document produced by dependency_analysis().

    The serialised etree of the analysed xml file must match the expected
    header followed by the expected dependency body, character for character.
    """
    self.a.dependency_analysis()
    # Show the complete diff on failure instead of a truncated one.
    self.maxDiff = None

    expected_header = (
        '<document xml:lang="sme" id="no_id">\n'
        "  <header>\n"
        "    <title>Internáhtta sosiálalaš giliguovddážin</title>\n"
        '    <genre code="facta"/>\n'
        "    <author>\n"
        '      <person firstname="Abba" lastname="Abbamar" sex="m" '
        'born="1900" nationality="nor"/>\n'
        "    </author>\n"
        "    <translator>\n"
        '      <person firstname="Ibba" lastname="Ibbamar" sex="unknown" '
        'born="" nationality=""/>\n'
        "    </translator>\n"
        '    <translated_from xml:lang="nob"/>\n'
        "    <year>2005</year>\n"
        "    <publChannel>\n"
        "      <publication>\n"
        "        <publisher>Almmuheaddji OS</publisher>\n"
        "      </publication>\n"
        "    </publChannel>\n"
        "    <wordcount>10</wordcount>\n"
        "    <availability>\n"
        "      <free/>\n"
        "    </availability>\n"
        '    <submitter name="Børre Gaup" '
        'email="boerre.gaup@samediggi.no"/>\n'
        "    <multilingual>\n"
        '      <language xml:lang="nob"/>\n'
        "    </multilingual>\n"
        "    <origFileName>aarseth_s.htm</origFileName>\n"
        "    <metadata>\n"
        "      <uncomplete/>\n"
        "    </metadata>\n"
        "    <version>XSLtemplate  1.9 ; file-specific xsl  "
        "$Revision: 1.3 $; common.xsl  $Revision$; </version>\n"
        "  </header>\n"
    )
    expected_body = (
        '  <body><dependency><![CDATA["<Muhto>"\n'
        '\t"muhto" CC @CVP #1->1\n"<gaskkohagaid>"\n'
        '\t"gaskkohagaid" Adv @ADVL> #2->12\n"<,>"\n'
        '\t"," CLB #3->4\n"<ja>"\n\t"ja" CC @CNP #4->2\n"<erenoamážit>"\n'
        '\t"erenoamážit" Adv @ADVL> #5->12\n"<dalle_go>"\n'
        '\t"dalle_go" CS @CVP #6->7\n"<lei>"\n'
        '\t"leat" V IV Ind Prt Sg3 @FS-ADVL> #7->12\n"<buolaš>"\n'
        '\t"buolaš" N Sg Nom @<SPRED #8->7\n"<,>"\n'
        '\t"," CLB #9->6\n"<de>"\n'
        '\t"de" Adv @ADVL> #10->12\n"<aggregáhta>"\n'
        '\t"aggregáhta" N Sg Nom @SUBJ> #11->12\n"<billánii>"\n'
        '\t"billánit" V IV Ind Prt Sg3 @FS-ADVL> #12->0\n"<.>"\n'
        '\t"." CLB #13->12\n\n"<¶>"\n'
        '\t"¶" CLB #1->1\n\n]]></dependency></body></document>'
    )

    serialised = etree.tostring(self.a.xml_file.etree, encoding="unicode")
    self.assertEqual(serialised, expected_header + expected_body)

test_sme_ccat_output()

Test if the ccat output is what we expect it to be.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_analyser.py
60
61
62
63
64
65
66
67
68
def test_sme_ccat_output(self):
    """Compare the analyser's ccat() result with the expected sentence."""
    expected_text = (
        "Muhto gaskkohagaid, ja erenoamážit dalle go lei buolaš, "
        "de aggregáhta billánii. ¶\n"
    )
    self.assertEqual(self.a.ccat(), expected_text)