Skip to content

test_trainingcorpusmaker

Test sentence division functionality.

TestTrainingCorpusMaker

Bases: unittest.TestCase

Test the TrainingCorpusMaker class.

Attributes:

Name Type Description
sentencemaker corpustools.TrainingCorpusMaker

The TrainingCorpusMaker instance created in setUp; the tests call its parse_dependency method.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_trainingcorpusmaker.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
class TestTrainingCorpusMaker(unittest.TestCase):
    """Test the TrainingCorpusMaker class.

    Attributes:
        sentencemaker (trainingcorpusmaker.TrainingCorpusMaker): The
            instance under test, constructed with "sme" in setUp.
    """

    def setUp(self):
        """Set up the TrainingCorpusMaker."""
        self.sentencemaker = trainingcorpusmaker.TrainingCorpusMaker("sme")

    def _joined_sentences(self, dependency_text):
        """Parse dependency_text and join the non-empty results.

        Shared by the test methods so the parse/filter/join pipeline is
        written once.

        Args:
            dependency_text (str): Text handed to
                ``self.sentencemaker.parse_dependency``.

        Returns:
            str: The truthy items produced by ``parse_dependency``,
                joined with newlines.
        """
        return "\n".join(
            sentence
            for sentence in self.sentencemaker.parse_dependency(dependency_text)
            if sentence
        )

    def test_no_unknown(self):
        """Test with only known input."""
        test_input = "\n".join(
            [
                '"<Oahpa>"',
                '\t"Oahpa" N Prop Sem/Obj Sg Nom <W:0.0000000000> @HNOUN #1->0',
                '"<:>"',
                '\t":" CLB <W:0.0000000000> #2->1',
                ": ",
                '"<Soahki>"',
                '\t"soahki" N Sem/Plant Sg Nom <W:0.0000000000> @HNOUN #3->1',
                ": ",
                '"<¶>"',
                '\t"¶" CLB <W:0.0000000000> #4->1',
                ":\n",
                '"<addit>"',
                '\t"addit" V TV Inf <W:0.0000000000> @FS-N<IMV #13->11',
                ": ",
                '"<boahtte>"',
                '\t"boahtte" A Sem/Dummytag Attr <W:0.0000000000> @>N #14->15',
                ": ",
                '"<bulvii>"',
                '\t"buolva" N Sem/Body_Group_Hum_Time Sg Ill <W:0.0000000000> @<ADVL #15->13',
                '"<.>"',
                '\t"." CLB <W:0.0000000000> #16->4',
                ": ",
                "",
                '"<¶>"',
                '\t"¶" CLB <W:0.0000000000> #1->1',
                ":\n",
                "",
            ]
        )

        # Two sentences are expected, one per line of the joined result.
        want = "Oahpa: Soahki\naddit boahtte bulvii."
        self.assertEqual(self._joined_sentences(test_input), want)

    def test_with_comma(self):
        """Check that comma is handled correctly."""
        test_input = "\n".join(
            [
                '"<áhkuin>"',
                '\t"áhkku" N Sem/Hum Sg Com <W:0.0000000000> @<ADVL #6->1',
                '"<,>"',
                '\t"," CLB <W:0.0000000000> #7->6',
                ": ",
                '"<ádjáin>"',
                '\t"áddjá" N Sem/Hum Sg Com <W:0.0000000000> @<ADVL #8->1',
                ": ",
                '"<dahje>"',
                '\t"dahje" CC <W:0.0000000000> @CNP #9->8',
                ": ",
                '"<earáin>"',
                '\t"eará" Pron Indef Sg Com <W:0.0000000000> @<ADVL #10->1',
                '"<!>"',
                '\t"!" CLB <W:0.0000000000> #13->1',
                ": ",
                "",
                '"<¶>"',
                '\t"¶" CLB <W:0.0000000000> #1->1',
                ":\n",
            ]
        )

        # The comma must stay attached to the preceding word.
        want = "áhkuin, ádjáin dahje earáin!"
        self.assertEqual(self._joined_sentences(test_input), want)

setUp()

Set up the TrainingCorpusMaker.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_trainingcorpusmaker.py
33
34
35
def setUp(self):
    """Create the TrainingCorpusMaker used by the tests."""
    maker = trainingcorpusmaker.TrainingCorpusMaker("sme")
    self.sentencemaker = maker

test_no_unknown()

Test with only known input.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_trainingcorpusmaker.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
def test_no_unknown(self):
    """Check the parsed sentences when the input holds only known material."""
    # Lines of the dependency analysis, kept verbatim; they are joined with
    # newlines below to form the parser input.
    analysis_lines = (
        '"<Oahpa>"',
        '\t"Oahpa" N Prop Sem/Obj Sg Nom <W:0.0000000000> @HNOUN #1->0',
        '"<:>"',
        '\t":" CLB <W:0.0000000000> #2->1',
        ": ",
        '"<Soahki>"',
        '\t"soahki" N Sem/Plant Sg Nom <W:0.0000000000> @HNOUN #3->1',
        ": ",
        '"<¶>"',
        '\t"¶" CLB <W:0.0000000000> #4->1',
        ":\n",
        '"<addit>"',
        '\t"addit" V TV Inf <W:0.0000000000> @FS-N<IMV #13->11',
        ": ",
        '"<boahtte>"',
        '\t"boahtte" A Sem/Dummytag Attr <W:0.0000000000> @>N #14->15',
        ": ",
        '"<bulvii>"',
        '\t"buolva" N Sem/Body_Group_Hum_Time Sg Ill <W:0.0000000000> @<ADVL #15->13',
        '"<.>"',
        '\t"." CLB <W:0.0000000000> #16->4',
        ": ",
        "",
        '"<¶>"',
        '\t"¶" CLB <W:0.0000000000> #1->1',
        ":\n",
        "",
    )
    test_input = "\n".join(analysis_lines)

    expected = "Oahpa: Soahki\naddit boahtte bulvii."

    # Drop falsy (empty) results before joining, one sentence per line.
    parsed = self.sentencemaker.parse_dependency(test_input)
    actual = "\n".join(filter(None, parsed))

    self.assertEqual(actual, expected)

test_with_comma()

Check that comma is handled correctly.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_trainingcorpusmaker.py
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def test_with_comma(self):
    """Verify the sentence produced for input that contains a comma."""
    # Lines of the dependency analysis, kept verbatim; they are joined with
    # newlines below to form the parser input.
    analysis_lines = (
        '"<áhkuin>"',
        '\t"áhkku" N Sem/Hum Sg Com <W:0.0000000000> @<ADVL #6->1',
        '"<,>"',
        '\t"," CLB <W:0.0000000000> #7->6',
        ": ",
        '"<ádjáin>"',
        '\t"áddjá" N Sem/Hum Sg Com <W:0.0000000000> @<ADVL #8->1',
        ": ",
        '"<dahje>"',
        '\t"dahje" CC <W:0.0000000000> @CNP #9->8',
        ": ",
        '"<earáin>"',
        '\t"eará" Pron Indef Sg Com <W:0.0000000000> @<ADVL #10->1',
        '"<!>"',
        '\t"!" CLB <W:0.0000000000> #13->1',
        ": ",
        "",
        '"<¶>"',
        '\t"¶" CLB <W:0.0000000000> #1->1',
        ":\n",
    )
    test_input = "\n".join(analysis_lines)

    expected = "áhkuin, ádjáin dahje earáin!"

    # Drop falsy (empty) results before joining, one sentence per line.
    parsed = self.sentencemaker.parse_dependency(test_input)
    actual = "\n".join(filter(None, parsed))

    self.assertEqual(actual, expected)