Skip to content

test_sentencedivider

Test sentence division functionality.

TestSentenceDivider

Bases: unittest.TestCase

Test the SentenceDivider class.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_sentencedivider.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
class TestSentenceDivider(unittest.TestCase):
    """Check SentenceDivider.make_valid_sentences against fixed inputs."""

    def test_ccat_input(self):
        """Verify the sentence list produced for a multi-paragraph input.

        The expected list shows that a closing quote following a full stop
        is returned as an item of its own, and that the trailing paragraph
        marks are absent from the returned sentences.
        """
        expected = [
            "10. ON-vuogádat",
            "ON doaimmaid oktavuođas;",
            "ovddasvástádus sihkkarastit?",
            "buot ON orgánat!",
            ".....",
            "wow.",
            '"',
            "mom.).",
            "mom.:",
            "kult.",
            "”",
            "váldočoahkkima nammadit.",
            "dievaslaš čađaheami, [2019 – 2020] …",
            "(rávvagiid)",
        ]
        raw_text = """10. ON-vuogádat ¶
ON doaimmaid oktavuođas; ovddasvástádus sihkkarastit? buot ON orgánat!
..... ¶
wow." ¶
mom.). ¶
mom.: ¶
kult.” ¶
váldočoahkkima nammadit. dievaslaš čađaheami, [2019 – 2020] … ¶
(rávvagiid) ¶
"""
        splitter = sentencedivider.SentenceDivider("sme")
        sentences = splitter.make_valid_sentences(raw_text)
        self.assertListEqual(sentences, expected)

    def test_with_dot_and_paragraph(self):
        """Verify a sentence ending in a full stop followed by a paragraph mark."""
        expected = [
            "mielddisbuvttii.",
            "Odd Einar Dørum",
        ]
        raw_text = """mielddisbuvttii. ¶
Odd Einar Dørum ¶
"""
        splitter = sentencedivider.SentenceDivider("sme")
        sentences = splitter.make_valid_sentences(raw_text)
        self.assertEqual(sentences, expected)

    def test_with_empty_head_sentence(self):
        """Verify that a leading paragraph holding only a full stop is kept."""
        expected = [
            ".",
            "Odd Einar Dørum",
        ]
        raw_text = """. ¶
Odd Einar Dørum ¶
"""
        splitter = sentencedivider.SentenceDivider("sme")
        sentences = splitter.make_valid_sentences(raw_text)
        self.assertEqual(sentences, expected)

test_ccat_input()

Test the sentence divider.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_sentencedivider.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
    def test_ccat_input(self):
        """Test the sentence divider."""
        ccat_output = """10. ON-vuogádat ¶
ON doaimmaid oktavuođas; ovddasvástádus sihkkarastit? buot ON orgánat!
..... ¶
wow." ¶
mom.). ¶
mom.: ¶
kult.” ¶
váldočoahkkima nammadit. dievaslaš čađaheami, [2019 – 2020] … ¶
(rávvagiid) ¶
"""
        want = [
            "10. ON-vuogádat",
            "ON doaimmaid oktavuođas;",
            "ovddasvástádus sihkkarastit?",
            "buot ON orgánat!",
            ".....",
            "wow.",
            '"',
            "mom.).",
            "mom.:",
            "kult.",
            "”",
            "váldočoahkkima nammadit.",
            "dievaslaš čađaheami, [2019 – 2020] …",
            "(rávvagiid)",
        ]
        divider = sentencedivider.SentenceDivider("sme")
        self.assertListEqual(divider.make_valid_sentences(ccat_output), want)

test_with_dot_and_paragraph()

Test the sentence divider with a sentence ending with . ¶.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_sentencedivider.py
60
61
62
63
64
65
66
67
68
69
70
    def test_with_dot_and_paragraph(self):
        """Test the sentence divider with a sentence ending with . ¶."""
        ccat_output = """mielddisbuvttii. ¶
Odd Einar Dørum ¶
"""
        want = [
            "mielddisbuvttii.",
            "Odd Einar Dørum",
        ]
        divider = sentencedivider.SentenceDivider("sme")
        self.assertEqual(divider.make_valid_sentences(ccat_output), want)

test_with_empty_head_sentence()

Test the sentence divider with an empty first sentence.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_sentencedivider.py
72
73
74
75
76
77
78
79
80
81
82
    def test_with_empty_head_sentence(self):
        """Test the sentence divider with an empty first sentence."""
        ccat_output = """. ¶
Odd Einar Dørum ¶
"""
        want = [
            ".",
            "Odd Einar Dørum",
        ]
        divider = sentencedivider.SentenceDivider("sme")
        self.assertEqual(divider.make_valid_sentences(ccat_output), want)