Skip to content

test_pick_parallel_docs

Test the ParallelPicker class.

TestParallelPicker

Bases: unittest.TestCase

Test the ParallelPicker class.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
class TestParallelPicker(unittest.TestCase):
    """Test the ParallelPicker class.

    Each test runs against a fresh temporary directory that contains
    ``article-47.html.xml`` for sme, nob and smj below ``converted/`` and
    has a git repository initialised at its root.
    """

    def make_tempdir(self):
        """Make tempdir where ParallelPicker will do its magic.

        Returns:
            The populated testfixtures.TempDirectory (``.git`` is ignored
            when its content is checked).
        """
        tempdir = testfixtures.TempDirectory(ignore=[".git"])
        tempdir.makedir("converted/sme/admin")
        tempdir.makedir("converted/nob/admin")
        tempdir.makedir("converted/smj/admin")
        tempdir.write(
            "converted/sme/admin/article-47.html.xml", ARTICLE47_SME.encode("utf8")
        )
        tempdir.write(
            "converted/nob/admin/article-47.html.xml", ARTICLE47_NOB.encode("utf8")
        )
        tempdir.write(
            "converted/smj/admin/article-47.html.xml", ARTICLE47_SMJ.encode("utf8")
        )

        return tempdir

    def setUp(self):
        """Make the ParallelPicker work on tempdir."""
        self.tempdir = self.make_tempdir()
        git.Repo.init(self.tempdir.path)
        self.language1_converted_dir = os.path.join(
            self.tempdir.path, "converted/sme/admin"
        )
        # NOTE(review): "73" and "110" are presumably ratio limits for the
        # picker -- confirm against ParallelPicker's signature.
        self.picker = pick_parallel_docs.ParallelPicker(
            self.language1_converted_dir, "nob", "73", "110"
        )

    def tearDown(self):
        """Cleanup after tests are done."""
        self.tempdir.cleanup()

    def test_calculate_language1(self):
        """Check that the correct language is found."""
        self.picker.calculate_language1("converted/sme/admin")
        self.assertEqual(self.picker.language1, "sme")

    def test_get_parallel_language(self):
        """Check that the correct parallel language is set."""
        self.assertEqual(self.picker.parallel_language, "nob")

    def test_has_parallel1(self):
        """Parallel exists, parallel_text points to correct place."""
        file_with_parallel1 = corpusxmlfile.CorpusXMLFile(
            os.path.join(self.language1_converted_dir, "article-47.html.xml")
        )
        self.assertTrue(self.picker.has_parallel(file_with_parallel1))

    def test_has_parallel2(self):
        """parallel_text points to wrong language."""
        file_with_parallel1 = corpusxmlfile.CorpusXMLFile(
            os.path.join(self.language1_converted_dir, "article-47.html.xml")
        )
        # ".//" is the explicit descendant search; a leading "//" on a tree
        # triggers a FutureWarning in the ElementTree/lxml find() API.
        file_with_parallel1.etree.find(".//parallel_text").set(
            "{http://www.w3.org/XML/1998/namespace}lang", "sma"
        )
        self.assertFalse(self.picker.has_parallel(file_with_parallel1))

    def test_has_parallel3(self):
        """parallel_text points to wrong file."""
        file_with_parallel1 = corpusxmlfile.CorpusXMLFile(
            os.path.join(self.language1_converted_dir, "article-47.html.xml")
        )
        # ".//" is the explicit descendant search; a leading "//" on a tree
        # triggers a FutureWarning in the ElementTree/lxml find() API.
        file_with_parallel1.etree.find(".//parallel_text").set(
            "location", "article-48.html"
        )
        self.assertFalse(self.picker.has_parallel(file_with_parallel1))

    def test_find_lang1_files(self):
        """Check that lang1 files are found."""
        # Directory listing order is filesystem dependent, so both sides are
        # sorted before they are compared.
        self.assertListEqual(
            sorted(glob.glob(self.language1_converted_dir + "/*.xml")),
            sorted(
                corpus_file.name for corpus_file in self.picker.find_lang1_files()
            ),
        )

    def test_copy_valid_parallels(self):
        """Only copy in the nob-sme pair, and align them."""
        self.picker.copy_valid_parallels()
        # No smj entries are listed: only the sme/nob pair is expected.
        self.tempdir.check_all(
            "prestable",
            "converted/",
            "converted/nob/",
            "converted/nob/admin/",
            "converted/nob/admin/article-47.html.xml",
            "converted/sme/",
            "converted/sme/admin/",
            "converted/sme/admin/article-47.html.xml",
            "tmx/",
            "tmx/nob2sme/",
            "tmx/nob2sme/admin/",
            "tmx/nob2sme/admin/article-47.html.tmx",
            "tmx/nob2sme/admin/article-47.html.tmx.html",
        )

make_tempdir()

Make tempdir where ParallelPicker will do its magic.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def make_tempdir(self):
    """Build the temporary corpus tree the ParallelPicker operates on.

    Creates ``converted/<lang>/admin`` for sme, nob and smj and writes the
    UTF-8 encoded article-47 fixture of each language into its directory.

    Returns:
        The populated testfixtures.TempDirectory; ``.git`` is ignored when
        its content is checked.
    """
    workdir = testfixtures.TempDirectory(ignore=[".git"])

    admin_dirs = (
        "converted/sme/admin",
        "converted/nob/admin",
        "converted/smj/admin",
    )
    for admin_dir in admin_dirs:
        workdir.makedir(admin_dir)

    articles = (
        ("converted/sme/admin/article-47.html.xml", ARTICLE47_SME),
        ("converted/nob/admin/article-47.html.xml", ARTICLE47_NOB),
        ("converted/smj/admin/article-47.html.xml", ARTICLE47_SMJ),
    )
    for relative_path, content in articles:
        workdir.write(relative_path, content.encode("utf8"))

    return workdir

setUp()

Make the ParallelPicker work on tempdir.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
 95
 96
 97
 98
 99
100
101
102
103
104
def setUp(self):
    """Prepare a git-initialised tempdir and a picker for sme -> nob.

    Stores the temp directory, the absolute sme ``converted`` directory and
    the ParallelPicker instance on ``self`` for use by the tests.
    """
    workdir = self.make_tempdir()
    self.tempdir = workdir
    git.Repo.init(workdir.path)

    sme_admin_dir = os.path.join(workdir.path, "converted/sme/admin")
    self.language1_converted_dir = sme_admin_dir

    # NOTE(review): "73" and "110" are presumably ratio limits for the
    # picker -- confirm against ParallelPicker's signature.
    self.picker = pick_parallel_docs.ParallelPicker(
        sme_admin_dir, "nob", "73", "110"
    )

tearDown()

Cleanup after tests are done.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
106
107
108
def tearDown(self):
    """Remove the temporary directory once a test has finished."""
    workdir = self.tempdir
    workdir.cleanup()

test_calculate_language1()

Check that the correct language is found.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
110
111
112
113
def test_calculate_language1(self):
    """Verify that language1 is "sme" for the path converted/sme/admin."""
    picker = self.picker
    picker.calculate_language1("converted/sme/admin")
    self.assertEqual(picker.language1, "sme")

test_copy_valid_parallels()

Only copy in the nob-sme pair, and align them.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
def test_copy_valid_parallels(self):
    """Run copy_valid_parallels and check the tree found below prestable.

    Only the nob and sme articles and their tmx output are expected; no
    smj entries are listed.
    """
    expected_tree = (
        "converted/",
        "converted/nob/",
        "converted/nob/admin/",
        "converted/nob/admin/article-47.html.xml",
        "converted/sme/",
        "converted/sme/admin/",
        "converted/sme/admin/article-47.html.xml",
        "tmx/",
        "tmx/nob2sme/",
        "tmx/nob2sme/admin/",
        "tmx/nob2sme/admin/article-47.html.tmx",
        "tmx/nob2sme/admin/article-47.html.tmx.html",
    )

    self.picker.copy_valid_parallels()
    self.tempdir.check_all("prestable", *expected_tree)

test_find_lang1_files()

Check that lang1 files are found.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
146
147
148
149
150
151
def test_find_lang1_files(self):
    """Check that lang1 files are found.

    The names reported by ``find_lang1_files`` must match the ``*.xml``
    files that glob finds in the language1 converted directory.
    """
    # Directory listing order is filesystem dependent, so both sides are
    # sorted before they are compared.
    expected_names = sorted(glob.glob(self.language1_converted_dir + "/*.xml"))
    found_names = sorted(
        corpus_file.name for corpus_file in self.picker.find_lang1_files()
    )
    self.assertListEqual(expected_names, found_names)

test_get_parallel_language()

Check that the correct parallel language is set.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
115
116
117
def test_get_parallel_language(self):
    """Verify that the picker reports "nob" as its parallel language."""
    configured_language = self.picker.parallel_language
    self.assertEqual(configured_language, "nob")

test_has_parallel1()

Parallel exists, parallel_text points to correct place.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
119
120
121
122
123
124
def test_has_parallel1(self):
    """The unmodified sme article must be reported as having a parallel."""
    article_path = os.path.join(
        self.language1_converted_dir, "article-47.html.xml"
    )
    article = corpusxmlfile.CorpusXMLFile(article_path)
    self.assertTrue(self.picker.has_parallel(article))

test_has_parallel2()

parallel_text points to wrong language.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
126
127
128
129
130
131
132
133
134
def test_has_parallel2(self):
    """parallel_text points to wrong language.

    The xml:lang attribute of the parallel_text element is set to "sma"
    before the picker is asked; has_parallel must then return a false
    value.
    """
    file_with_parallel1 = corpusxmlfile.CorpusXMLFile(
        os.path.join(self.language1_converted_dir, "article-47.html.xml")
    )
    # ".//" is the explicit descendant search; a leading "//" on a tree
    # triggers a FutureWarning in the ElementTree/lxml find() API.
    file_with_parallel1.etree.find(".//parallel_text").set(
        "{http://www.w3.org/XML/1998/namespace}lang", "sma"
    )
    self.assertFalse(self.picker.has_parallel(file_with_parallel1))

test_has_parallel3()

parallel_text points to wrong file.

Source code in /home/anders/projects/CorpusTools/corpustools/test/test_pick_parallel_docs.py
136
137
138
139
140
141
142
143
144
def test_has_parallel3(self):
    """parallel_text points to wrong file.

    The location attribute of the parallel_text element is set to
    "article-48.html", a file the fixture tree does not contain;
    has_parallel must then return a false value.
    """
    file_with_parallel1 = corpusxmlfile.CorpusXMLFile(
        os.path.join(self.language1_converted_dir, "article-47.html.xml")
    )
    # ".//" is the explicit descendant search; a leading "//" on a tree
    # triggers a FutureWarning in the ElementTree/lxml find() API.
    file_with_parallel1.etree.find(".//parallel_text").set(
        "location", "article-48.html"
    )
    self.assertFalse(self.picker.has_parallel(file_with_parallel1))