Skip to content

biblexmlconverter

Convert bible xml files to the Giella xml format.

convert2intermediate(filename)

Convert the bible xml to intermediate Giella xml format.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
194
195
196
197
198
199
200
def convert2intermediate(filename):
    """Build an intermediate Giella xml document from a bible xml file.

    Args:
        filename: the bible xml file, handed to etree.parse as-is.

    Returns:
        (lxml.etree.Element): a document element whose only child is the
            element returned by process_bible for the parsed file.
    """
    bible_tree = etree.parse(filename)

    root = etree.Element("document")
    root.append(process_bible(bible_tree))
    return root

make_p(verses)

Convert verse strings to p element.

Parameters:

Name Type Description Default
verses list[str]

a list of strings

required

Returns:

Type Description
lxml.etree.Element

a Giella xml p element

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def make_p(verses):
    """Wrap verse strings in a single p element.

    Args:
        verses (list[str]): the verse texts to combine

    Returns:
        (lxml.etree.Element): a Giella xml p element whose text is the
            verses joined with newlines.
    """
    joined_text = "\n".join(verses)

    p_element = etree.Element("p")
    p_element.text = joined_text
    return p_element

process_bible(bible_doc)

Convert a bible xml document to a Giella xml document.

Parameters:

Name Type Description Default
bible_doc etree.Element

the bible xml tree

required

Returns:

Type Description
lxml.etree.Element

a Giella xml body element.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
def process_bible(bible_doc):
    """Turn a bible xml document into a Giella xml body element.

    Args:
        bible_doc (etree.Element): the bible xml tree

    Returns:
        (lxml.etree.Element): a Giella xml body element holding one
            converted section per book found by the ".//book" xpath.
    """
    book_sections = [process_book(book) for book in bible_doc.xpath(".//book")]

    body = etree.Element("body")
    body.extend(book_sections)
    return body

process_book(book_element)

Convert a bible xml book to a Giella xml section.

Parameters:

Name Type Description Default
book_element lxml.etree.Element

a bible xml book element

required

Returns:

Type Description
lxml.etree.Element

a Giella xml section element.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
def process_book(book_element):
    """Convert a bible xml book to a Giella xml section.

    Args:
        book_element (lxml.etree.Element): a bible xml book element

    Returns:
        (lxml.etree.Element): a Giella xml section element. Its first child
            is a title paragraph carrying the book's "title" attribute,
            followed by one converted section per chapter.

    Raises:
        UserWarning: if the book has a child that is not a chapter element.
    """
    section = etree.Element("section")

    title = etree.Element("p")
    title.set("type", "title")
    title.text = book_element.get("title")

    section.append(title)

    for chapter_element in book_element:
        if chapter_element.tag != "chapter":
            # The message used to have two "{}" slots but a single argument,
            # so building it raised IndexError instead of this UserWarning.
            raise UserWarning(
                f"Unexpected element in book: {chapter_element.tag}"
            )

        section.append(process_chapter(chapter_element))

    return section

process_chapter(chapter_element)

Convert a bible xml chapter to a Giella xml section.

Parameters:

Name Type Description Default
chapter_element lxml.etree.Element

a bible xml chapter element

required

Returns:

Type Description
lxml.etree.Element

a Giella xml section element.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
def process_chapter(chapter_element):
    """Turn a bible xml chapter into a Giella xml section.

    The section starts with a title paragraph built from the chapter's
    "number" and "title" attributes (whichever are present, joined by a
    space). Verse children become plain paragraphs and section children are
    converted with process_section.

    Args:
        chapter_element (lxml.etree.Element): a bible xml chapter element

    Returns:
        (lxml.etree.Element): a Giella xml section element.

    Raises:
        UserWarning: if a child is neither a section nor a verse element.
    """
    attribute_values = (
        chapter_element.get("number"),
        chapter_element.get("title"),
    )
    heading = etree.Element("p", type="title")
    heading.text = " ".join(
        value for value in attribute_values if value is not None
    )

    chapter_section = etree.Element("section")
    chapter_section.append(heading)

    for child in chapter_element:
        tag = child.tag
        if tag == "verse":
            verse_paragraph = etree.Element("p")
            verse_paragraph.text = child.text
            chapter_section.append(verse_paragraph)
            continue
        if tag == "section":
            chapter_section.append(process_section(child))
            continue
        raise UserWarning(f"Unexpected element in chapter: {tag}")

    return chapter_section

process_p(paragraph)

Convert bible xml verse elements to p elements.

Parameters:

Name Type Description Default
paragraph lxml.etree.Element

a bible xml p element.

required

Returns:

Type Description
lxml.etree.Element

a Giella xml p element

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def process_p(paragraph):
    """Collapse the verses of a bible xml p element into one p element.

    Args:
        paragraph (lxml.etree.Element): a bible xml p element.

    Returns:
        (lxml.etree.Element): a Giella xml p element whose text is the
            non-empty verse texts joined with newlines.
    """
    verse_texts = [process_verse(verse) for verse in paragraph]

    p_element = etree.Element("p")
    # filter(None, ...) drops verses without text, same as a truthiness test.
    p_element.text = "\n".join(filter(None, verse_texts))
    return p_element

process_section(section_element)

Process the section element found in the bible xml documents.

Parameters:

Name Type Description Default
section_element lxml.etree.Element

an etree element containing the section element found in a bible xml document.

required

Returns:

Name Type Description
section lxml.etree.Element

an etree element containing a corpus xml section.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
def process_section(section_element):
    """Process the section element found in the bible xml documents.

    The result starts with a title paragraph taken from the section's
    "title" attribute. Consecutive verse children are gathered into one
    paragraph; a p child first flushes any gathered verses and is then
    converted with process_p.

    Args:
        section_element (lxml.etree.Element): an etree element containing the
             section element found in a bible xml document.

    Returns:
        section (lxml.etree.Element): an etree element containing a
            corpus xml section.

    Raises:
        UserWarning: if a child is neither a p nor a verse element.
    """
    section = etree.Element("section")

    title = etree.Element("p")
    title.set("type", "title")
    title.text = section_element.get("title")

    section.append(title)

    verses = []
    for element in section_element:
        if element.tag == "p":
            if verses:
                section.append(make_p(verses))
                verses = []
            section.append(process_p(element))
        elif element.tag == "verse":
            text = process_verse(element)
            if text:
                verses.append(text)
        else:
            raise UserWarning(f"Unexpected element in section: {element.tag}")

    # Flush trailing verses only when there are any, matching the check made
    # inside the loop; otherwise an empty paragraph would be appended.
    if verses:
        section.append(make_p(verses))

    return section

process_verse(verse_element)

Process the verse element found in bible xml documents.

Parameters:

Name Type Description Default
verse_element lxml.etree.Element

an etree element containing the verse element found in a bible xml document.

required

Returns:

Type Description
str

A string containing the text of the verse element.

Source code in /home/anders/projects/CorpusTools/corpustools/biblexmlconverter.py
23
24
25
26
27
28
29
30
31
32
33
34
35
36
def process_verse(verse_element):
    """Extract the text of a bible xml verse element.

    Args:
        verse_element (lxml.etree.Element): an etree element containing
            the verse element found in a bible xml document.

    Returns:
        (str): the element's text attribute, returned unchanged.

    Raises:
        UserWarning: if the element's tag is not "verse".
    """
    tag = verse_element.tag
    if tag == "verse":
        return verse_element.text

    raise UserWarning(f"Unexpected element in verse: {tag}")