Skip to content

convert_using_soffice

Convert writenow files to the html format.

to_html_elt(filename)

Convert the content of a writenow file to xhtml.

Parameters:

Name Type Description Default
filename str

path to the document

required

Returns:

Type Description
str

A string containing the html version of the writenow file.

Source code in /home/anders/projects/CorpusTools/corpustools/convert_using_soffice.py
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
def to_html_elt(filename):
    """Convert the content of a writenow file to xhtml.

    The document is converted by running ``soffice --convert-to html`` with
    the input file's directory as ``--outdir``. The resulting
    ``<name>.html`` file is parsed and then deleted again.

    Args:
        filename (str): path to the document

    Returns:
        The object returned by ``html.parse`` for the converted document
        (presumably an lxml element tree -- confirm against the module's
        imports), not a plain string.
    """
    # dirname() is "" for a bare file name; hand soffice an explicit
    # directory instead of an empty --outdir argument.
    outdir = os.path.dirname(filename) or os.curdir
    subprocess.run(
        [
            "soffice",
            "--convert-to",
            "html",
            "--outdir",
            outdir,
            filename,
        ],
        encoding="utf-8",
    )

    outname = f"{os.path.splitext(filename)[0]}.html"
    try:
        return html.parse(outname)
    finally:
        # Always clean up the intermediate file, even when parsing fails.
        # If soffice produced no output there is nothing to delete, and the
        # parse error is the one the caller should see.
        try:
            os.remove(outname)
        except FileNotFoundError:
            pass