Skip to content

modes

Classes and functions to handle apertium modes.xml files.

Pipeline

Make a pipeline out of modes.xml file.

Attributes:

Name Type Description
modename str

a mode element from a modes.xml file.

giella_prefix str

Set this variable if the installed giella files are not found in the standard places.

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
class Pipeline:
    """Make a pipeline out of modes.xml file.

    Attributes:
        mode (lxml.etree.Element): the mode element with the requested name,
            looked up in the modes.xml file shipped next to this module.
        giella_prefix (str): directory holding the giella resources for the
            requested language; file names in the mode are joined to it.
    """

    def __init__(self, modename, lang, giella_prefix=None):
        """Initialise the Pipeline class.

        Args:
            modename (str): name of a mode that is expected to be found
                in the modes.xml file.
            lang (str): the language whose resources the pipeline needs.
            giella_prefix (str): installation prefix to use instead of the
                standard places; resources are expected below
                ``<giella_prefix>/share/giella/<lang>``.

        Raises:
            util.ArgumentError: if the mode is not found, or if no resources
                are found for the language.
        """
        modes_path = os.path.join(os.path.dirname(__file__), "xml/modes.xml")
        self.mode = etree.parse(modes_path).find(f'.//mode[@name="{modename}"]')
        if self.mode is None:
            # find() returns None for an unknown name; fail here with a
            # readable message instead of an AttributeError in sanity_check.
            raise util.ArgumentError(f"ERROR: found no mode named {modename}")

        self.giella_prefix = self.valid_path(giella_prefix, lang)
        self.sanity_check()

    @staticmethod
    def valid_path(giella_prefix, lang):
        """Check if resources needed by modes exist.

        Args:
            giella_prefix (str): user provided directory where resources exist.
            lang (str): the language that modes is asked to serve.

        Returns:
            (str): A directory where resources for the given language exist.
                A user provided prefix is trusted and returned unchecked.

        Raises:
            util.ArgumentError: if no resources are found.
        """
        if giella_prefix is not None:
            return os.path.join(giella_prefix, "share/giella", lang)

        prefixes = ["/usr/local", "/usr"]
        home = os.getenv("HOME")
        if home is not None:
            # HOME may be unset (cron, minimal containers); joining None
            # would raise TypeError, so only search it when it is known.
            prefixes.insert(0, os.path.join(home, ".local"))

        for prefix in prefixes:
            path = os.path.join(prefix, "share/giella", lang)
            # An empty directory does not count as installed resources.
            if os.path.isdir(path) and os.listdir(path):
                return path

        raise util.ArgumentError(f"ERROR: found no resources for {lang}")

    @staticmethod
    def raise_unless_exists(filenames):
        """Raise an ArgumentError if filename does not exist.

        Args:
            filenames (list of str): list of filenames harvested from the
                mode element.

        Raises:
            util.ArgumentError: if a filename does not exist.
        """
        for filename in filenames:
            if not os.path.exists(filename):
                raise util.ArgumentError(f"ERROR: {filename} does not exist")

    def sanity_check(self):
        """Check that programs and files found in a program element exist.

        Program names are handed to util.sanity_check, file names are
        resolved against giella_prefix and checked on disk.

        Raises:
            util.ArgumentError: if a file named in the mode does not exist.
        """
        util.sanity_check(
            [program.get("name") for program in self.mode.iter("program")]
        )
        self.raise_unless_exists(
            [
                os.path.join(self.giella_prefix, file_elem.get("name"))
                for file_elem in self.mode.iter("file")
            ]
        )

    def run_external_command(self, command, instring):
        """Run the command with input via util.ExternalCommandRunner.

        Args:
            command (list of str): a subprocess compatible command.
            instring (bytes): the input to the command.

        Returns:
            (bytes): the output of the command
        """
        runner = util.ExternalCommandRunner()
        runner.run(command, to_stdin=instring)
        # Errors are reported, not raised: the pipeline keeps going.
        self.check_error(command, runner.stderr)

        return runner.stdout

    @staticmethod
    def check_error(command, error):
        """Print the error output of a command to stderr, if there is any.

        Args:
            command (list of str): the command that produced the output.
            error (bytes): what the command wrote to its stderr.
        """
        if error:
            # Undecodable bytes are replaced: reporting a diagnostic must
            # not itself fail with UnicodeDecodeError.
            message = error.decode("utf8", errors="replace")
            print(
                "{} failed:\n{}".format(" ".join(command), message),
                file=sys.stderr,
            )

    def tag2commandpart(self, element):
        """Turn program elements to a command part.

        Args:
            element (lxml.Element): a program element or one of its
                subelements

        Returns:
            (str): a program, a program option or a path to a file
        """
        if element.tag == "file":
            return os.path.join(self.giella_prefix, element.get("name"))

        return element.get("name")

    def program2command(self, program):
        """Turn a program element to a subprocess compatible command.

        Args:
            program (lxml.Element): a program element

        Returns:
            (list[str]): a subprocess compatible command
        """
        # iter() yields the program element itself first, then its
        # descendants, so the program name becomes the first command part.
        return [self.tag2commandpart(element) for element in program.iter()]

    @property
    def commands(self):
        """Make a list of subprocess compatible commands.

        Returns:
            (list[list]): a list of subprocess compatible commands.
        """
        return [self.program2command(program) for program in self.mode.iter("program")]

    def run(self, instring):
        """Run the pipeline using input.

        The output of each command is fed as input to the next one.

        Args:
            instring (bytes): utf-8 encoded input to the pipeline

        Returns:
            (str): output of the pipeline
        """
        for command in self.commands:
            instring = self.run_external_command(command, instring)

        return instring.decode("utf8")

commands property

Make a list of subprocess compatible commands.

Returns:

Type Description
list[list]

a list of subprocess compatible commands.

__init__(modename, lang, giella_prefix=None)

Initialise the Pipeline class.

Parameters:

Name Type Description Default
modename str

name of a mode that is expected to be found in the modes.xml file.

required
giella_prefix str

directory where the filenames given in the modes.xml file exist.

None
Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
48
49
50
51
52
53
54
55
56
57
58
59
60
def __init__(self, modename, lang, giella_prefix=None):
    """Initialise the Pipeline class.

    Args:
        modename (str): name of a mode that is expected to be found
            in the modes.xml file.
        lang (str): the language whose resources the pipeline needs.
        giella_prefix (str): installation prefix to use instead of the
            standard places; passed on to valid_path together with lang.

    Raises:
        util.ArgumentError: if no mode with the given name is found.
    """
    modes_path = os.path.join(os.path.dirname(__file__), "xml/modes.xml")
    self.mode = etree.parse(modes_path).find(f'.//mode[@name="{modename}"]')
    if self.mode is None:
        # find() returns None for an unknown name; fail here with a readable
        # message instead of an AttributeError later in sanity_check.
        raise util.ArgumentError(f"ERROR: found no mode named {modename}")

    self.giella_prefix = self.valid_path(giella_prefix, lang)
    self.sanity_check()

check_error(command, error) staticmethod

Print errors.

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
133
134
135
136
137
138
139
140
@staticmethod
def check_error(command, error):
    """Print the error output of a command to stderr, if there is any.

    Args:
        command (list of str): the command that produced the output.
        error (bytes): what the command wrote to its stderr.
    """
    if error:
        # Undecodable bytes are replaced: reporting a diagnostic must not
        # itself fail with UnicodeDecodeError.
        message = error.decode("utf8", errors="replace")
        print(
            "{} failed:\n{}".format(" ".join(command), message),
            file=sys.stderr,
        )

program2command(program)

Turn a program element to a subprocess compatible command.

Parameters:

Name Type Description Default
program lxml.Element

a program element

required

Returns:

Type Description
list[str]

a subprocess compatible command

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
156
157
158
159
160
161
162
163
164
165
def program2command(self, program):
    """Build the argument list for one program element.

    Args:
        program (lxml.Element): a program element

    Returns:
        (list[str]): a subprocess compatible command
    """
    # iter() yields the program element itself first, then its descendants,
    # so the program name ends up as the first command part.
    return list(map(self.tag2commandpart, program.iter()))

raise_unless_exists(filenames) staticmethod

Raise an ArgumentError if filename does not exist.

Parameters:

Name Type Description Default
filenames list of str

list of filenames harvested from the mode element.

required

Raises:

Type Description
util.ArgumentError

if a filename does not exist.

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
@staticmethod
def raise_unless_exists(filenames):
    """Fail on the first filename that is missing on disk.

    Args:
        filenames (list of str): list of filenames harvested from the
            mode element.

    Raises:
        util.ArgumentError: if a filename does not exist.
    """
    for candidate in filenames:
        if os.path.exists(candidate):
            continue

        raise util.ArgumentError(f"ERROR: {candidate} does not exist")

run(instring)

Run the pipeline using input.

Parameters:

Name Type Description Default
instring bytes

utf-8 encoded input to the pipeline

required

Returns:

Type Description
str

output of the pipeline

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
176
177
178
179
180
181
182
183
184
185
186
187
188
def run(self, instring):
    """Feed the input through every command of the pipeline in turn.

    Args:
        instring (bytes): utf-8 encoded input to the pipeline

    Returns:
        (str): output of the pipeline
    """
    data = instring
    for step in self.commands:
        # The output of one step is the input of the next.
        data = self.run_external_command(step, data)

    return data.decode("utf8")

run_external_command(command, instring)

Run the command with input using subprocess.

Parameters:

Name Type Description Default
command list of str

a subprocess compatible command.

required
instring bytes

the input to the command.

required

Returns:

Type Description
bytes

the output of the command

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
def run_external_command(self, command, instring):
    """Run one command via util.ExternalCommandRunner, feeding it input.

    Anything the command wrote to stderr is passed to check_error.

    Args:
        command (list of str): a subprocess compatible command.
        instring (bytes): the input to the command.

    Returns:
        (bytes): the output of the command
    """
    external = util.ExternalCommandRunner()
    external.run(command, to_stdin=instring)

    self.check_error(command, external.stderr)
    output = external.stdout

    return output

sanity_check()

Check that programs and files found in a program element exist.

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
105
106
107
108
109
110
111
112
113
114
115
def sanity_check(self):
    """Check that programs and files found in a program element exist.

    Program names go to util.sanity_check; file names are joined to
    giella_prefix and handed to raise_unless_exists.
    """
    program_names = [program.get("name") for program in self.mode.iter("program")]
    util.sanity_check(program_names)

    required_files = []
    for file_elem in self.mode.iter("file"):
        required_files.append(os.path.join(self.giella_prefix, file_elem.get("name")))
    self.raise_unless_exists(required_files)

tag2commandpart(element)

Turn program elements to a command part.

Parameters:

Name Type Description Default
element lxml.Element

a program subelement

required

Returns:

Type Description
str

a program, a program option or a path to a file

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
142
143
144
145
146
147
148
149
150
151
152
153
154
def tag2commandpart(self, element):
    """Translate one element of a program into a command part.

    Args:
        element (lxml.Element): a program element or one of its subelements

    Returns:
        (str): a program, a program option or a path to a file
    """
    name = element.get("name")

    # Only file elements are resolved against the resource directory;
    # everything else is used verbatim.
    return os.path.join(self.giella_prefix, name) if element.tag == "file" else name

valid_path(giella_prefix, lang) staticmethod

Check if resources needed by modes exist.

Parameters:

Name Type Description Default
giella_prefix str

user provided directory where resources exist.

required
lang str

the language that modes is asked to serve.

required

Returns:

Type Description
str

A directory where resources for the given language exist.

Raises:

Type Description
util.ArgumentError

if no resources are found.

Source code in /home/anders/projects/CorpusTools/corpustools/modes.py
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
@staticmethod
def valid_path(giella_prefix, lang):
    """Check if resources needed by modes exist.

    Args:
        giella_prefix (str): user provided directory where resources exist.
        lang (str): the language that modes is asked to serve.

    Returns:
        (str): A directory where resources for the given language exist.
            A user provided prefix is trusted and returned unchecked.

    Raises:
        util.ArgumentError: if no resources are found.
    """
    if giella_prefix is not None:
        return os.path.join(giella_prefix, "share/giella", lang)

    prefixes = ["/usr/local", "/usr"]
    home = os.getenv("HOME")
    if home is not None:
        # HOME may be unset (cron, minimal containers); joining None would
        # raise TypeError, so only search it when it is known.
        prefixes.insert(0, os.path.join(home, ".local"))

    for prefix in prefixes:
        path = os.path.join(prefix, "share/giella", lang)
        # An empty directory does not count as installed resources.
        if os.path.isdir(path) and os.listdir(path):
            return path

    raise util.ArgumentError(f"ERROR: found no resources for {lang}")