Skip to content

clean_prestable

Classes and functions to clean the prestable directories.

find_prestable_files(corpusdir)

Find interesting files in prestable.

Parameters:

Name Type Description Default
corpusdir str

path to a corpus directory

required

Yields:

Type Description
str

path to an interesting prestable file

Source code in /home/anders/projects/CorpusTools/corpustools/clean_prestable.py
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
def find_prestable_files(corpusdir):
    """Yield the paths of files stored below a corpus' prestable tree.

    Only the ``prestable/converted`` and ``prestable/tmx`` subtrees are
    searched, in that order. A subtree that does not exist is skipped, and
    so is every directory whose path contains the text ``pre_run``.

    Args:
        corpusdir (str): path to a corpus directory

    Yields:
        (str): path to an interesting prestable file
    """
    for kind in ("converted", "tmx"):
        top = os.path.join(corpusdir, "prestable", kind)
        if not os.path.exists(top):
            continue

        for dirpath, _dirnames, filenames in os.walk(top):
            # Substring test on the whole directory path, not just its
            # last component.
            if "pre_run" in dirpath:
                continue

            yield from (os.path.join(dirpath, name) for name in filenames)

main()

Remove files in prestable that don't have original files.

Source code in /home/anders/projects/CorpusTools/corpustools/clean_prestable.py
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def main():
    """Remove files in prestable that don't have original files.

    For every corpus directory given on the command line, each file found
    by ``find_prestable_files`` is mapped to a corpus path object. When the
    ``orig`` path of that object does not exist on disk, the prestable file
    is removed through the version control object for the corpus directory.

    A ``git.exc.GitCommandError`` raised by the removal is reported with
    ``util.note`` and processing continues with the next file. Only
    removals that completed without that error are included in the final
    count, and the summary line is printed only when at least one file was
    removed.
    """
    args = parse_options()

    removed = 0
    for corpusdir in args.corpusdirs:
        vcsfactory = versioncontrol.VersionControlFactory()
        vcs = vcsfactory.vcs(corpusdir)
        for prestable_path in find_prestable_files(corpusdir):
            corpus_file = corpuspath.make_corpus_path(prestable_path)
            if os.path.exists(corpus_file.orig):
                continue

            print(f"Removing {prestable_path}")
            print(f"Orig was {corpus_file.orig}")
            try:
                vcs.remove(prestable_path)
            except git.exc.GitCommandError:
                # Report the path that was actually passed to vcs.remove
                # and the original it was checked against.
                util.note(f"\nError when trying to remove {prestable_path}")
                util.note(f"Orig was {corpus_file.orig}\n")
            else:
                # Count only removals that did not raise.
                removed += 1

    if removed:
        print(f"Removed {removed} files from prestable")

parse_options()

Parse the commandline options.

Returns:

Type Description
argparse.Namespace

the parsed commandline arguments

Source code in /home/anders/projects/CorpusTools/corpustools/clean_prestable.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
def parse_options():
    """Build the argument parser and parse the command line.

    The parser inherits the options of ``argparse_version.parser`` and
    takes one or more corpus directories as positional arguments.

    Returns:
        (argparse.Namespace): the parsed commandline arguments
    """
    description = "Remove files in prestable that have no original files."
    option_parser = argparse.ArgumentParser(
        description=description,
        parents=[argparse_version.parser],
    )
    option_parser.add_argument(
        "corpusdirs",
        help="Corpus directories",
        nargs="+",
    )

    return option_parser.parse_args()