Source code for gridtest.main.test

"""

Copyright (C) 2020 Vanessa Sochat.

This Source Code Form is subject to the terms of the
Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed
with this file, You can obtain one at http://mozilla.org/MPL/2.0/.

"""

from gridtest.defaults import (
    GRIDTEST_WORKERS,
    GRIDTEST_RETURNTYPES,
)
from gridtest.templates import copy_template
from gridtest.utils import read_yaml, write_yaml, write_json, save_pickle
from gridtest.logger import bot
from gridtest import __version__

from gridtest.main.generate import (
    import_module,
    get_function_typing,
    extract_modulename,
)
from gridtest.main.grids import Grid
from gridtest.main.helpers import test_basic
from gridtest.main.workers import Workers
from gridtest.main.substitute import substitute_func, substitute_args
from copy import deepcopy

import itertools
import re
import shutil
import sys
import os


class GridTest:
    def __init__(
        self,
        module,
        name,
        func=None,
        filename=None,
        params=None,
        verbose=False,
        cleanup=True,
        show_progress=True,
    ):
        self.name = name
        self.func = func
        self.module = module
        self.valid = False
        self.success = False
        self.filename = filename or ""
        self.verbose = verbose
        self.cleanup_temp = cleanup
        self.to_cleanup = set()
        self.show_progress = show_progress
        self.result = None
        self.raises = None

        # Catching output and error
        self.out = []
        self.err = []
        self.metrics = {}

        # Parse input arguments
        self.set_params(params)

    def __repr__(self):
        return "[test|%s]" % self.name

    def __str__(self):
        return "[test|%s]" % self.name

    # Templating

    def set_params(self, params):
        """Given params with args that are loaded, make substitutions at the
        onset of generating the test. Also keep track of any directories or
        files defined by tmp_path and tmp_dir to clean up after the test
        is run.
        """
        self.params = params or {}
        for name, value in self.params.get("args", {}).items():
            new_value = self.substitute(value)
            self.params["args"][name] = new_value

            # If the action is a gridtest function, handle cleanup
            if isinstance(value, str) and re.sub("({%|%}| )", "", value) in [
                "tmp_dir",
                "tmp_path",
            ]:
                self.to_cleanup.add(new_value)

        # Set backup params, in case we reset
        self._params = deepcopy(self.params)

    def substitute(self, value):
        """Given an input value, return the appropriate substituted string
        for it. This means that {{ args.x }} can reference an argument in
        params, and {% func %} can refer to a function in the gridtest
        helpers namespace.
        """
        value = self._substitute_args(value)
        value = self._substitute_func(value)
        return value

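    # Example (illustrative sketch, not part of the original module): with
    # self.params == {"args": {"name": "pancakes"}},
    # self.substitute("hello {{ args.name }}") returns "hello pancakes",
    # while "{% tmp_path %}" is handled by the gridtest helpers namespace
    # and replaced with a generated temporary file path.
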
    def post_substitute(self):
        """After a run, sometimes we want to check the result (whatever it is)."""
        # Run substitution for custom sections
        for section in GRIDTEST_RETURNTYPES:
            if section in self.params:
                self.params[section] = self.substitute(self.params[section])

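    # Example (illustrative): for a function that returns a path, a recipe
    # key like 'exists: "{{ result }}"' is only substituted here, after the
    # run, so check_exists can test the actual returned path (assuming
    # "exists" is among GRIDTEST_RETURNTYPES).
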
    def _substitute_args(self, value):
        """Given a value, determine if it has variable argument substitutions
        in the format of {{ args.<name> }} and, if so and the argument is
        present, return the value with the substitution.
        """
        if not isinstance(value, str):
            return value

        # Returns is a special case: this checks for a returns param
        if re.search(r"{{(\s+)?returns(\s)?}}", value) and "returns" in self.params:
            value = substitute_args(value, params=self.params)

        # Result is a special case that works after a test is run
        if re.search(r"{{(\s+)?result(\s)?}}", value) and self.result:
            value = substitute_args(value, params={"result": self.result})

        # We allow for namespacing of args; right now only args is supported
        value = re.sub("args[.]", "", value, 1)
        return substitute_args(value, params=self.params.get("args", {}))

    def _substitute_func(self, value):
        """Given a value, determine if it contains a function substitution,
        and perform it. See gridtest.main.substitute.substitute_func for
        details.
        """
        return substitute_func(value)

    # Summary

    @property
    def summary(self):
        """print a summary of the test, including whether it is supposed to
        return, raise, or check existence.
        """
        if self.success:
            return self.summary_success()
        return self.summary_failure()

    # Running

    def get_func(self):
        """Get the function to test, importing its module first. This can
        also be used for one-off (custom) function and module names.
        """
        sys.path.insert(0, os.path.dirname(self.filename))
        module = import_module(self.module)
        func = getattr(module, self.get_funcname())
        if func is None:
            bot.error("Cannot find function.")
        return func

    def get_funcname(self):
        """Get the function name by stripping the module prefix from the
        test name.
        """
        # The function name is the name absent the module
        return re.sub("^%s[.]" % self.module, "", self.name)

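    # Example (illustrative): a test named "script.add" in module "script"
    # yields get_funcname() == "add".
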
    def run(self, interactive=False, cleanup=None):
        """run an isolated test, and store the return code and result with
        the tester here.

        Arguments:
         - interactive (bool) : run interactively for shell to debug
         - cleanup (bool) : remove any temporary directories or files (True)
        """
        if not self.show_progress:
            bot.info(f"Running test {self.name}")

        # Should we clean up temporary files produced?
        if cleanup is not None:
            self.cleanup_temp = cleanup

        # [passed, result, out, err, raises]
        passed, result, out, err, raises = test_basic(
            funcname=self.get_funcname(),
            module=self.module,
            func=self.func,
            filename=self.filename,
            metrics=self.params.get("metrics", []),
            args=self.params.get("args", {}),
            returns=self.params.get("returns"),
            interactive=interactive,
        )

        self.success = passed
        self.result = result
        self.out = out
        self.err = err
        self.raises = raises

        # Finish by checking output
        self.check_output()

        if self.cleanup_temp:
            self.cleanup()

    # Checking Results

    def check_output(self):
        """Given that self.result is defined, check final output for the test.
        This works (and is called) after self.run(), OR by the multiprocessing
        worker that has updated self.result. Each of the checks below does
        additional parsing of the result, and the client will update
        self.success to be False if there is an issue.
        """
        # Do final substitution
        self.post_substitute()

        # If decorators provided, parse their output
        self.check_metrics()

        # Set 1: test for returns
        if "returns" in self.params:
            self.check_returns(self.params["returns"])

        # Set 2: test raises
        if "raises" in self.params:
            self.check_raises(self.params["raises"])

        # Set 3: test exists
        if "exists" in self.params:
            self.check_exists(self.params["exists"])

        # Set 4: determine if a statement is true or false
        if "istrue" in self.params:
            self.check_istrue(self.params["istrue"])
        if "isfalse" in self.params:
            self.check_isfalse(self.params["isfalse"])
        if "equals" in self.params:
            self.check_equals(self.params["equals"])
        if "isinstance" in self.params:
            self.check_isinstance(self.params["isinstance"])

        # Set 5: an error was raised (not expected)
        if self.raises and "raises" not in self.params:
            self.err.append(f"Unexpected Exception: {self.raises}.")
            self.success = False

        # If failure was expected and the test failed, the test passes
        if "success" in self.params:
            if not self.params["success"] and not self.success:
                self.out.append("success key set to false, expected failure.")
                self.success = True

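    # For reference (illustrative, derived from the checks above), recipe
    # keys in params map to checks as follows:
    #   returns: 4                  -> check_returns
    #   raises: TypeError           -> check_raises
    #   exists: "{{ result }}"      -> check_exists
    #   istrue: "{{ result }} > 0"  -> check_istrue / check_isfalse
    #   equals: "2 + 2"             -> check_equals
    #   isinstance: dict            -> check_isinstance
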
    # Checks

    def check_exists(self, filename):
        """check if a filename exists."""
        self.success = False
        if os.path.exists(filename):
            self.success = True

    def check_returns(self, value):
        """test that a function returns a particular value. The value might
        reference an input variable, so we use the args dictionary to
        substitute.
        """
        self.success = False
        value = self.substitute(value)
        if value == self.result:
            self.success = True

    def check_raises(self, exception):
        """Ensure that running a function raises a particular error. If the
        function runs successfully, this is considered a failure.
        """
        self.success = False

        # Case 1: no exception thrown
        if not self.raises:
            self.err.append(f"Expected exception {exception} not raised")

        # Case 2: correct exception thrown
        elif self.raises == exception:
            self.success = True
            self.out.append(f"Exception: {self.raises} raised as desired.")
        else:
            self.err.append(
                f"Expected exception {exception}, instead raised {self.raises}"
            )

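    # Example (illustrative): a recipe entry with "raises: TypeError" passes
    # only if running the function recorded "TypeError" in self.raises
    # (assuming test_basic stores the raised exception's class name).
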
    def check_isinstance(self, instance):
        """check if the result is of a particular type."""
        if not type(self.result).__name__ == instance:
            self.err.append(
                f"{type(self.result).__name__} is not an instance of {instance}"
            )
            self.success = False

    def check_istrue(self, statement):
        """check if a statement is true."""
        if not eval(str(statement)) == True:
            self.success = False

    def check_isfalse(self, statement):
        """check if a statement is false."""
        if not eval(str(statement)) == False:
            self.success = False

    def check_equals(self, statement):
        """check if a result equals some statement."""
        if not eval(str(statement)) == self.result:
            self.success = False

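    # Examples (illustrative): these three checks eval plain strings after
    # substitution, e.g. for a test whose function returns 4:
    #   istrue: "{{ result }} > 0"   -> eval("4 > 0") == True, so it passes
    #   isfalse: "{{ result }} < 0"  -> eval("4 < 0") == False, so it passes
    #   equals: "2 + 2"              -> eval("2 + 2") == self.result, so it passes
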
    def check_metrics(self):
        """After runs are complete, given metrics defined in params, parse
        over the list and look for metric output in the output (and remove it).
        """
        metrics = self.params.get("metrics")
        if metrics:
            regex = "^(%s)" % "|".join(metrics)
            self.metrics = {k: [] for k in metrics}
            for line in self.out:
                for metric in metrics:
                    if line.startswith(metric):
                        self.metrics[metric].append(
                            line.replace(metric, "", 1).strip()
                        )
            self.out = [x for x in self.out if not re.search(regex, x)]

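    # Example (illustrative, using a hypothetical "@timeit" metric): an
    # output line "@timeit 0.01 ms" would be collected as
    # self.metrics["@timeit"] == ["0.01 ms"] and removed from self.out.
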
    # Cleanup and reset

    def cleanup(self):
        """Given a list of paths (files or folders) generated by gridtest,
        clean them up with os.remove or shutil.rmtree.
        """
        if not self.cleanup_temp:
            bot.debug("Skipping cleanup.")
        else:
            for path in self.to_cleanup:
                if os.path.isfile(path):
                    bot.debug(f"Cleaning up file {path}")
                    os.remove(path)
                elif os.path.isdir(path):
                    bot.debug(f"Cleaning up directory {path}")
                    shutil.rmtree(path)

    def reset(self):
        """reset a test to its original state, meaning that the original
        parameters, the result, raises, etc. are restored.
        """
        self.params = deepcopy(self._params)
        self.result = None
        self.raises = None
        self.valid = False
        self.success = False
        self.to_cleanup = set()
        self.out = []
        self.err = []

    def _summary(self, out):
        """return a summary for a specific output (or error) stream."""
        output = "".join(out) if self.verbose else ""
        for key in [
            "returns",
            "raises",
            "exists",
            "istrue",
            "isfalse",
            "equals",
            "isinstance",
        ]:
            if key in self.params:
                output += " %s %s" % (key, self.params[key])
        return output.strip()

    def summary_success(self):
        return self._summary(self.out)

    def summary_failure(self):
        return self._summary(self.err)

class GridTestFunc(GridTest):
    """a function can be loaded from within Python with GridTestFunc."""

    def __init__(
        self,
        func,
        params=None,
        verbose=False,
        show_progress=True,
    ):
        super().__init__(
            module=func.__module__,
            func=func,
            name=func.__name__,
            params=params,
            verbose=verbose,
            show_progress=show_progress,
        )

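# Usage sketch (illustrative, assuming a locally defined function `add`):
#
#     def add(a, b):
#         return a + b
#
#     test = GridTestFunc(add, params={"args": {"a": 1, "b": 2}, "returns": 3})
#     test.run()
#     assert test.success
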
class GridRunner:
    def __init__(self, input_file, **kwargs):
        """the grid runner loads a gridtest specification file, and then
        deploys testing workers to run the tests.

        Arguments:
          input_file (str) : the gridtest yaml specification file
          kwargs: should include command line arguments from the client.
        """
        self.config = {}
        self._version = __version__
        self.load(input_file)
        self.set_name(kwargs.get("name"))
        self._fill_classes()
        self.show_progress = True
        self.grids = {}

    def load(self, input_file):
        """load a gridtest testing file."""
        input_file = os.path.abspath(input_file)
        if not os.path.exists(input_file):
            sys.exit(f"Cannot find gridtest file {input_file}")
        if not re.search("(yml|yaml)$", input_file):
            sys.exit("Please provide a yaml file (e.g., gridtest.yml) to test.")
        self.config = read_yaml(input_file)
        self.input_file = input_file
        self.input_dir = os.path.dirname(input_file)

    def set_name(self, name=None):
        """set a custom name. If the user provides a name to the GridRunner,
        this name will be used. Otherwise we use the basename of the input
        file.

        Arguments:
         - name (str) : a custom name for the runner
        """
        self.name = name or os.path.basename(self.input_file)

    def iter_tests(self):
        """yield (name, tests) pairs for each test section in the config."""
        for _, section in self.config.items():
            for name, tests in section.get("tests", {}).items():
                yield (name, tests)

    def iter_grids(self):
        """yield (name, grid) pairs for each grid known to the runner."""
        for name, grid in self.grids.items():
            yield (name, grid)

    def _fill_classes(self):
        """Read in a config, and create a lookup for any instance variables.
        Then substitute arguments (starting with instance) for these variables.
        """
        # First create the lookup
        self.lookup = dict()
        for name, tests in self.iter_tests():
            for test in tests:
                if "instance" in test:
                    self.lookup[test["instance"]] = test

        # Now fill in variables
        for name, tests in self.iter_tests():
            for test in tests:
                if "self" in test.get("args", {}):
                    if not test["args"]["self"]:
                        sys.exit(
                            "%s: please define an instance to use with "
                            "'self: {{ instance.name }}'" % name
                        )
                    if re.search("{{.+}}", test["args"]["self"]):
                        instance = re.sub(
                            "({{|}}|instance[.])", "", test["args"]["self"]
                        ).strip()
                        if instance in self.lookup:
                            test["args"]["self"] = self.lookup[instance]

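    # Example (illustrative recipe snippet): an entry with "instance: myclass"
    # is registered in self.lookup, so a later test defining
    #     args:
    #       self: "{{ instance.myclass }}"
    # has its "self" argument replaced by that registered entry.
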
    def run_tests(self, tests, nproc=9, parallel=True, interactive=False, name=None):
        """run tests. By default, we run them in parallel, unless serial
        is selected.

        Arguments:
         - tests (dict) : test names mapped to GridTest objects
         - parallel (bool) : run tasks in parallel when able (default is True)
         - nproc (int) : number of processes to run
         - name (str) : the name of a test to interact with
         - interactive (bool) : run jobs interactively (for debugging);
           not available for parallel jobs.
        """
        # Parallel tests cannot be interactive
        if parallel and not interactive:
            self._run_parallel(tests, nproc=nproc)

        else:
            total = len(tests)
            progress = 1

            for _, task in tests.items():
                prefix = "[%s:%s/%s]" % (task.name, progress, total)
                if self.show_progress:
                    bot.show_progress(progress, total, length=35, prefix=prefix)
                else:
                    bot.info("Running %s" % prefix)

                # Should this be interactive?
                is_interactive = interactive
                if name is not None and interactive:
                    if not task.name.startswith(name):
                        is_interactive = False

                # Run the task, update results with finished object
                task.run(interactive=is_interactive)
                progress += 1

        return tests

    def _run_parallel(self, tests, nproc=GRIDTEST_WORKERS):
        """run tasks in parallel using the Workers class. Returns the same
        test objects, but after running.

        Arguments:
         - tests (dict) : test names mapped to GridTest objects to run
        """
        workers = Workers(show_progress=self.show_progress, workers=nproc)
        workers.run(tests)

        # Run final checks
        for name, test in tests.items():
            test.check_output()
            if test.cleanup_temp:
                test.cleanup()
        return tests

    def run(
        self,
        regexp=None,
        parallel=True,
        nproc=None,
        show_progress=True,
        verbose=False,
        interactive=False,
        name=None,
        cleanup=True,
        save=None,
        save_report=None,
        save_compact=False,
        save_metrics=None,
        report_template="report",
    ):
        """run the grid runner, meaning that we turn each function and set
        of tests into a single test, and then run with multiprocessing.
        This is the function called by the user that also does filtering
        of tests based on a regular expression before calling them.

        Arguments:
         - regexp (str) : if supplied, filter to this pattern
         - parallel (bool) : use multiprocessing to run tasks (default True)
         - show_progress (bool) : show progress instead of task information
         - nproc (int) : number of processes to use for parallel testing
         - verbose (bool) : print success output too
         - interactive (bool) : interactively debug functions
         - name (str) : if specified, a name of a test to interact with
         - cleanup (bool) : clean up files/directories generated with
           tmp_path and tmp_dir
         - save (str) : a filepath to save results to (json or pickle)
         - save_report (str) : path to a folder (not existing) to save a report to
         - save_compact (bool) : don't pretty print saved json
         - save_metrics (str) : a json filepath to save metrics to
         - report_template (str) : a template name of a report to generate
        """
        # 1. Generate list of tests and grid functions
        self.show_progress = show_progress
        self.get_grids()
        tests = self.get_tests(regexp=regexp, verbose=verbose, cleanup=cleanup)

        # Pretty print results to screen
        if not tests:
            bot.exit_info("No tests to run.")

        # 2. Run tests (serial or in parallel)
        self.run_tests(
            tests=tests,
            parallel=parallel,
            nproc=nproc or GRIDTEST_WORKERS,
            interactive=interactive,
            name=name,
        )
        self.print_results(tests)

        # Save report?
        if save_report:
            report_dir = self.save_report(save_report, report_template)
            save = os.path.join(report_dir, "results.json")

        if save_metrics:
            self.save_metrics(save_metrics, tests, save_compact)

        # Save to file (required for report)
        if save:
            self.save_results(save, tests, save_compact)

        # Return the correct error code
        if self.failed(tests):
            return 1
        return 0

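    # Usage sketch (illustrative): a typical invocation from Python, assuming
    # a gridtest.yml in the working directory:
    #
    #     runner = GridRunner("gridtest.yml")
    #     rc = runner.run(regexp="add", nproc=4, save="results.json")
    #     # rc is 0 if all tests passed, and 1 otherwise
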
    def success(self, tests):
        """Given a set of tests, return True if all are successful."""
        return all([test.success for name, test in tests.items()])

    def failed(self, tests):
        """Given a set of tests, return True if any are not successful."""
        return not self.success(tests)

    def save(self, testfile):
        """Save the runner config to an output yaml file."""
        bot.info(f"Writing {self} to {testfile}")
        write_yaml(self.config, testfile)

    def save_report(self, report_dir, report_template):
        """save a report of runner results to a folder."""
        report_dir = os.path.abspath(report_dir)

        # Report directory cannot already exist
        if os.path.exists(report_dir):
            bot.exit(f"{report_dir} already exists, please remove before using.")
        dest = copy_template(report_template, report_dir)
        if not dest:
            bot.exit(f"Error writing to {dest}.")
        return dest

    def _savepaths_valid(self, filename, allowed=None):
        """For some file to be written, check that the directory exists,
        and a proper extension is used.
        """
        allowed = allowed or [".json", ".pkl"]
        regexp = "(%s)$" % ("|".join(allowed))
        dirname = os.path.dirname(filename)
        if not os.path.exists(dirname):
            bot.exit(f"{dirname} does not exist, skipping save.")
        elif not re.search(regexp, filename):
            bot.warning(
                "%s must have an extension in %s, skipping save."
                % (filename, ",".join(allowed))
            )
            return False
        return True

    def save_metrics(self, filename, tests, save_compact=False):
        """save metrics to file. This is the same data as a general results
        export, but without the results and without the params.

        Arguments:
         - filename (str) : the json file to save to (must end in json)
         - tests (dict) : test names mapped to GridTest objects
         - save_compact (bool) : don't pretty print
        """
        filename = os.path.abspath(filename)
        if self._savepaths_valid(filename, allowed=[".json"]):
            results = []
            for key, test in tests.items():
                if test.params.get("save", True) == False:
                    continue
                results.append(
                    {
                        "name": key,
                        "function": test.name,
                        "filename": test.filename,
                        "raises": test.raises,
                        "success": test.success,
                        "metrics": test.metrics,
                        "module": test.module,
                    }
                )
            write_json(results, filename, pretty=not save_compact)
        return filename

    def save_results(self, filename, tests, save_compact=False):
        """save runner results to file.

        Arguments:
         - filename (str) : the file to save to (must end in json or pkl)
         - tests (dict) : test names mapped to GridTest objects
         - save_compact (bool) : don't pretty print
        """
        filename = os.path.abspath(filename)
        if self._savepaths_valid(filename):
            results = []
            for key, test in tests.items():
                if test.params.get("save", True) == False:
                    continue
                results.append(
                    {
                        "name": key,
                        "function": test.name,
                        "filename": test.filename,
                        "out": test.out,
                        "err": test.err,
                        "result": test.result,
                        "params": test.params,
                        "raises": test.raises,
                        "success": test.success,
                        "metrics": test.metrics,
                        "module": test.module,
                    }
                )
            if filename.endswith(".json"):
                try:
                    write_json(results, filename, pretty=not save_compact)
                except Exception:
                    bot.warning("Error saving to json, try a .pkl extension instead.")
            elif filename.endswith(".pkl"):
                save_pickle(results, filename)
        return filename

    def print_results(self, tests):
        """print the results of the tests, meaning that success is in green,
        and non-success is in red.
        """
        total = 0
        success = 0
        failure = 0
        has_metrics = False

        print("{:<30} {:<30} {:<30}".format("Name", "Status", "Summary"))
        print("{:_<120}".format(""))
        for name, test in tests.items():
            total += 1
            if test.metrics:
                has_metrics = True
            if test.success:
                bot.success(
                    "{:<30} {:<30} {:<30}".format(name, "success", test.summary)
                )
                success += 1
            else:
                bot.failure(f"failure: {name} {test.summary}")
                failure += 1

        if has_metrics:
            print("\n{:_<120}".format(""))
            for name, test in tests.items():
                for metric, result in test.metrics.items():
                    print(
                        "{:<30} {:<30} {:<30}".format(name, metric, "|".join(result))
                    )

        print(f"\n{success}/{total} tests passed")

    def get_grids(self):
        """a grid is a specification under "grids" that can be run to
        parameterize a set of arguments, optionally run through a function,
        or just generated to have combinations. If a count variable is
        included, we multiply by that many times.
        """
        for parent, section in self.config.items():
            filename = extract_modulename(section.get("filename", ""), self.input_dir)
            for name, grid in section.get("grids", {}).items():
                self.grids[name] = Grid(
                    name=name, params=grid, filename=filename, refs=self.grids
                )
        return self.grids

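    # Example (illustrative sketch of a recipe "grids" section; the exact
    # keys follow the gridtest recipe format):
    #
    #     grids:
    #       generate_numbers:
    #         args:
    #           count: [1, 2, 3]
    #
    # This defines a Grid named "generate_numbers" that a test entry can
    # reference with "grid: generate_numbers".
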
    def get_tests(self, regexp=None, verbose=False, cleanup=True):
        """get tests based on a regular expression.

        Arguments:
         - regexp (str) : if provided, only include tests that match.
        """
        tests = {}
        for parent, section in self.config.items():
            for name, module in section.get("tests", {}).items():
                if regexp and not re.search(regexp, name):
                    continue

                # Get either the file path, module name, or relative path
                filename = extract_modulename(
                    section.get("filename", ""), self.input_dir
                )

                idx = 0  # Use idx to index each test with parameters
                for entry in module:
                    grid = None

                    # Grid and args cannot both be defined
                    if "args" in entry and "grid" in entry:
                        bot.exit(f"{name} has defined both a grid and args.")

                    # If we find a grid, it has to reference an existing grid
                    if "grid" in entry and entry["grid"] not in self.grids:
                        bot.exit(
                            f"{name} needs grid {entry['grid']} but not found in grids."
                        )

                    # Load the existing grid and update it with test params
                    if "grid" in entry and entry["grid"] in self.grids:
                        grid = self.grids[entry["grid"]]
                        params = deepcopy(entry)
                        for key in ["grid", "instance"]:
                            if key in params:
                                del params[key]
                        grid.params.update(params)

                    # A class function is tested over its instance grid
                    instance_grid = [{}]

                    # If entry is defined without a grid, we need to generate it
                    if not grid:
                        grid = Grid(
                            name=name, params=entry, filename=filename, refs=self.grids
                        )

                    # If the grid has an instance, add the correct args to it
                    if "self" in grid.args and "grid" in grid.args["self"]:
                        instance_grid = self.grids.get(
                            grid.args["self"]["grid"], [{}]
                        )

                    # If the grid is cached, we already have parameter sets
                    argsets = grid
                    if grid.cache:
                        argsets = grid.argsets

                    # Iterate over argsets for a grid, get overlapping args
                    for extra_args in instance_grid:
                        for argset in argsets:
                            updated = deepcopy(grid.params)

                            # Add instance args, if needed
                            updated["args"] = argset
                            if extra_args:
                                updated["args"]["self"] = extra_args

                            tests["%s.%s" % (name, idx)] = GridTest(
                                module=parent,
                                name=name,
                                params=updated,
                                verbose=verbose,
                                cleanup=cleanup,
                                filename=filename,
                                show_progress=self.show_progress,
                            )
                            print(f"generating test {idx}", end="\r")
                            idx += 1

        return tests

    def __repr__(self):
        return "[gridtest|%s]" % self.name

    def __str__(self):
        return "[gridtest|%s]" % self.name