# Copyright Red Hat
#
# This file is part of relval.
#
# relval is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
# Adam Williamson <awilliam@redhat.com>
# Based on initial implementation by Josef Skladanka <jskladan@redhat.com>
# Josef's work incorporated code from Kamil Paral <kparal@redhat.com>
# HTML output by Lukas Ruzicka <lruzicka@redhat.com>
# Original in https://git.fedorahosted.org/cgit/fedora-qa.git/tree/stats

# this code is kinda irredeemable from these perspectives
# pylint: disable=too-many-locals,too-many-statements,too-many-branches,too-many-instance-attributes

"""This file contains functions that help implement the
'testcase-stats' sub-command of relval, which produces statistics
about release validation test coverage in HTML format.
"""

import datetime
import importlib.resources
import os
import re
import random
import shutil
from collections import defaultdict, Counter, OrderedDict
from jinja2 import Environment, PackageLoader
from operator import attrgetter
from typing import Self, Any
from markupsafe import escape

import wikitcms.result as wres
import wikitcms.page as wpage


# Matches any single character that the regex engine does not class as a
# "word" character (\W).
SANITY_SUB = re.compile(r"\W")
# Non-greedy match of everything from a '<' up to the next '>' (newlines
# excluded), i.e. text shaped like an HTML tag. specify_link() uses it to
# strip such markup from section names.
NOHTML = re.compile(r"<.*?>")


class Test:
    """An object representing information for one 'unique test' (see
    main() for definition). Counts total passes, fails and warns for
    the test across all composes as 'p', 'f' and 'w', and creates a
    'bitmap' of these totals as a dict keyed by compose, each entry
    being a list with two entries, the first being the results total
    and the second a string that is used to style the HTML output
    depending on if the results are all passes, all fails or a mix.
    Keeps a note of the most recent compose for which it was sent a
    result row: this relies on the result rows being sent in the
    correct order, this class does not know how to order the results
    it is sent.

    Stores lists of the wikitcms result objects for this test in
    three more dicts, one each for 'passes', 'fails' and warns', keyed
    by compose. As it extracts result objects from the result row, it
    notes the environments for which any results were provided, and
    divides this into the number of possible environments to produce
    a coverage percentage for each compose. This information is used
    to generate the detailed result pages.
    """

    def __init__(self):
        self.passes = defaultdict(list)
        self.warns = defaultdict(list)
        self.fails = defaultdict(list)
        self.coverage = defaultdict(int)
        self.composes = set()
        self.last_tested = ""
        self.last_tested_by_env = {}
        self.results_by_envs = {}
        self.envs = set()
        self.resultenvs = set()
        self.milestone = ""
        self.rescount = 0
        self.secid = 0
        self.bugs = []

    def update_weblink(self, weblink: str) -> str:
        """This method creates a valid test link from the given string
        which is based on section names (tables in matrices). In merges that
        with the Wiki URL and replaces white spaces with underscores to
        make it valid."""
        weblink = f"https://fedoraproject.org/wiki/{weblink}"
        weblink = weblink.replace(" ", "_")
        return weblink

    def update(self, compose: str, row: wres.ResultRow, link: str) -> None:
        """Passed a result row and the 2-tuple that identifies the
        compose it comes from. Parses the data from the result row as
        described in the class description. Assumes there will only
        ever be one result row per test instance per compose: if this
        is ever not true, the bitmap will represent only the results
        from the last row seen, and the coverage percentage will be
        inaccurate.
        """
        self.milestone = row.milestone
        self.composes.add(compose)
        self.secid = row.secid
        self.envs.update(list(row.results.keys()))
        link = self.update_weblink(link)
        rlists = list(row.results.items())
        pas = 0
        fail = 0
        warn = 0
        envs = 0
        for env, rlist in rlists:
            # Iterate over the lsit of result rows and deal with its content,
            # and divide the results into groups based on test environments.
            self.last_tested_by_env.setdefault(env, "nottested")
            self.resultenvs.add(env)
            envs += 1

            if not rlist:
                # If no result is there for a particular row, let us create
                # an empty record and store it.
                if env not in self.results_by_envs:
                    self.results_by_envs[env] = [(compose, "na", "na", link)]
                else:
                    self.results_by_envs[env].append((compose, "na", "na", link))
            for result in rlist:
                # If there are results in rlist, create result records accordingly.
                # The result record is a tuple that contains the following
                # information: which compose it originates, what is the result, if
                # performed by bot or manually, and the link to the test matrix.
                # Add this results to the result counter
                self.rescount += 1
                self.bugs = self.bugs + result.bugs
                bot = "manual"
                if result.bot:
                    bot = "auto"
                if result.status == "pass":
                    self.last_tested = compose
                    self.last_tested_by_env[env] = compose
                    self.passes[compose].append((env, result))
                    pas += 1
                    if env not in self.results_by_envs:
                        self.results_by_envs[env] = [(compose, "pass", bot, link)]
                    else:
                        self.results_by_envs[env].append((compose, "pass", bot, link))
                if result.status == "fail":
                    self.last_tested = compose
                    self.last_tested_by_env[env] = compose
                    self.fails[compose].append((env, result))
                    fail += 1
                    if env not in self.results_by_envs:
                        self.results_by_envs[env] = [(compose, "fail", bot, link)]
                    else:
                        self.results_by_envs[env].append((compose, "fail", bot, link))
                if result.status == "warn":
                    self.last_tested = compose
                    self.last_tested_by_env[env] = compose
                    self.warns[compose].append((env, result))
                    warn += 1
                    if env not in self.results_by_envs:
                        self.results_by_envs[env] = [(compose, "warn", bot, link)]
                    else:
                        self.results_by_envs[env].append((compose, "warn", bot, link))
        self.coverage[compose] = envs * 100 // len(row.results)

    def merge_from(self, other: Self) -> None:
        """Passed another Test object, this merges the contents of
        that Test into this one. Used by the post-processor to clean
        results.
        """
        for compose in other.composes:
            self.composes.add(compose)
            self.passes[compose] = other.passes[compose]
            self.fails[compose] = other.fails[compose]
            self.warns[compose] = other.warns[compose]
            self.coverage[compose] = other.coverage[compose]
            pas = len(other.passes[compose])
            fail = len(other.fails[compose])
            warn = len(other.warns[compose])
        if not self.last_tested:
            self.last_tested = other.last_tested


def recalculate_results(
    results_by_envs: dict[str, list[tuple[str, str, str, str]]], secturl: str
) -> dict[str, list[tuple[str, int, int, str, str]]]:
    """Collapse the per-environment records into one entry per compose.

    An environment can carry several results for the same compose. In
    that case, the more repetitive results, the darker the graphical
    representation must be: a.k.a. heatmap.

    If the results differ, the overall result shown represents the
    worst result recorded, i.e. one fail makes the overall a fail, no
    fails but one warn makes it a warn, and only if all results are
    passes can the overall result be a pass (see heat_results()).

    When at least one of the results is manual the entry is marked
    'manual'; it is marked 'auto' only when there are automated
    results and no manual one, and 'na' otherwise. The marker is
    prefixed with 'mst' when the compose name contains 'Alpha',
    'Beta', 'Final' or 'RC'.

    Returns a dict keyed by environment. Each value is a list with one
    (overall, heat, total, origin, link) tuple per compose, in the
    order the composes first appear in the input. 'link' is the link of
    the first record of the compose with the section anchor appended.
    """
    recalculated = {}
    for env, restuples in results_by_envs.items():
        # For the heat matrix we need to merge results for the same
        # compose, so first split the records according to the composes.
        composed = {}
        for result in restuples:
            composed.setdefault(result[0], []).append(result[1:])
        # Now go through the composes and heat the results.
        heated_results = []
        for compose, reslist in composed.items():
            statuses = [entry[0] for entry in reslist]
            origins = [entry[1] for entry in reslist]
            try:
                # The link is only taken once, as it should be the same
                # for all results of one compose.
                link = reslist[0][2]
            except IndexError:
                link = ""
            else:
                link = specify_link(link, secturl)
            if "manual" in origins:
                robot = "manual"
            elif "auto" in origins:
                robot = "auto"
            else:
                robot = "na"
            # Keep the compose info for important milestones. The prefix
            # is added once even if several keywords match (a name such as
            # "Final RC1" contains two of them).
            if any(marker in compose for marker in ("Alpha", "Beta", "Final", "RC")):
                robot = f"mst{robot}"
            overall, heat, total = heat_results(statuses)
            heated_results.append((overall, heat, total, robot, link))
        recalculated[env] = heated_results
    return recalculated


def heat_results(results: list[str]) -> tuple[str, int, int]:
    """Summarise all results of one compose as (overall, heat, total).

    Rules:
    - at least one 'fail' makes the overall result a fail
    - no fail but at least one 'warn' makes it a warn
    - only passes make it a pass
    - when there is no pass, warn or fail at all the overall is 'na'
    - for a fail the heat is fails + warns - passes, for a warn it is
      warns - passes and for a pass it is the number of passes
    - a heat that would be negative is raised to zero, so that a
      fail or warn outnumbered by passes gets the lightest shade

    'total' counts passes, fails and warns; any other value in
    'results' is ignored.
    """
    tally = Counter(results)
    good = tally["pass"]
    bad = tally["fail"]
    shaky = tally["warn"]

    # Contradicting results cool each other down: passes are subtracted
    # from the fails/warns that decided the overall status.
    if bad:
        verdict, temperature = "fail", bad + shaky - good
    elif shaky:
        verdict, temperature = "warn", shaky - good
    elif good:
        verdict, temperature = "pass", good
    else:
        verdict, temperature = "na", 0

    if temperature < 0:
        temperature = 0

    return (verdict, temperature, good + bad + shaky)


def get_bitmap_html(
    recalculated_results: dict[str, list[tuple[str, int, int, str, str]]],
) -> dict[str, list[dict[str, str | int]]]:
    """Iterates over recalculated results (already heated) and replaces
    the result and heat strings with HTML entities to display nice
    colorful symbols instead. We distinguish various grades of fail, warn,
    and pass, as well as if the results come from a bot or a real user,
    and also if the results were gained during a Beta or Final RC compose.
    """
    bitmapped_results = {}
    # Heat colours for failures
    fail_heats = {
        0: "#FF9999",  # 90
        1: "#FF6666",  # 75
        2: "#FF0000",  # 60
        3: "#B30000",  # 50
    }
    # Heat colours for passes
    pass_heats = {
        0: "#ADEBAD",  # 80
        1: "#70DB70",  # 65
        2: "#33CC33",  # 50
        3: "#1F7A1F",  # 40
    }
    # Heat colours for warnings
    warn_heats = {
        0: "#FFD699",
        1: "#FFB84D",
        2: "#FF9900",
        3: "#CC7A00",
    }
    for environment, results in recalculated_results.items():
        # Non-testing symbol that indicates that there are
        # no results provided.
        symbol = "&#9679;"
        # The result will be a symbol together with a heat
        # symbol - a tuple. The heat symbol will be used to make the
        # color different in CSS in the template.
        # The default symbol will be "not tested" and "no heat".
        for result in results:
            res, heat, total, bot, link = result
            color = "#DDDDDD"
            # Fix heat values to fall into the color grid to
            # only present three colours for displaying clarity.
            reached = heat
            if heat < 0:
                heat *= -1
            if heat > 5:
                heat = 3
            elif heat > 2:
                heat = 2
            elif heat > 0:
                heat = 1
            # For bot user, change symbol
            if bot == "auto":
                symbol = "&#9679;"
            elif bot == "manual":
                symbol = "&#9670;"
            elif bot == "mstauto":
                symbol = "&#9679;"
            elif bot == "mstmanual":
                symbol = "&#9670;"
            if res == "pass":
                color = pass_heats[heat]
            elif res == "fail":
                color = fail_heats[heat]
            elif res == "warn":
                color = warn_heats[heat]
            else:
                color = "#DDDDDD"
                symbol = "&#9679;"
            output = {
                "symbol": symbol,
                "color": color,
                "heat": reached,
                "link": link,
                "result": res,
                "total": total,
            }
            if environment not in bitmapped_results:
                bitmapped_results[environment] = [output]
            else:
                bitmapped_results[environment].append(output)
    return bitmapped_results


def post_process(
    tests: dict[tuple[str, str, str], Test], allcomposes: list[str]
) -> dict[tuple[str, str, str], Test]:
    """Crazy magic voodoo which cleans up the results after they've
    been turned into Test() objects but before we feed them to
    print_results_html(). Is passed the tests dict for a testtype,
    keyed by (name, testcase, section), and the list of all composes
    for which any tests at all exist in the set of results being
    considered. Tests that get merged away are removed from the dict
    that was passed in; the return value is a new, re-sorted dict.

    The first half of the processor tries to merge tests which are
    'really' the same. First thing it does is remove lots of tests from
    consideration entirely. If the test case occurs exactly once in the
    rows for the test type, we don't currently do any merges that would
    be relevant to it, so we throw it away (we're not handling test
    case renames yet). It also throws out any test which appears in the
    page for every single compose; again, none of the merges we
    currently do would merge into such a test.

    Next, it tries to catch cases where tests were moved between
    sections or sections were renamed. It iterates over the cases that
    remain in its 'to be considered' list. For each one, it finds the
    other tests further down the list which have the same test name +
    test case (and hence only differ in section name). It then filters
    those tests down to only the ones which do not include results for
    any of the same composes as the test being matched. It then reduces
    the 'candidate' list once more, by including only tests which have
    all the environments (result columns) the test being matched has
    results for.

    Then it merges the test being matched with the first test in the
    remaining candidate list, if there still *is* one. That might sound
    a little rash, but in practice, it's pretty much always correct.
    There *are* cases where multiple candidates still remain after the
    final sort, but given how we organize the pages, the top candidate
    in the list is almost always the right one - because the list is
    ordered and we're comparing top down. Say a page has three sections
    with identical names, and each section contains all the same test
    cases and environments, and all three sections get renamed between
    two composes: this parser handles that correctly. When it hits the
    first section in the list and runs through all its checks, there
    will be three candidates remaining - but the top one will be the
    correct one, because essentially you have all six sections in order
    at this point, and the results from section 1 drop into section 4,
    then from section 2 into section 5, and section 3 into section 6.

    For an encore, it re-sorts the results by section. Using an ordered
    dict for tests as it's created keeps them in order so long as
    the pages are perfectly consistent, but when there's a difference
    between pages, the 'changed' tests just get stuck on the bottom.
    So what we do here is we find all the sections in the final
    result list, then we iterate over the whole list updating the
    'section id' for each section as we go. Each result row stores the
    'id' - the numerical index - of the section it was in at the time
    wikitcms ran into it. With this trick, what we wind up with is a
    dict of all the sections in the final results, with each one having
    the highest numerical index found among its tests. This gives us a
    sort order for the sections which should usually reflect the order
    of the sections on the last page in the set of pages.

    Then we build a new copy of the tests dict, sorted by the test's
    section name's index from the dict we just created. That way all
    the results with the same section name stay together, and the
    sections themselves are ordered correctly.

    The other nice thing about all this crazy magic voodoo is if you
    are currently making the sign of the cross at the screen, it all
    drops out completely if you just don't call the post-processor;
    send tests straight to print_results_html() and you'll get the raw
    results with none of this processing voodoo.
    """
    testlist = list(tests)
    cnt = Counter(testcase for (_, testcase, _) in testlist)
    print(f"Total unique tests: {len(testlist)}")

    # don't consider test cases that occur only once, or tests which
    # exist for all composes
    everycompose = set(allcomposes)
    testlist = [
        tst for tst in testlist if cnt[tst[1]] != 1 and tests[tst].composes != everycompose
    ]
    print(
        "Merge candidates (tests with more than one occurrence, that do not "
        f"occur in all composes): {len(testlist)}"
    )

    # next iteration: now we're only looking at merge candidate cases
    for i, test in enumerate(testlist):
        source = tests[test]
        # we only ever merge "down" - older results into newer ones - so
        # only later entries qualify: same name and test case, no compose
        # in common, and all environments the source has results for.
        target = next(
            (
                cand
                for cand in testlist[i + 1 :]
                if cand[:2] == test[:2]
                and source.composes.isdisjoint(tests[cand].composes)
                and source.resultenvs <= tests[cand].envs
            ),
            None,
        )
        if target is None:
            # nothing to merge into; the test stays as it is
            continue
        # Deliberately not wrapped in a try block: an error raised by the
        # merge itself must not be mistaken for "no candidate".
        tests[target].merge_from(source)
        tests.pop(test)
        print(
            f"Merged test case: {test[1]} with name: {test[0]} from section: {test[2]} "
            f"into test case: {target[1]} with name: {target[0]} from section: {target[2]}"
        )

    # this finds all section names and stores the highest index found for
    # each in a dict keyed on the section name
    highest = {}
    for (_, _, section), test in tests.items():
        if section not in highest or test.secid > highest[section]:
            highest[section] = test.secid
    secids = {section: int(secid) for section, secid in highest.items()}

    # now we build the new tests dict sorted by the index for the section
    # name from the dict we just built (the sort is stable, so tests keep
    # their relative order within a section)
    return OrderedDict(sorted(tests.items(), key=lambda item: secids[item[0][2]]))


def prepare_results_data(
    testtype: str,
    pages: list[wpage.ComposePage],
    allcomposes: list,
    tests: dict[tuple[str, str, str], Test],
    outtype: str,
) -> dict[str, str | dict[str, Any]]:
    """Prepares the results into a dictionary that is passed into
    the Jinja template for the actual HTML creation.
    """
    # Originally, this was a function that composed the web page from
    # hardcoded HTML tags and wrote the file to the final location.
    # this is where we actually start doin' stuff
    # The variable to hold the dataset for the Jinja template
    final_data = {}
    # Timestamp is used to display the time of the page creation.
    now = datetime.datetime.now(datetime.UTC)
    timestamp = datetime.datetime.strftime(now, "%Y-%m-%d %H:%M %Z")
    final_data["timestamp"] = timestamp

    sections = {}  # to hold section data
    for (name, testcase, section), test in tests.items():
        # We use slightly different formats for both the detail page
        # filenames and the summary page visible names for tests whose
        # 'name' differs from their 'testcase'. We want to display and
        # use both attributes when they differ, but just use one when
        # they're the same.
        if testcase != name:
            dispname = f"{testcase} {name}"
            linkname = f"{testcase}"
        else:
            dispname = name
            linkname = name
        # If there's a section for the test case, include it in the
        # page URL in a nice way.
        if section:
            secturl = section.replace(" ", "_")
        else:
            secturl = ""

        test_row = {}
        test_row["testcase"] = testcase
        test_row["name"] = dispname
        test_row["milestone"] = test.milestone
        # Do not show empty space when not last_tested
        # show "never" instead.
        last_tested = test.last_tested
        if not last_tested:
            last_tested = "never"
        test_row["last"] = last_tested
        test_row["rescount"] = test.rescount
        # Recalculate results to obtain overall results and heats
        results_by_envs = test.results_by_envs
        recalculated_results = recalculate_results(results_by_envs, secturl)
        test_row["danger"] = find_danger(recalculated_results)
        if outtype == "html":
            test_row["results"] = get_bitmap_html(recalculated_results)
        else:
            test_row["results"] = recalculated_results
        test_row["timestamp"] = timestamp
        test_row["linktotest"] = f"https://fedoraproject.org/wiki/{linkname}"
        test_row["bugs"] = test.bugs
        test_row["url"] = secturl
        # Append collected test_row into the section collection.
        if section not in sections:
            sections[section] = [test_row]
        else:
            sections[section].append(test_row)

        final_data["sections"] = sections
    # Iterate over the sections and try to guess the tables for each section.
    for section, rows in final_data["sections"].items():
        final_data["sections"][section] = resolve_tables(rows)
    return final_data


def specify_link(link: str, secturl: str) -> str:
    """Point a link to a Wiki matrix at one particular section.

    Anything in 'secturl' that looks like an HTML tag is removed and
    the remainder is appended to 'link' as a '#' fragment."""
    anchor = NOHTML.sub("", secturl)
    return f"{link}#{anchor}"


def find_danger(recalculated_results: dict[str, list[tuple[str, int, int, str, str]]]) -> list:
    """Find the environments in which the test has not been tested for
    the last 10 occurrences.

    Each entry of an environment's list is a tuple whose first item is
    the overall result; 'na' means not tested. An environment is in
    danger when none of its ten most recent entries has another result,
    which includes an environment without any entry. The names of those
    environments are returned in input order, so that
    prepare_results_data can make them visible in the output."""
    recent = 10
    return [
        env
        for env, results in recalculated_results.items()
        if all(result[0] == "na" for result in results[-recent:])
    ]


def resolve_tables(rows: list[dict[str, Any]]) -> dict[int, list[dict[str, Any]]]:
    """Iterate over the test rows of a section and try to recreate the
    tables that originally were part of the Wiki page.

    The environments of a row (the keys of its 'results') act as the
    columns. A row whose columns equal those of the current table is
    appended to it. A row whose columns are all contained in the
    current table's columns is padded with fit_into_table() and
    appended too. Any other row starts a new table and defines its
    columns; this includes the very first row, even one that has no
    columns at all.

    Returns the tables in order of creation, keyed by random integer
    IDs.
    """
    tables = []  # Holds the rows of the newly split tables, in order.
    # Kept as the keys view of the row that opened the current table, so
    # that padded rows get their columns in that row's order.
    last_cols = set()
    for row in rows:
        cols = row["results"].keys()
        if tables and cols == last_cols:
            # Clearly another line of the same table.
            tables[-1].append(row)
        elif tables and all(col in last_cols for col in cols):
            # Fewer columns, but all of them known: fit the row into the
            # current table instead of splitting.
            tables[-1].append(fit_into_table(row, last_cols))
        else:
            # No table yet, or the row cannot fit into the current one.
            tables.append([row])
            last_cols = cols
    # We need to recreate the ID keys to make sure we have a unique ID
    # because otherwise JavaScript will be crazy about this.
    unique = {}
    for table in tables:
        key = random.randint(0, 1000000000)
        while key in unique:
            # Extremely unlikely, but a repeated ID would silently
            # replace a table that was stored earlier.
            key = random.randint(0, 1000000000)
        unique[key] = table
    return unique


def fit_into_table(row: dict[str, Any], tablecols: set[str]) -> dict[str, Any]:
    """Helper for resolve_tables: give a test row exactly the columns
    of the table it is put into.

    Columns can be missing from a row because they are grayed out in
    the validation matrix. The row's 'results' are rebuilt following
    'tablecols': existing columns are kept, every missing one gets a
    single fake 'not applicable' entry so that the table holds
    together. The row is changed in place and returned."""
    current = row["results"]
    rebuilt = {
        column: (
            current[column]
            if column in current
            else [
                {
                    "symbol": "not applicable",
                    "color": "#CCCCCC",
                    "heat": "0",
                    "result": "",
                    "total": "0",
                    "link": "",
                }
            ]
        )
        for column in tablecols
    }
    row["results"] = rebuilt
    return row


def print_results_html(data: dict, target: str) -> None:
    """Render the HTML output based on the Jinja template.

    'data' is passed to the 'testcase_statistics.html' template, which
    is loaded from the 'html' directory of the 'relval' package, and
    the rendered page is written to the file 'target', replacing any
    previous content.
    """
    jinja_env = Environment(loader=PackageLoader("relval", package_path="html"))
    teststat_template = jinja_env.get_template("testcase_statistics.html")
    rendered_html = teststat_template.render(data)

    # Write UTF-8 explicitly: without it the locale's encoding is used,
    # which can fail on non-ASCII text in the rendered page.
    with open(target, "w", encoding="utf-8") as outfile:
        outfile.write(rendered_html)
    print("Creating HTML output!\n")
