# -*- coding: utf-8 -*-


# Names exported by a star-import of this module.
__all__ = (
    "ComposeChangelog",
)


# TODO: added/removed RPMs for existing builds


import os
import json
import re
from distutils.version import LooseVersion

import kobo.pkgset
from kobo.rpmlib import parse_nvra, make_nvr, make_nvra, get_changelogs_from_header
from kobo.threads import ThreadPool, WorkerThread


def formatsize(size):
    """Format a byte count as a human-readable string with binary units.

    Values below 1024 are printed as whole bytes ("512 B"); larger values
    are scaled to KiB, MiB, GiB or TiB and printed with two decimals
    ("1.50 KiB").  Anything beyond the TiB range stays expressed in TiB.
    Negative values are formatted by magnitude and prefixed with "-".
    """
    if size < 0:
        return '-' + formatsize(-size)

    size = float(size)
    chosen = 'B'
    prec = 0
    for unit in ('KiB', 'MiB', 'GiB', 'TiB'):
        # Exactly 1024 of a unit is one of the next unit, so the boundary
        # value itself must be scaled up as well.
        if size < 1024:
            break
        size /= 1024
        prec = 2
        chosen = unit
    return '{0:.{1}f} {2}'.format(size, prec, chosen)


def to_utf8(text):
    """Decode a byte string of unknown encoding into text.

    Despite the name, the return value is decoded text, not UTF-8 encoded
    bytes.  Input that is not a byte string (for example text that has
    already been decoded) is returned unchanged instead of failing on a
    missing ``decode`` method.

    UTF-8 is tried first; it also covers plain ASCII.  Latin-1 is the
    fallback: it assigns a character to every byte value, so it cannot
    fail and no further fallback is needed.
    """
    if not isinstance(text, (bytes, bytearray)):
        return text
    try:
        return text.decode("utf8")
    except UnicodeDecodeError:
        return text.decode("latin1")


# Module-level changelog cache filled by SimpleRpmWrapperWithChangelog.
# Keys are the source RPM file name of a package (or "<nvra>.rpm" when the
# package has no sourcerpm); values are the changelog entries read from
# the first header seen for that key.
clogs = {}


class SimpleRpmWrapperWithChangelog(kobo.pkgset.SimpleRpmWrapper):
    """RPM wrapper that additionally carries the package summary and changelog.

    The changelog list is looked up in (and stored to) the module-level
    ``clogs`` cache, so every wrapper created for the same source RPM
    refers to the same list object.
    """

    __slots__ = (
        "summary",
        "changelogs",
    )

    def __init__(self, file_path, **kwargs):
        super(SimpleRpmWrapperWithChangelog, self).__init__(file_path, **kwargs)
        # Reuse the caller's transaction set, if one was passed in.
        header = kobo.rpmlib.get_rpm_header(file_path, ts=kwargs.get("ts"))

        summary = kobo.rpmlib.get_header_field(header, "summary")
        # Only byte strings need decoding.  Use the tolerant helper so a
        # summary that is not valid UTF-8 does not abort the whole run.
        # NOTE(review): whether the header field arrives as bytes or text
        # presumably depends on the rpm bindings in use - confirm.
        if isinstance(summary, bytes):
            summary = to_utf8(summary)
        self.summary = summary

        # Binary packages are keyed by the SRPM they were built from; a
        # package without sourcerpm is keyed by its own file name.
        if self.sourcerpm:
            key = self.sourcerpm
        else:
            key = "%s.rpm" % self.nvra

        # HACK: the cache is a module global; passing a reference in would
        # be cleaner.
        global clogs
        if key not in clogs:
            clogs[key] = get_changelogs_from_header(header, -1)
        self.changelogs = clogs[key]


class ReaderPool(ThreadPool):
    """Thread pool that gives its workers a shared file cache and result dict."""
    def __init__(self, file_cache, logger=None):
        ThreadPool.__init__(self, logger)
        # Cache the worker threads load RPM files through (file_cache.add(path)).
        self.file_cache = file_cache
        # Filled by the worker threads; its layout depends on the worker class.
        self.result = {}


class SrpmReaderThread(WorkerThread):
    """Worker that loads one source RPM and records it under its package name."""

    def process(self, item, num):
        pool = self.pool
        srpm = pool.file_cache.add(str(item))
        # setdefault: the first SRPM seen for a given name is the one kept.
        pool.result.setdefault(srpm.name, srpm)


class RpmReaderThread(WorkerThread):
    """Worker that loads one RPM and groups it under the name of its SRPM."""

    def process(self, item, num):
        pool = self.pool
        rpm = pool.file_cache.add(str(item))
        source_name = parse_nvra(rpm.sourcerpm)["name"]
        # One set of RPM objects per source package name.
        siblings = pool.result.setdefault(source_name, set())
        siblings.add(rpm)


def get_changelog_diff_from_headers(old, new, max_records=-1):
    """Return the entries of ``new.changelogs`` that are newer than ``old``.

    The entries of ``new.changelogs`` are walked from the first one on and
    collected until one is reached that is older than the first entry of
    ``old.changelogs``, or that carries the same timestamp and a version
    that is not greater.  The version is taken from the last
    whitespace-separated token of the entry name.  Collected entries are
    returned in the reverse of the order in which they were walked.

    Neither changelog list is modified.  The lists are shared between
    package objects through the module-level ``clogs`` cache, so consuming
    them here would corrupt every later comparison that uses the same data.

    When either changelog is empty, ``new.changelogs`` itself is returned.

    ``max_records`` is accepted for interface compatibility; it is not
    applied to the result.
    """
    old_changelog = old.changelogs
    new_changelog = new.changelogs

    if not (new_changelog and old_changelog):
        return new_changelog

    old_time = old_changelog[0].time
    old_nvr = LooseVersion(to_utf8(old_changelog[0].name).rsplit(None, 1)[-1])

    result = []
    for entry in new_changelog:
        new_nvr = LooseVersion(to_utf8(entry.name).rsplit(None, 1)[-1])
        if entry.time < old_time or (entry.time == old_time and new_nvr <= old_nvr):
            # We want to take all entries from new changelog that are newer
            # than old changelog, or from the same day as latest old entry
            # but with newer version.
            break
        result.append(entry)
    # Appending and reversing once gives the same order as inserting each
    # entry at the front, without the quadratic cost.
    result.reverse()
    return result


class FakeSRPM(object):
    """Stand-in for a source package, built from the attributes of an RPM object.

    Name, NVR and NEVRA are derived from the RPM's ``sourcerpm`` value (or
    from its own NVRA when ``sourcerpm`` is empty) combined with the RPM's
    epoch.  Summary and changelogs are taken over from the RPM object; the
    size is always 0.
    """

    def __init__(self, rpm_obj):
        source_nvra = rpm_obj.sourcerpm or rpm_obj.nvra
        fields = parse_nvra(source_nvra)
        # The parsed epoch is replaced by the one of the RPM object.
        fields['epoch'] = rpm_obj.epoch

        self.nvr = make_nvr(fields, add_epoch=True)
        self.nvra = source_nvra
        self.name = fields["name"]
        self.epoch = rpm_obj.epoch
        self.nevra = make_nvra(fields, add_epoch=True)

        self.summary = rpm_obj.summary
        self.sourcerpm = None
        self.changelogs = rpm_obj.changelogs
        self.size = 0


def get_srpm(lst, name, rpm_obj_list):
    """Return a FakeSRPM describing the source package called ``name``.

    ``lst`` maps package names to SRPM objects; when it has an entry for
    ``name`` that entry is used.  Otherwise the description is derived from
    one of the binary RPMs in ``rpm_obj_list``: preferably the "main"
    package whose name equals ``name``, else the first RPM available.

    The candidates are ordered by (name, nvra) before choosing, so the
    result does not depend on the iteration order of ``rpm_obj_list``
    (callers in this module pass a set).

    Raises IndexError when no SRPM is known and ``rpm_obj_list`` is empty.
    """
    srpm = lst.get(name)
    if srpm:
        return FakeSRPM(srpm)

    candidates = sorted(rpm_obj_list, key=lambda rpm: (rpm.name, rpm.nvra))

    # try to find "main" package (with name matching SRPM name)
    for rpm_obj in candidates:
        if rpm_obj.name == name:
            return FakeSRPM(rpm_obj)
    return FakeSRPM(candidates[0])


class ComposeChangelog(object):
    def __init__(self, strip_suffix=None):
        """Create a changelog generator.

        When ``strip_suffix`` is given, a trailing ".<strip_suffix><hex digits>"
        part is ignored when the NVRs of two packages are compared, so
        builds that differ only in that suffix are not reported as changed.
        ``strip_suffix`` is inserted into the pattern unescaped.
        """
        self.suffix_re = None
        if strip_suffix:
            # Raw string: "\." in a plain literal is an invalid escape
            # sequence, which newer Python versions warn about.
            self.suffix_re = re.compile(r'\.%s[0-9a-fA-F]+$' % strip_suffix)

        self.file_cache = kobo.pkgset.FileCache(SimpleRpmWrapperWithChangelog)

    def get_srpms(self, compose):
        """Collect the source RPMs listed in the compose's RPM manifest.

        Returns a dict mapping each SRPM NEVRA to the normalized path of
        its source package below ``compose.compose_path``.  When the same
        SRPM is listed more than once, the first path encountered is kept.
        """
        srpm_paths = {}
        for arches in compose.rpms.rpms.values():
            for builds in arches.values():
                for srpm_nevra, packages in builds.items():
                    for data in packages.values():
                        if data['category'] == 'source':
                            full_path = os.path.join(compose.compose_path, data["path"])
                            srpm_paths.setdefault(srpm_nevra, os.path.normpath(full_path))
        return srpm_paths

    def read_srpm_headers(self, srpms):
        """Load every source RPM whose path is a value of ``srpms``.

        The files are read by ten SrpmReaderThread workers sharing this
        object's file cache.  Returns the pool's result dict as filled by
        the workers; it is empty when ``srpms`` is empty.
        """
        reader_pool = ReaderPool(self.file_cache)
        for _ in range(10):
            reader_pool.add(SrpmReaderThread(reader_pool))

        for srpm_path in srpms.values():
            reader_pool.queue.put(srpm_path)

        # Nothing queued: do not start the workers at all.
        if reader_pool.queue.empty():
            return reader_pool.result

        reader_pool.start()
        reader_pool.stop()
        return reader_pool.result

    def read_rpm_headers(self, rpms):
        """Load every binary RPM whose path appears in ``rpms``.

        ``rpms`` maps SRPM NEVRAs to dicts whose values are RPM paths.  The
        files are read by a single RpmReaderThread worker sharing this
        object's file cache.  Returns the pool's result dict as filled by
        the worker; it is empty when there is nothing to read.
        """
        reader_pool = ReaderPool(self.file_cache)
        for _ in range(1):
            reader_pool.add(RpmReaderThread(reader_pool))

        for srpm_nevra in rpms:
            for rpm_path in rpms[srpm_nevra].values():
                reader_pool.queue.put(rpm_path)

        # Nothing queued: do not start the worker at all.
        if reader_pool.queue.empty():
            return reader_pool.result

        reader_pool.start()
        reader_pool.stop()
        return reader_pool.result

    def get_rpms(self, compose):
        """Collect the binary RPMs listed in the compose's RPM manifest.

        Packages whose category is "debug" or "source" are skipped.
        Returns a dict mapping each SRPM NEVRA to a dict of
        {RPM NEVRA: normalized path below ``compose.compose_path``}.  When
        an RPM is listed more than once, the first path encountered is kept.
        """
        rpm_paths = {}
        for arches in compose.rpms.rpms.values():
            for builds in arches.values():
                for srpm_nevra, packages in builds.items():
                    for rpm_nevra, data in packages.items():
                        if data["category"] not in ("debug", "source"):
                            per_build = rpm_paths.setdefault(srpm_nevra, {})
                            full_path = os.path.join(compose.compose_path, data["path"])
                            per_build.setdefault(rpm_nevra, os.path.normpath(full_path))
        return rpm_paths

    def image_id(self, img):
        """Build the identifier of an image from its serialized dict.

        The identifier is the 3-tuple (subvariant, type, arch).  It reads
        sensibly when passed through " ".join() and is hashable, so it can
        serve as a dict key.
        """
        return tuple(img[field] for field in ('subvariant', 'type', 'arch'))

    def get_images(self, compose):
        """Index the images of a compose by their identifier.

        Every image found in ``compose.images.images`` is serialized into
        a dict; the returned dict maps self.image_id(<that dict>) to the
        dict.  Images sharing an identifier overwrite each other, the last
        one visited wins.
        """
        images = {}
        for arches in compose.images.images.values():
            for image_list in arches.values():
                for image in image_list:
                    # serialize() appends the dict form to the list it is
                    # given rather than returning it.
                    serialized = []
                    image.serialize(serialized)
                    as_dict = serialized[0]
                    images[self.image_id(as_dict)] = as_dict
        return images

    def _load_compose_id(self, compose):
        """
        Return the compose ID stored in the RPM manifest of ``compose``.

        Raises `RuntimeError` naming the compose path when the ID cannot
        be reached through the manifest attributes.
        """
        try:
            compose_id = compose.rpms.compose.id
        except AttributeError:
            message = 'Failed to load metadata from %s' % compose.compose_path
            raise RuntimeError(message)
        return compose_id

    def get_changelog(self, old_compose, new_compose, max_logs=-1):
        """Compare two composes and return their differences as a dict.

        The result holds the two compose IDs, the lists "added_images" and
        "dropped_images", the lists "added_packages", "dropped_packages",
        "upgraded_packages" and "downgraded_packages" (one dict per source
        package) and a "summary" dict with counts and accumulated sizes.

        ``max_logs`` is handed over to get_changelog_diff_from_headers().
        """
        result = {
            "old_compose": self._load_compose_id(old_compose),
            "new_compose": self._load_compose_id(new_compose),
        }

        # Images are compared purely by identifier.
        old_images = self.get_images(old_compose)
        new_images = self.get_images(new_compose)
        old_image_ids = set(old_images)
        new_image_ids = set(new_images)
        result["added_images"] = [
            new_images[image_id] for image_id in new_image_ids - old_image_ids
        ]
        result["dropped_images"] = [
            old_images[image_id] for image_id in old_image_ids - new_image_ids
        ]

        srpms_old = self.get_srpms(old_compose)
        srpms_new = self.get_srpms(new_compose)
        rpms_old = self.get_rpms(old_compose)
        rpms_new = self.get_rpms(new_compose)

        # Builds listed under the same SRPM NEVRA in both composes are
        # dropped from both sides, so their headers are never read.
        for old_map, new_map in ((srpms_old, srpms_new), (rpms_old, rpms_new)):
            for nevra in [key for key in old_map if key in new_map]:
                del old_map[nevra]
                del new_map[nevra]

        srpm_headers_old = self.read_srpm_headers(srpms_old)
        srpm_headers_new = self.read_srpm_headers(srpms_new)
        rpm_headers_old = self.read_rpm_headers(rpms_old)
        rpm_headers_new = self.read_rpm_headers(rpms_new)

        names_old = set(rpm_headers_old)
        names_new = set(rpm_headers_new)
        pkgs_added = names_new - names_old
        pkgs_dropped = names_old - names_new
        pkgs_changed = names_old & names_new

        summary = {
            "added_images": len(result["added_images"]),
            "dropped_images": len(result["dropped_images"]),
            "added_packages": 0,
            "dropped_packages": 0,
            "upgraded_packages": 0,
            "downgraded_packages": 0,

            "added_packages_size": 0,
            "dropped_packages_size": 0,
            "upgraded_packages_size": 0,
            "downgraded_packages_size": 0,

            "upgraded_packages_size_change": 0,
            "downgraded_packages_size_change": 0,
        }
        result["summary"] = summary

        result["added_packages"] = []
        for name in sorted(pkgs_added):
            rpm_objs = rpm_headers_new[name]
            pkg = get_srpm(srpm_headers_new, name, rpm_objs)
            size = sum(rpm.size for rpm in rpm_objs)
            result["added_packages"].append({
                "name": pkg.name,
                "nvr": pkg.nvr,
                "summary": pkg.summary,
                "rpms": sorted({rpm.name for rpm in rpm_objs}),
                "size": size,
            })
            summary["added_packages"] += 1
            summary["added_packages_size"] += size

        result["dropped_packages"] = []
        for name in sorted(pkgs_dropped):
            rpm_objs = rpm_headers_old[name]
            pkg = get_srpm(srpm_headers_old, name, rpm_objs)
            size = sum(rpm.size for rpm in rpm_objs)
            result["dropped_packages"].append({
                "name": pkg.name,
                "nvr": pkg.nvr,
                "summary": pkg.summary,
                "rpms": sorted({rpm.name for rpm in rpm_objs}),
                "size": size,
            })
            summary["dropped_packages"] += 1
            summary["dropped_packages_size"] += size

        result["upgraded_packages"] = []
        result["downgraded_packages"] = []
        for name in sorted(pkgs_changed):
            new_rpm_objs = rpm_headers_new[name]
            old_rpm_objs = rpm_headers_old[name]
            new_package = get_srpm(srpm_headers_new, name, new_rpm_objs)
            old_package = get_srpm(srpm_headers_old, name, old_rpm_objs)

            # We need to compare releases without considering hash from
            # module build.
            if self.suffix_re and (
                self.suffix_re.sub('', new_package.nvr) == self.suffix_re.sub('', old_package.nvr)
            ):
                continue

            rpm_names = sorted({rpm.name for rpm in new_rpm_objs})
            old_rpm_names = sorted({rpm.name for rpm in old_rpm_objs})
            new_size = sum(rpm.size for rpm in new_rpm_objs)
            old_size = sum(rpm.size for rpm in old_rpm_objs)

            data = {
                "name": new_package.name,
                "nvr": new_package.nvr,
                "summary": new_package.summary,
                "rpms": rpm_names,

                "old_nvr": old_package.nvr,
                "old_rpms": old_rpm_names,

                "added_rpms": sorted(set(rpm_names) - set(old_rpm_names)),
                "dropped_rpms": sorted(set(old_rpm_names) - set(rpm_names)),
                "common_rpms": sorted(set(old_rpm_names) & set(rpm_names)),

                "size": new_size,
                "size_change": new_size - old_size,
            }
            data["changelog"] = [
                "* %s %s\n%s" % (entry.ctime, to_utf8(entry.name), to_utf8(entry.text))
                for entry in get_changelog_diff_from_headers(old_package, new_package, max_logs)
            ]

            # TODO: comps, system release
            order = kobo.rpmlib.compare_nvr(
                kobo.rpmlib.parse_nvra(old_package.nevra),
                kobo.rpmlib.parse_nvra(new_package.nevra),
            )
            # Only a strictly older old package counts as an upgrade; every
            # other outcome is reported as a downgrade.
            if order == -1:
                result["upgraded_packages"].append(data)
                summary["upgraded_packages"] += 1
                summary["upgraded_packages_size"] += data["size"]
                summary["upgraded_packages_size_change"] += data["size_change"]
            else:
                result["downgraded_packages"].append(data)
                summary["downgraded_packages"] += 1
                summary["downgraded_packages_size"] += data["size"]
                summary["downgraded_packages_size_change"] += data["size_change"]

        return result

    def _get_summary(self, changelog_data):
        """Render the "summary" part of ``changelog_data`` as a list of lines.

        Three groups are produced, each followed by an empty line: plain
        counts, total sizes and size changes.  Sizes are formatted with
        formatsize().
        """
        summary = changelog_data["summary"]
        lines = ["===== SUMMARY ====="]

        count_rows = (
            ("Added images:        %s", "added_images"),
            ("Dropped images:      %s", "dropped_images"),
            ("Added packages:      %s", "added_packages"),
            ("Dropped packages:    %s", "dropped_packages"),
            ("Upgraded packages:   %s", "upgraded_packages"),
            ("Downgraded packages: %s", "downgraded_packages"),
        )
        for template, key in count_rows:
            lines.append(template % summary[key])
        lines.append("")

        size_rows = (
            ("Size of added packages:      %s", "added_packages_size"),
            ("Size of dropped packages:    %s", "dropped_packages_size"),
            ("Size of upgraded packages:   %s", "upgraded_packages_size"),
            ("Size of downgraded packages: %s", "downgraded_packages_size"),
        )
        for template, key in size_rows:
            lines.append(template % formatsize(summary[key]))
        lines.append("")

        change_rows = (
            ("Size change of upgraded packages:   %s", "upgraded_packages_size_change"),
            ("Size change of downgraded packages: %s", "downgraded_packages_size_change"),
        )
        for template, key in change_rows:
            lines.append(template % formatsize(summary[key]))
        lines.append("")

        return lines

    def get_brief_log(self, changelog_data):
        """Render ``changelog_data`` as the brief text report.

        The report names both composes, repeats the summary and then lists
        one line per image or package in each section.  Returns a single
        string with lines joined by newlines.
        """
        lines = [
            "OLD: %s" % changelog_data["old_compose"],
            "NEW: %s" % changelog_data["new_compose"],
            "",
        ]
        lines.extend(self._get_summary(changelog_data))

        image_sections = (
            ("===== ADDED IMAGES =====", "added_images"),
            ("===== DROPPED IMAGES =====", "dropped_images"),
        )
        for title, key in image_sections:
            lines.append(title)
            for image in changelog_data[key]:
                lines.append(" ".join(self.image_id(image)))
            lines.append("")

        # Each package section has a title, the data key and a line template
        # that is filled from the package dict.
        package_sections = (
            ("===== ADDED PACKAGES =====", "added_packages", "%(name)s: %(nvr)s"),
            ("===== DROPPED PACKAGES =====", "dropped_packages", "%(name)s: %(nvr)s"),
            ("===== UPGRADED PACKAGES =====", "upgraded_packages", "%(name)s: %(old_nvr)s -> %(nvr)s"),
            ("===== DOWNGRADED PACKAGES =====", "downgraded_packages", "%(name)s: %(old_nvr)s -> %(nvr)s"),
        )
        for title, key, template in package_sections:
            lines.append(title)
            for package in changelog_data[key]:
                lines.append(template % package)
            lines.append("")

        return "\n".join(lines)

    def get_verbose_log(self, changelog_data, shorten=False):
        """Render ``changelog_data`` as the detailed text report.

        With ``shorten`` set, the RPM lists are left out everywhere, the
        summary line is left out for dropped, upgraded and downgraded
        packages, and the total size is left out for upgraded and
        downgraded packages.  Returns a single string with lines joined by
        newlines.
        """
        lines = [
            "OLD: %s" % changelog_data["old_compose"],
            "NEW: %s" % changelog_data["new_compose"],
            "",
        ]
        lines += self._get_summary(changelog_data)

        image_sections = (
            ("===== ADDED IMAGES =====", "added_images"),
            ("===== DROPPED IMAGES =====", "dropped_images"),
        )
        for title, key in image_sections:
            lines.append(title)
            for image in changelog_data[key]:
                lines.append("Image: %s" % " ".join(self.image_id(image)))
                lines.append("Path: %s" % image['path'])
            lines.append("")

        lines.append("===== ADDED PACKAGES =====")
        for package in changelog_data["added_packages"]:
            lines.append("Package: %s" % package["nvr"])
            # Unlike in the other package sections, the summary line is
            # printed here even when shortening.
            lines.append("Summary: %s" % package["summary"])
            if not shorten:
                lines.append("RPMs:    %s" % " ".join(sorted(package["rpms"])))
            lines.append("Size:    %s" % formatsize(package["size"]))
            lines.append("")
        lines.append("")

        lines.append("===== DROPPED PACKAGES =====")
        for package in changelog_data["dropped_packages"]:
            lines.append("Package: %s" % package["nvr"])
            if not shorten:
                lines.append("Summary: %s" % package["summary"])
                lines.append("RPMs:    %s" % " ".join(sorted(package["rpms"])))
            lines.append("Size:    %s" % formatsize(package["size"]))
            lines.append("")
        lines.append("")

        # Upgraded and downgraded packages share one layout.
        changed_sections = (
            ("===== UPGRADED PACKAGES =====", "upgraded_packages"),
            ("===== DOWNGRADED PACKAGES =====", "downgraded_packages"),
        )
        for title, key in changed_sections:
            lines.append(title)
            for package in changelog_data[key]:
                lines.append("Package:      %s" % package["nvr"])
                lines.append("Old package:  %s" % package["old_nvr"])
                if not shorten:
                    lines.append("Summary:      %s" % package["summary"])
                    lines.append("RPMs:         %s" % " ".join(sorted(package["rpms"])))
                if package["added_rpms"]:
                    lines.append("Added RPMs:   %s" % " ".join(package["added_rpms"]))
                if package["dropped_rpms"]:
                    lines.append("Dropped RPMs: %s" % " ".join(package["dropped_rpms"]))
                if not shorten:
                    lines.append("Size:         %s" % formatsize(package["size"]))
                lines.append("Size change:  %s" % formatsize(package["size_change"]))
                if package["changelog"]:
                    lines.append("Changelog:")
                    for entry in package["changelog"]:
                        # Indent every line of the entry; an empty line
                        # separates consecutive entries.
                        lines.extend("  %s" % line for line in entry.splitlines())
                        lines.append("")
                lines.append("")
            lines.append("")

        return "\n".join(lines)

    def write(self, data, path=None, name=None, verbose=False):
        """Write ``data`` to three files: JSON, brief log and verbose log.

        The files are called "changelog[-<name>].json", ".brief" and
        ".verbose"; spaces, slashes and backslashes in the name are
        replaced by underscores.  They are created in ``path``, or relative
        to the current directory when ``path`` is not given.
        """
        base = "changelog-%s" % name if name else "changelog"
        for char in (" ", "/", "\\"):
            base = base.replace(char, "_")

        directory = path or ""

        # json
        with open(os.path.join(directory, "%s.json" % base), "w") as json_file:
            json.dump(data, json_file, sort_keys=True, indent=4)

        # brief
        with open(os.path.join(directory, "%s.brief" % base), "w") as brief_file:
            brief_file.write(self.get_brief_log(data))

        # verbose
        # NOTE(review): ``verbose`` is passed on as ``shorten``, so
        # verbose=True produces the shortened report - confirm this is the
        # intended meaning of the flag.
        with open(os.path.join(directory, "%s.verbose" % base), "wb") as verbose_file:
            verbose_text = self.get_verbose_log(data, shorten=verbose)
            verbose_file.write(verbose_text.encode('utf-8'))
