"""
These codes are adopted from LEAF with some modifications.
"""

import hashlib
import logging
import os

from easyfl.datasets.utils import util

logger = logging.getLogger(__name__)


def _md5_of_file(file_path):
    """Return the hexadecimal MD5 digest of the bytes stored at ``file_path``."""
    # The context manager closes the handle deterministically; with very many
    # files, relying on garbage collection to close them risks exhausting
    # file descriptors.
    with open(file_path, "rb") as f:
        return hashlib.md5(f.read()).hexdigest()


def _hash_entries(base_folder, entries, label):
    """Hash every file referenced by ``entries``.

    Args:
        base_folder: Directory that each entry's file path is joined onto.
        entries: Iterable of ``(class, file)`` pairs.
        label: Word inserted into the progress log message
            (e.g. ``"class"`` or ``"write"``).

    Returns:
        A list of ``(class, file, md5_hex)`` tuples in the order of ``entries``.
    """
    hashes = []
    for count, (cclass, cfile) in enumerate(entries):
        # Progress message every 100000 files (including the very first one).
        if count % 100000 == 0:
            logger.info("hashed %d %s images", count, label)

        file_path = os.path.join(base_folder, cfile)
        hashes.append((cclass, cfile, _md5_of_file(file_path)))
    return hashes


def get_hash(base_folder):
    """Compute MD5 hashes for the files listed in the intermediate indexes.

    Loads the objects stored at ``<base_folder>/intermediate/class_file_dirs``
    and ``<base_folder>/intermediate/write_file_dirs`` via ``util.load_obj``,
    hashes every referenced file, and stores the resulting lists of
    ``(class, file, md5_hex)`` tuples via ``util.save_obj`` at
    ``<base_folder>/intermediate/class_file_hashes`` and
    ``<base_folder>/intermediate/write_file_hashes`` respectively.

    Args:
        base_folder: Root directory containing the ``intermediate`` folder;
            each listed file path is joined onto this directory.

    Returns:
        None. Results are persisted through ``util.save_obj``.
    """
    intermediate = os.path.join(base_folder, "intermediate")

    # Load both indexes up front so a missing index fails before any hashing.
    class_file_dirs = util.load_obj(os.path.join(intermediate, "class_file_dirs"))
    write_file_dirs = util.load_obj(os.path.join(intermediate, "write_file_dirs"))

    class_file_hashes = _hash_entries(base_folder, class_file_dirs, "class")
    util.save_obj(class_file_hashes, os.path.join(intermediate, "class_file_hashes"))

    write_file_hashes = _hash_entries(base_folder, write_file_dirs, "write")
    util.save_obj(write_file_hashes, os.path.join(intermediate, "write_file_hashes"))