12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455 |
- """
- This code is adapted from LEAF, with some modifications.
- """
- import hashlib
- import logging
- import os
- from easyfl.datasets.utils import util
- logger = logging.getLogger(__name__)
def _hash_files(base_folder, file_dirs, label):
    """Return ``(class, file, md5_hex)`` triples for every entry in *file_dirs*.

    Args:
        base_folder: Directory that each entry's file path is joined onto.
        file_dirs: Iterable of ``(class, file)`` pairs.
        label: Word inserted into the progress log message
            (``"hashed <n> <label> images"``).

    Returns:
        A list of ``(class, file, md5_hex)`` tuples in input order, where
        ``md5_hex`` is the MD5 hex digest of the file's raw bytes.
    """
    hashes = []
    for count, (cclass, cfile) in enumerate(file_dirs):
        # Progress heartbeat; arguments are passed lazily so the message is
        # only formatted when INFO logging is enabled.
        if count % 100000 == 0:
            logger.info("hashed %d %s images", count, label)

        file_path = os.path.join(base_folder, cfile)
        # Close each file deterministically: a bare open(...).read() leaves
        # the handle to the garbage collector, which can exhaust file
        # descriptors when very many files are processed.
        with open(file_path, "rb") as image_file:
            chash = hashlib.md5(image_file.read()).hexdigest()

        hashes.append((cclass, cfile, chash))
    return hashes


def get_hash(base_folder):
    """Hash every listed image file and store the results under ``intermediate``.

    Loads the ``class_file_dirs`` and ``write_file_dirs`` objects from
    ``<base_folder>/intermediate`` via ``util.load_obj``, computes the MD5 hex
    digest of each referenced file, and writes the resulting
    ``(class, file, md5_hex)`` lists back with ``util.save_obj`` as
    ``class_file_hashes`` and ``write_file_hashes`` respectively.

    Args:
        base_folder: Root directory containing the ``intermediate`` folder;
            each listed file path is joined onto it before being opened.

    Returns:
        None. Results are persisted through ``util.save_obj``.

    Raises:
        OSError: If a listed file cannot be opened or read.
    """
    intermediate = os.path.join(base_folder, "intermediate")

    # NOTE(review): the on-disk format is whatever util.load_obj/save_obj use;
    # each loaded object is expected to be an iterable of (class, file) pairs.
    class_file_dirs = util.load_obj(os.path.join(intermediate, "class_file_dirs"))
    write_file_dirs = util.load_obj(os.path.join(intermediate, "write_file_dirs"))

    # Save the class hashes before starting the write pass, so a failure in
    # the second pass does not discard the first pass's results.
    class_file_hashes = _hash_files(base_folder, class_file_dirs, "class")
    util.save_obj(class_file_hashes, os.path.join(intermediate, "class_file_hashes"))

    write_file_hashes = _hash_files(base_folder, write_file_dirs, "write")
    util.save_obj(write_file_hashes, os.path.join(intermediate, "write_file_hashes"))
|