1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071 |
- """
- These codes are adopted from LEAF with some modifications.
- Creates .pkl files for:
- 1. list of directories of every image in 'by_class'
- 2. list of directories of every image in 'by_write'
- the hierarchical structure of the data is as follows:
- - by_class -> classes -> folders containing images -> images
- - by_write -> folders containing writers -> writer -> types of images -> images
- the paths written into the files are of the form '<base_folder>/raw_data/...'
- """
- import os
- from easyfl.datasets.utils import util
def _sorted_subdirs(parent):
    """Return the names of the sub-directories of *parent*, sorted.

    Plain files (e.g. ``.DS_Store`` or leftover archives) are skipped so that
    callers can safely descend into every returned name.
    """
    return sorted(
        name for name in os.listdir(parent)
        if os.path.isdir(os.path.join(parent, name))
    )


def get_file_dir(base_folder):
    """Index every image under ``raw_data`` and save the resulting lists.

    Two trees below ``base_folder`` are walked:

    - ``raw_data/by_class/<class>/<subfolder>/<image>`` -- only class folders
      whose name is exactly two characters long are used, and only
      sub-folders whose name contains ``"hsf"`` but not ``"mit"``.
    - ``raw_data/by_write/<part>/<writer>/<type>/<image>``.

    Two lists are then handed to ``util.save_obj``:

    - ``(class, image path)`` tuples, saved under
      ``<base_folder>/intermediate/class_file_dirs``;
    - ``(writer, image path)`` tuples, saved under
      ``<base_folder>/intermediate/write_file_dirs``.

    Image paths are built by joining onto ``base_folder``, so they are
    absolute or relative exactly as ``base_folder`` is.

    Args:
        base_folder: Folder that contains the ``raw_data`` directory.

    Returns:
        None. The results are persisted through ``util.save_obj``.
    """
    class_files = []  # (class, file path)
    write_files = []  # (writer, file path)

    class_dir = os.path.join(base_folder, "raw_data", "by_class")
    for cl in _sorted_subdirs(class_dir):
        # Class folders have two-character names; skip anything else.
        if len(cl) != 2:
            continue
        cldir = os.path.join(class_dir, cl)
        for subcl in _sorted_subdirs(cldir):
            if "hsf" not in subcl or "mit" in subcl:
                continue
            subcldir = os.path.join(cldir, subcl)
            # os.listdir order is arbitrary; sort for reproducible output.
            class_files.extend(
                (cl, os.path.join(subcldir, image))
                for image in sorted(os.listdir(subcldir))
            )

    write_dir = os.path.join(base_folder, "raw_data", "by_write")
    for write_part in _sorted_subdirs(write_dir):
        writers_dir = os.path.join(write_dir, write_part)
        for writer in _sorted_subdirs(writers_dir):
            writer_dir = os.path.join(writers_dir, writer)
            for wtype in _sorted_subdirs(writer_dir):
                type_dir = os.path.join(writer_dir, wtype)
                write_files.extend(
                    (writer, os.path.join(type_dir, image))
                    for image in sorted(os.listdir(type_dir))
                )

    # Make sure the output folder exists before saving into it.
    out_dir = os.path.join(base_folder, "intermediate")
    os.makedirs(out_dir, exist_ok=True)

    util.save_obj(class_files, os.path.join(out_dir, "class_file_dirs"))
    util.save_obj(write_files, os.path.join(out_dir, "write_file_dirs"))
|