get_file_dirs.py 2.6 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071
"""
This code is adapted from LEAF with some modifications.
Creates .pkl files for:
1. list of directories of every image in 'by_class'
2. list of directories of every image in 'by_write'
The hierarchical structure of the data is as follows:
- by_class -> classes -> folders containing images -> images
- by_write -> folders containing writers -> writer -> types of images -> images
The directories written into the files are of the form '<base_folder>/raw_data/...'.
"""
  11. import os
  12. from easyfl.datasets.utils import util
  13. def get_file_dir(base_folder):
  14. class_files = [] # (class, file directory)
  15. write_files = [] # (writer, file directory)
  16. class_dir = os.path.join(base_folder, "raw_data", "by_class")
  17. rel_class_dir = os.path.join(base_folder, "raw_data", "by_class")
  18. classes = os.listdir(class_dir)
  19. classes = [c for c in classes if len(c) == 2]
  20. for cl in classes:
  21. cldir = os.path.join(class_dir, cl)
  22. rel_cldir = os.path.join(rel_class_dir, cl)
  23. subcls = os.listdir(cldir)
  24. subcls = [s for s in subcls if (("hsf" in s) and ("mit" not in s))]
  25. for subcl in subcls:
  26. subcldir = os.path.join(cldir, subcl)
  27. rel_subcldir = os.path.join(rel_cldir, subcl)
  28. images = os.listdir(subcldir)
  29. image_dirs = [os.path.join(rel_subcldir, i) for i in images]
  30. for image_dir in image_dirs:
  31. class_files.append((cl, image_dir))
  32. write_dir = os.path.join(base_folder, "raw_data", "by_write")
  33. rel_write_dir = os.path.join(base_folder, "raw_data", "by_write")
  34. write_parts = os.listdir(write_dir)
  35. for write_part in write_parts:
  36. writers_dir = os.path.join(write_dir, write_part)
  37. rel_writers_dir = os.path.join(rel_write_dir, write_part)
  38. writers = os.listdir(writers_dir)
  39. for writer in writers:
  40. writer_dir = os.path.join(writers_dir, writer)
  41. rel_writer_dir = os.path.join(rel_writers_dir, writer)
  42. wtypes = os.listdir(writer_dir)
  43. for wtype in wtypes:
  44. type_dir = os.path.join(writer_dir, wtype)
  45. rel_type_dir = os.path.join(rel_writer_dir, wtype)
  46. images = os.listdir(type_dir)
  47. image_dirs = [os.path.join(rel_type_dir, i) for i in images]
  48. for image_dir in image_dirs:
  49. write_files.append((writer, image_dir))
  50. util.save_obj(
  51. class_files,
  52. os.path.join(base_folder, "intermediate", "class_file_dirs"))
  53. util.save_obj(
  54. write_files,
  55. os.path.join(base_folder, "intermediate", "write_file_dirs"))