# Shellmiao/EasyFL_with_PgFed
"""
This code is adapted from LEAF with some modifications.

It converts a list of (writer, [list of (file,class)]) tuples into a json object of the form:
  {users: [bob, etc], num_samples: [124, etc.],
  user_data: {bob : {x:[img1,img2,etc], y:[class1,class2,etc]}, etc}},
where "img_" is a vectorized representation of the corresponding image.
"""

from __future__ import division

import json
import math
import os

import numpy as np
from PIL import Image

from easyfl.datasets.utils import util

# Batch size for the output: data_to_json() flushes the accumulated writers
# to a new all_data_<index>.json file each time this many have been collected.
MAX_WRITERS = 100  # max number of writers per json file.


def relabel_class(c):
    """
    converts a class code, given as a hexadecimal string, to a class index
    args:
    - c: hexadecimal string such as "30", "4a" or "7a"
    returns:
    - 0 through 9 for codes "30" through "39" (the digits)
    - 10 through 35 for codes "41" through "5a" (uppercase letters)
    - 36 through 61 for codes "61" through "7a" (lowercase letters)
    """
    # Codes made only of decimal digits and below 40 are the digit classes;
    # for them the decimal reading minus 30 is already the class index.
    if c.isdigit():
        decimal_reading = int(c)
        if decimal_reading < 40:
            return decimal_reading - 30

    code_point = int(c, 16)
    # Uppercase letters end at code point 90 and are shifted to start at 10;
    # everything above is treated as lowercase and shifted to start at 36.
    offset = 55 if code_point <= 90 else 61
    return code_point - offset


def _image_to_vector(file_path, size):
    """Loads one image and returns it as a flat list of floats in [0, 1].

    The image is converted to 8-bit grayscale, shrunk in place to fit within
    ``size`` and flattened row by row.
    """
    # The context manager releases the file handle as soon as the pixels have
    # been read; the dataset has far too many files to leave handles open.
    with Image.open(file_path) as img:
        gray = img.convert("L")
        # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
        gray.thumbnail(size, Image.LANCZOS)
        pixels = np.asarray(gray).flatten()
    # scale all pixel values to between 0 and 1
    return (pixels / 255).tolist()


def _write_batch(out_dir, json_index, users, num_samples, user_data):
    """Dumps one batch of writers to ``all_data_<json_index>.json`` in out_dir."""
    all_data = {
        "users": users,
        "num_samples": num_samples,
        "user_data": user_data,
    }

    file_name = "all_data_%d.json" % json_index
    print("writing %s" % file_name)

    with open(os.path.join(out_dir, file_name), "w") as outfile:
        json.dump(all_data, outfile)


def data_to_json(base_folder):
    """Converts the per-writer image lists into batched JSON files.

    Loads the object stored at ``<base_folder>/intermediate/images_by_writer``
    through ``util.load_obj``; it is iterated as (writer, [(file, class), ...])
    tuples. Every image is vectorized, every class is relabeled with
    ``relabel_class``, and the result is written to
    ``<base_folder>/all_data/all_data_<index>.json`` with at most MAX_WRITERS
    writers per file. Each file has the form
    ``{"users": [...], "num_samples": [...], "user_data": {writer: {"x": [...], "y": [...]}}}``.

    A final batch with fewer than MAX_WRITERS writers is written as well, so
    ceil(len(writers) / MAX_WRITERS) files are produced in total.

    Args:
        base_folder: Root data directory; image file paths are joined onto it.

    Returns:
        None. The output is the set of JSON files written to disk.
    """
    by_writer_dir = os.path.join(base_folder, "intermediate", "images_by_writer")
    writers = util.load_obj(by_writer_dir)

    # Create the output directory up front so the first dump cannot fail on a
    # missing folder after all the image processing has already been done.
    out_dir = os.path.join(base_folder, "all_data")
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    size = 28, 28  # original image size is 128, 128

    users = []
    num_samples = []
    user_data = {}
    json_index = 0

    for (w, l) in writers:
        xs = []
        ys = []
        for (f, c) in l:
            xs.append(_image_to_vector(os.path.join(base_folder, f), size))
            ys.append(relabel_class(c))

        users.append(w)
        num_samples.append(len(l))
        user_data[w] = {"x": xs, "y": ys}

        if len(users) == MAX_WRITERS:
            _write_batch(out_dir, json_index, users, num_samples, user_data)
            json_index += 1
            # Start the next batch with fresh containers.
            users = []
            num_samples = []
            user_data = {}

    # Flush the trailing partial batch; without this, the last
    # len(writers) % MAX_WRITERS writers would never reach disk.
    if users:
        _write_batch(out_dir, json_index, users, num_samples, user_data)