123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- #
- # Copyright 2019 The FATE Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- #
- import numpy as np
- from federatedml.util import LOGGER
- from fate_arch.session import computing_session
- from fate_arch.abc import CTableABC
- class PaillierTensor(object):
- def __init__(self, obj, partitions=1):
- if obj is None:
- raise ValueError("Cannot convert None to Paillier tensor")
- if isinstance(obj, (list, np.ndarray)):
- self._ori_data = obj
- self._partitions = partitions
- self._obj = computing_session.parallelize(obj,
- include_key=False,
- partition=partitions)
- elif isinstance(obj, CTableABC):
- self._ori_data = None
- self._partitions = obj.partitions
- self._obj = obj
- else:
- raise ValueError(f"Cannot convert obj to Paillier tensor, object type is {type(obj)}")
- LOGGER.debug("tensor's partition is {}".format(self._partitions))
- def __add__(self, other):
- if isinstance(other, PaillierTensor):
- return PaillierTensor(self._obj.join(other._obj, lambda v1, v2: v1 + v2))
- elif isinstance(other, CTableABC):
- return PaillierTensor(self._obj.join(other, lambda v1, v2: v1 + v2))
- elif isinstance(other, (np.ndarray, int, float)):
- return PaillierTensor(self._obj.mapValues(lambda v: v + other))
- else:
- raise ValueError(f"Unrecognized type {type(other)}, dose not support subtraction")
- def __radd__(self, other):
- return self.__add__(other)
- def __sub__(self, other):
- if isinstance(other, PaillierTensor):
- return PaillierTensor(self._obj.join(other._obj, lambda v1, v2: v1 - v2))
- elif isinstance(other, CTableABC):
- return PaillierTensor(self._obj.join(other, lambda v1, v2: v1 - v2))
- elif isinstance(other, (np.ndarray, int, float)):
- return PaillierTensor(self._obj.mapValues(lambda v: v - other))
- else:
- raise ValueError(f"Unrecognized type {type(other)}, dose not support subtraction")
- def __rsub__(self, other):
- return self.__sub__(other)
- def __mul__(self, other):
- if isinstance(other, (int, float)):
- return PaillierTensor(self._obj.mapValues(lambda val: val * other))
- elif isinstance(other, np.ndarray):
- return PaillierTensor(self._obj.mapValues(lambda val: np.matmul(val, other)))
- elif isinstance(other, CTableABC):
- other = PaillierTensor(other)
- return self.__mul__(other)
- elif isinstance(other, PaillierTensor):
- ret = self.numpy() * other.numpy()
- return PaillierTensor(ret, partitions=max(self.partitions, other.partitions))
- def matmul(self, other):
- if isinstance(other, np.ndarray):
- if len(other.shape) != 2:
- raise ValueError("Only Support 2-D multiplication in matmul op, "
- "if you want to do 3-D, use fast_multiply_3d")
- return self.fast_matmul_2d(other)
- def multiply(self, other):
- if isinstance(other, np.ndarray):
- if other.shape != self.shape:
- raise ValueError(f"operands could not be broadcast together with shapes {self.shape} {other.shape}")
- rhs = PaillierTensor(other)
- return PaillierTensor(self.multiply(rhs))
- elif isinstance(other, CTableABC):
- other = PaillierTensor(other)
- return self.multiply(other)
- elif isinstance(other, PaillierTensor):
- return PaillierTensor(self._obj.join(other._obj, lambda v1, v2: v1 * v2))
- else:
- raise ValueError(f"Not support type in multiply op {type(other)}")
- @property
- def T(self):
- if self._ori_data is None:
- self._ori_data = self.numpy()
- new_data = self._ori_data.T
- return PaillierTensor(new_data, self.partitions)
- @property
- def partitions(self):
- return self._partitions
- def get_obj(self):
- return self._obj
- @property
- def shape(self):
- if self._ori_data is not None:
- return self._ori_data.shape
- else:
- first_dim = self._obj.count()
- if not first_dim:
- return (0, )
- second_dim = self._obj.first()[1].shape
- return tuple([first_dim] + list(second_dim))
- def mean(self, axis=-1):
- if axis == -1:
- size = 1
- for shape in self._ori_data.shape:
- size *= shape
- if not size:
- raise ValueError("shape of data is zero, it should be positive")
- return self._obj.mapValues(lambda val: np.sum(val)).reduce(lambda val1, val2: val1 + val2) / size
- else:
- ret_obj = self._obj.mapValues(lambda val: np.mean(val, axis - 1))
- return PaillierTensor(ret_obj)
- def reduce_sum(self):
- return self._obj.reduce(lambda t1, t2: t1 + t2)
- def map_ndarray_product(self, other):
- if isinstance(other, np.ndarray):
- return PaillierTensor(self._obj.mapValues(lambda val: val * other))
- else:
- raise ValueError('only support numpy array')
- def numpy(self):
- if self._ori_data is not None:
- return self._ori_data
- arr = [None for i in range(self._obj.count())]
- for k, v in self._obj.collect():
- arr[k] = v
- self._ori_data = np.array(arr, dtype=arr[0].dtype)
- return self._ori_data
- def encrypt(self, encrypt_tool):
- return PaillierTensor(encrypt_tool.distribute_encrypt(self._obj))
- def decrypt(self, decrypt_tool):
- return PaillierTensor(self._obj.mapValues(lambda val: decrypt_tool.recursive_decrypt(val)))
- def encode(self, encoder):
- return PaillierTensor(self._obj.mapValues(lambda val: encoder.encode(val)))
- def decode(self, decoder):
- return PaillierTensor(self._obj.mapValues(lambda val: decoder.decode(val)))
- @staticmethod
- def _vector_mul(kv_iters):
- ret_mat = None
- for k, v in kv_iters:
- tmp_mat = np.outer(v[0], v[1])
- if ret_mat is not None:
- ret_mat += tmp_mat
- else:
- ret_mat = tmp_mat
- return ret_mat
- def fast_matmul_2d(self, other):
- """
- Matrix multiplication between two matrix, please ensure that self's shape is (m, n) and other's shape is (m, k)
- Their result is a matrix of (n, k)
- """
- if isinstance(other, np.ndarray):
- mat_tensor = PaillierTensor(other, partitions=self.partitions)
- return self.fast_matmul_2d(mat_tensor)
- if isinstance(other, CTableABC):
- other = PaillierTensor(other)
- func = self._vector_mul
- ret_mat = self._obj.join(other.get_obj(), lambda vec1, vec2: (vec1, vec2)).applyPartitions(func).reduce(
- lambda mat1, mat2: mat1 + mat2)
- return ret_mat
- def matmul_3d(self, other, multiply='left'):
- assert multiply in ['left', 'right']
- if isinstance(other, PaillierTensor):
- mat = other
- elif isinstance(other, CTableABC):
- mat = PaillierTensor(other)
- elif isinstance(other, np.ndarray):
- mat = PaillierTensor(other, partitions=self.partitions)
- else:
- raise ValueError('only support numpy array and Paillier Tensor')
- if multiply == 'left':
- return PaillierTensor(self._obj.join(mat._obj, lambda val1, val2: np.tensordot(val1, val2, (1, 0))),
- partitions=self._partitions)
- if multiply == 'right':
- return PaillierTensor(mat._obj.join(self._obj, lambda val1, val2: np.tensordot(val1, val2, (1, 0))),
- partitions=self._partitions)
- def element_wise_product(self, other):
- if isinstance(other, np.ndarray):
- mat = PaillierTensor(other, partitions=self.partitions)
- elif isinstance(other, CTableABC):
- mat = PaillierTensor(other)
- else:
- mat = other
- return PaillierTensor(self._obj.join(mat._obj, lambda val1, val2: val1 * val2))
- def squeeze(self, axis):
- if axis == 0:
- return PaillierTensor(list(self._obj.collect())[0][1], partitions=self.partitions)
- else:
- return PaillierTensor(self._obj.mapValues(lambda val: np.squeeze(val, axis=axis - 1)))
- def select_columns(self, select_table):
- return PaillierTensor(self._obj.join(select_table, lambda v1, v2: v1[v2]))
|