pipeline-upload.py

#
# Copyright 2019 The FATE Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import os
import argparse

from pipeline.backend.pipeline import PipeLine

# path to data
# default fate installation path
DATA_BASE = "/data/projects/fate"

# site-package version
# import site
# DATA_BASE = site.getsitepackages()[0]
def main(data_base=DATA_BASE):
    # parties config
    guest = 9999

    # partition for data storage
    partition = 4

    # table name and namespace, used in FATE job configuration
    dense_data = {"name": "breast_hetero_guest", "namespace": "experiment"}
    tag_data = {"name": "breast_hetero_host", "namespace": "experiment"}

    pipeline_upload = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

    # add upload data info
    # path to csv file(s) to be uploaded, modify to upload designated data
    # this example is for the standalone version; with the cluster version, upload data
    # on each party separately
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_guest.csv"),
                                    table_name=dense_data["name"],      # table name
                                    namespace=dense_data["namespace"],  # namespace
                                    head=1, partition=partition)        # head=1: first row of the csv is the header
    pipeline_upload.add_upload_data(file=os.path.join(data_base, "examples/data/breast_hetero_host.csv"),
                                    table_name=tag_data["name"],
                                    namespace=tag_data["namespace"],
                                    head=1, partition=partition)

    # upload data; drop=1 replaces any existing table with the same name and namespace
    pipeline_upload.upload(drop=1)
if __name__ == "__main__":
    parser = argparse.ArgumentParser("PIPELINE DEMO")
    parser.add_argument("--base", "-b", type=str,
                        help="data base, path to directory that contains examples/data")
    args = parser.parse_args()
    if args.base is not None:
        main(args.base)
    else:
        main()
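
Run the script from a machine where the FATE pipeline package is installed and configured, passing the base path on the command line if FATE does not live at the default location, e.g. python pipeline-upload.py --base /data/projects/fate. Once the upload job completes, the tables are addressed by name and namespace from FATE jobs. The snippet below is a minimal sketch of reading the uploaded guest table back in a follow-up pipeline with the Reader component; it assumes the FATE 1.x pipeline API (pipeline.component.Reader and component_param) and reuses the party configuration from the script above.

from pipeline.backend.pipeline import PipeLine
from pipeline.component import Reader

guest = 9999

# same initiator/roles as the upload job
pipeline = PipeLine().set_initiator(role="guest", party_id=guest).set_roles(guest=guest)

# reader_0 feeds the uploaded table into the job as input data
reader_0 = Reader(name="reader_0")
reader_0.get_party_instance(role="guest", party_id=guest).component_param(
    table={"name": "breast_hetero_guest", "namespace": "experiment"})

pipeline.add_component(reader_0)
pipeline.compile()
pipeline.fit()

In practice a hetero-federated job would also declare host (and often arbiter) parties and add downstream components such as data transformation and intersection; the sketch stops at the Reader stage to keep the focus on consuming the uploaded tables.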