# Source listing: generated_testsuite.py (3.7 KB)
import json
import os
import subprocess
import sys
  4. cur_path = os.path.realpath(__file__)
  5. for i in range(4):
  6. cur_path = os.path.dirname(cur_path)
  7. print(f'fate_path: {cur_path}')
  8. sys.path.append(cur_path)
  9. cur_dir = os.path.abspath(os.path.dirname(__file__))
  10. def insert_extract_code(file_path, fold_name):
  11. f_str = open(cur_dir + '/' + file_path, 'r').read()
  12. code = \
  13. """
  14. from examples.pipeline.{}.generated_testsuite import extract
  15. extract(pipeline, __file__)
  16. """.format(fold_name)
  17. f_str = f_str.replace('pipeline.fit(work_mode=work_mode)',
  18. '# pipeline.fit(work_mode=work_mode)\n' + code)
  19. f_str = f_str.replace('common_tools.prettify(pipeline.get_component("hetero_lr_0").get_summary())',
  20. '')
  21. f_str = f_str.replace('common_tools.prettify(pipeline.get_component("evaluation_0").get_summary())',
  22. '')
  23. f_str = f_str.replace('for i in range(4):',
  24. 'for i in range(5):')
  25. return f_str
  26. def extract(my_pipeline, file_name, output_path='generated_conf_and_dsl'):
  27. out_name = file_name.split('/')[-1]
  28. out_name = out_name.replace('pipeline-', '').replace('.py', '').replace('-', '_')
  29. conf = my_pipeline.get_train_conf()
  30. dsl = my_pipeline.get_train_dsl()
  31. conf_name = './{}/{}_conf.json'.format(output_path, out_name)
  32. dsl_name = './{}/{}_dsl.json'.format(output_path, out_name)
  33. json.dump(conf, open(conf_name, 'w'), indent=4)
  34. print('conf name is {}'.format(conf_name))
  35. json.dump(dsl, open(dsl_name, 'w'), indent=4)
  36. print('dsl name is {}'.format(dsl_name))
  37. def get_testsuite_file(testsuite_file_path):
  38. import examples
  39. cpn_path = os.path.dirname(examples.__file__) + f'/dsl/v1/{testsuite_file_path}'
  40. with open(cpn_path, 'r', encoding='utf-8') as load_f:
  41. testsuite_json = json.load(load_f)
  42. testsuite_json['tasks'] = {}
  43. return testsuite_json
  44. def do_generated(fold_name='hetero_logistic_regression'):
  45. folder = '.'
  46. files = os.listdir(".")
  47. cmd = 'python {}'
  48. replaced_path = 'replaced_code'
  49. generated_path = 'generated_conf_and_dsl'
  50. if not os.path.exists('./{}'.format(replaced_path)):
  51. os.system('mkdir {}'.format(replaced_path))
  52. if not os.path.exists('./{}'.format(generated_path)):
  53. os.system('mkdir {}'.format(generated_path))
  54. for f in files:
  55. if not f.startswith("pipeline"):
  56. continue
  57. print(f)
  58. code_str = insert_extract_code(f, fold_name)
  59. open('./{}/{}'.format(replaced_path, f), 'w').write(code_str)
  60. print('replace done')
  61. # file_path = folder + f
  62. # os.system(cmd.format(folder + f))
  63. exe_files = os.listdir('./{}/'.format(replaced_path))
  64. for f in exe_files:
  65. print('executing {}'.format(f))
  66. os.system(cmd.format('./{}/'.format(replaced_path) + f))
  67. suite_json = get_testsuite_file('hetero_logistic_regression/hetero_lr_testsuite.json')
  68. conf_files = os.listdir('./{}/'.format(generated_path))
  69. f_dsl = {"-".join(f.split('_')[2: -1]): f for f in conf_files if 'dsl.json' in f}
  70. f_conf = {"-".join(f.split('_')[2: -1]): f for f in conf_files if 'conf.json' in f}
  71. for task_type, dsl_file in f_dsl.items():
  72. conf_file = f_conf[task_type]
  73. suite_json['tasks'][task_type] = {
  74. "conf": conf_file,
  75. "dsl": dsl_file
  76. }
  77. with open('./{}/{}_testsuite.json'.format(generated_path, fold_name), 'w', encoding='utf-8') as json_file:
  78. json.dump(suite_json, json_file, ensure_ascii=False, indent=4)
  79. # os.system('rm -rf {}'.format(replaced_path))
  80. from sklearn.metrics import fowlkes_mallows_score
  81. if __name__ == '__main__':
  82. do_generated()
  83. # pass