123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105 |
- import json
- import os
- import sys
# Start from this script's symlink-resolved location and climb four directory
# levels; the result is reported as the FATE path and appended to sys.path so
# that packages located there become importable.
cur_path = os.path.realpath(__file__)
for i in range(4):
    cur_path = os.path.split(cur_path)[0]
print('fate_path: {}'.format(cur_path))
sys.path.append(cur_path)

# Absolute directory that contains this script (symlinks are not resolved here).
cur_dir = os.path.abspath(os.path.split(__file__)[0])
def insert_extract_code(file_path, fold_name):
    """Return the source of a pipeline script rewritten to export its conf/dsl.

    The script is read from ``cur_dir`` and the following textual edits are
    applied to its source:

    * every ``pipeline.fit(work_mode=work_mode)`` call is commented out and
      followed by an import of ``extract`` from
      ``examples.pipeline.<fold_name>.generated_testsuite`` plus a call
      ``extract(pipeline, __file__)``;
    * the two ``common_tools.prettify(...get_summary())`` calls for
      ``hetero_lr_0`` and ``evaluation_0`` are removed;
    * ``for i in range(4):`` becomes ``for i in range(5):``.

    Args:
        file_path: Script file name, relative to the directory of this module.
        fold_name: Package name under ``examples.pipeline`` used in the
            injected import statement.

    Returns:
        The rewritten source code as a string. Nothing is written to disk.
    """
    import re  # local import: only this function needs it

    # Context manager so the handle is closed; explicit encoding so the
    # result does not depend on the platform's locale.
    with open(os.path.join(cur_dir, file_path), 'r', encoding='utf-8') as src_file:
        f_str = src_file.read()

    fit_call = 'pipeline.fit(work_mode=work_mode)'
    import_line = 'from examples.pipeline.{}.generated_testsuite import extract'.format(fold_name)

    def _swap_fit_for_extract(match):
        # Reuse the leading whitespace of the line that holds the call so the
        # injected statements stay in the same block as the call they replace.
        text = match.string
        line_start = text.rfind('\n', 0, match.start()) + 1
        prefix = text[line_start:match.start()]
        indent = prefix[:len(prefix) - len(prefix.lstrip())]
        return '# {call}\n\n{pad}{imp}\n{pad}extract(pipeline, __file__)\n'.format(
            call=fit_call, pad=indent, imp=import_line)

    f_str = re.sub(re.escape(fit_call), _swap_fit_for_extract, f_str)

    # Drop the summary printing calls.
    # NOTE(review): presumably because the pipeline is no longer fitted, so no
    # summary exists -- confirm.
    for summary_call in (
        'common_tools.prettify(pipeline.get_component("hetero_lr_0").get_summary())',
        'common_tools.prettify(pipeline.get_component("evaluation_0").get_summary())',
    ):
        f_str = f_str.replace(summary_call, '')

    # NOTE(review): presumably compensates for the rewritten copy being saved
    # one directory deeper than the original script -- confirm.
    f_str = f_str.replace('for i in range(4):', 'for i in range(5):')
    return f_str
def extract(my_pipeline, file_name, output_path='generated_conf_and_dsl'):
    """Write a pipeline's training conf and dsl to JSON files.

    The output base name is derived from ``file_name``: its directory part is
    dropped, then ``'pipeline-'`` and ``'.py'`` are removed and every ``'-'``
    becomes ``'_'``. Two files, ``<name>_conf.json`` and ``<name>_dsl.json``,
    are written under ``./<output_path>/`` (relative to the current working
    directory), and their paths are printed.

    Args:
        my_pipeline: Object providing ``get_train_conf()`` and
            ``get_train_dsl()``; both results must be JSON-serializable.
        file_name: Path of the pipeline script, typically its ``__file__``.
        output_path: Directory, relative to the working directory, that
            receives the JSON files. It is created when missing.
    """
    # basename copes with the platform's path separator, unlike split('/').
    out_name = os.path.basename(file_name)
    out_name = out_name.replace('pipeline-', '').replace('.py', '').replace('-', '_')

    # Fetch both documents before touching the file system.
    conf = my_pipeline.get_train_conf()
    dsl = my_pipeline.get_train_dsl()

    os.makedirs('./{}'.format(output_path), exist_ok=True)
    conf_name = './{}/{}_conf.json'.format(output_path, out_name)
    dsl_name = './{}/{}_dsl.json'.format(output_path, out_name)

    # Context managers guarantee the files are flushed and closed.
    with open(conf_name, 'w') as conf_file:
        json.dump(conf, conf_file, indent=4)
    print('conf name is {}'.format(conf_name))

    with open(dsl_name, 'w') as dsl_file:
        json.dump(dsl, dsl_file, indent=4)
    print('dsl name is {}'.format(dsl_name))
def get_testsuite_file(testsuite_file_path):
    """Load a testsuite JSON from ``examples/dsl/v1`` with its tasks cleared.

    Args:
        testsuite_file_path: Path of the testsuite file relative to the
            ``dsl/v1`` directory inside the ``examples`` package.

    Returns:
        The parsed JSON content, with the ``'tasks'`` entry replaced by an
        empty dict.
    """
    # Imported here rather than at module level, as in the original code.
    import examples

    examples_root = os.path.dirname(examples.__file__)
    suite_location = '{}/dsl/v1/{}'.format(examples_root, testsuite_file_path)

    with open(suite_location, 'r', encoding='utf-8') as suite_file:
        suite = json.load(suite_file)

    # Callers fill the task list themselves, so start from an empty mapping.
    suite['tasks'] = {}
    return suite
def do_generated(fold_name='hetero_logistic_regression'):
    """Generate conf/dsl JSON files and a testsuite from the pipeline scripts.

    Steps, all relative to the current working directory:

    1. Every entry whose name starts with ``pipeline`` is rewritten with
       ``insert_extract_code`` and saved under ``./replaced_code/``.
    2. Every entry in ``./replaced_code/`` is executed with the running
       Python interpreter; exit statuses are ignored.
    3. The files found in ``./generated_conf_and_dsl/`` are paired by task
       name and registered in the testsuite loaded from
       ``hetero_logistic_regression/hetero_lr_testsuite.json``.
    4. The result is written to
       ``./generated_conf_and_dsl/<fold_name>_testsuite.json``.

    Args:
        fold_name: Package name under ``examples.pipeline`` injected into the
            rewritten scripts; also the prefix of the output testsuite file.

    Raises:
        KeyError: If a dsl file has no conf file with the same task name.
    """
    import subprocess  # local import: only needed to launch the scripts

    files = os.listdir(".")

    replaced_path = 'replaced_code'
    generated_path = 'generated_conf_and_dsl'

    # Create the working directories directly instead of shelling out to mkdir.
    for work_dir in (replaced_path, generated_path):
        os.makedirs('./{}'.format(work_dir), exist_ok=True)

    for f in files:
        if not f.startswith("pipeline"):
            continue
        print(f)
        code_str = insert_extract_code(f, fold_name)
        # Close the file before the script is executed below.
        with open('./{}/{}'.format(replaced_path, f), 'w', encoding='utf-8') as out_file:
            out_file.write(code_str)
        print('replace done')

    # Run every rewritten script. An argument list avoids shell quoting issues,
    # and sys.executable keeps the child in the same Python environment.
    interpreter = sys.executable or 'python'
    for f in os.listdir('./{}/'.format(replaced_path)):
        print('executing {}'.format(f))
        subprocess.run([interpreter, './{}/'.format(replaced_path) + f], check=False)

    suite_json = get_testsuite_file('hetero_logistic_regression/hetero_lr_testsuite.json')
    conf_files = os.listdir('./{}/'.format(generated_path))

    # Task name = file name minus its first two '_'-separated tokens and its
    # trailing 'dsl.json' / 'conf.json' token, re-joined with '-'.
    f_dsl = {"-".join(f.split('_')[2: -1]): f for f in conf_files if 'dsl.json' in f}
    f_conf = {"-".join(f.split('_')[2: -1]): f for f in conf_files if 'conf.json' in f}

    for task_type, dsl_file in f_dsl.items():
        conf_file = f_conf[task_type]
        suite_json['tasks'][task_type] = {
            "conf": conf_file,
            "dsl": dsl_file
        }

    with open('./{}/{}_testsuite.json'.format(generated_path, fold_name), 'w', encoding='utf-8') as json_file:
        json.dump(suite_json, json_file, ensure_ascii=False, indent=4)
- from sklearn.metrics import fowlkes_mallows_score
if __name__ == "__main__":
    # Script entry point: run the generation with the default fold name.
    do_generated()
|