1234567891011121314151617181920212223242526272829 |
- """
- This module provides utility methods that operate on the index of distributed data.
- """
def collect_index(data_insts):
    """
    Collect the keys of ``data_insts`` in ascending order, together with their count.

    Parameters
    ----------
    data_insts : object exposing ``mapValues(func)`` and ``collect()``
        ``collect()`` must yield ``(key, value)`` pairs whose keys are
        mutually comparable. Presumably a distributed table keyed by
        sample id -- confirm against callers.

    Returns
    -------
    tuple
        ``(data_sids_iter, data_size)``: ``data_sids_iter`` is a list of
        ``(key, None)`` pairs sorted by key, and ``data_size`` is the
        length of that list.
    """
    # Replace every value with None; only the keys are needed from here on.
    data_sids = data_insts.mapValues(lambda data_inst: None)

    # sorted() materializes whatever collect() yields into a list, so the
    # length can be taken directly from the sorted result.
    data_sids_iter = sorted(data_sids.collect(), key=lambda x: x[0])
    return data_sids_iter, len(data_sids_iter)
|