1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192 |
- #!/usr/bin/env python
- # -*- coding: utf-8 -*-
- #
- # Copyright 2019 The FATE Authors. All Rights Reserved.
- #
- # Licensed under the Apache License, Version 2.0 (the "License");
- # you may not use this file except in compliance with the License.
- # You may obtain a copy of the License at
- #
- # http://www.apache.org/licenses/LICENSE-2.0
- #
- # Unless required by applicable law or agreed to in writing, software
- # distributed under the License is distributed on an "AS IS" BASIS,
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- # See the License for the specific language governing permissions and
- # limitations under the License.
- import math
class Bucket(object):
    """A single bin holding event / non-event counts and its value range.

    The bucket tracks the smallest and largest value it has seen
    (``left_bound`` / ``right_bound``), the number of records labelled ``1``
    (``event_count``) and the number of records with any other label
    (``non_event_count``).  ``event_total`` and ``non_event_total`` must be
    assigned by the caller before ``iv`` is read.
    """

    def __init__(self, idx=-1, adjustment_factor=0.5, right_bound=-math.inf):
        """Create an empty bucket.

        Args:
            idx: Position of this bucket; the neighbor indices default to
                ``idx - 1`` and ``idx + 1``.
            adjustment_factor: Amount added to both counts when computing
                ``iv`` for a bucket that lacks one of the two classes.
            right_bound: Initial upper bound of the bucket.
        """
        self.idx = idx
        # Start at +inf so the first observed value always lowers the bound.
        self.left_bound = math.inf
        self.right_bound = right_bound
        self.left_neighbor_idx = idx - 1
        self.right_neighbor_idx = idx + 1
        self.event_count = 0
        self.non_event_count = 0
        self.adjustment_factor = adjustment_factor
        # Totals are unknown at construction time; ``iv`` refuses to run
        # until both have been assigned.
        self.event_total = None
        self.non_event_total = None

    def set_left_neighbor(self, left_idx):
        """Record the index of the bucket to the left of this one."""
        self.left_neighbor_idx = left_idx

    def set_right_neighbor(self, right_idx):
        """Record the index of the bucket to the right of this one."""
        self.right_neighbor_idx = right_idx

    @property
    def is_mixed(self):
        """True when the bucket contains both event and non-event records."""
        return self.event_count > 0 and self.non_event_count > 0

    @property
    def total_count(self):
        """Number of records in the bucket, regardless of label."""
        return self.event_count + self.non_event_count

    def merge(self, other):
        """Absorb ``other`` into this bucket in place.

        The bounds are widened to cover ``other`` and its counts are added
        to this bucket's counts.  Neighbor indices and totals are not
        touched.

        Args:
            other: The bucket to absorb, or ``None`` for a no-op.

        Returns:
            This bucket, so calls can be chained.  ``self`` is returned even
            when ``other`` is ``None``.
        """
        if other is None:
            # Nothing to absorb; still return self so that chained calls and
            # ``bucket = bucket.merge(maybe_none)`` keep working.
            return self

        # ``self`` is passed first so ties (and incomparable values) keep the
        # current bound, exactly like a strict ``<`` / ``>`` comparison.
        self.left_bound = min(self.left_bound, other.left_bound)
        self.right_bound = max(self.right_bound, other.right_bound)
        self.event_count += other.event_count
        self.non_event_count += other.non_event_count
        return self

    def add(self, label, value):
        """Add one record to the bucket.

        Args:
            label: ``1`` counts as an event; any other value counts as a
                non-event.
            value: The record's value; the bounds are widened to include it.
        """
        if label == 1:
            self.event_count += 1
        else:
            self.non_event_count += 1

        self.left_bound = min(self.left_bound, value)
        self.right_bound = max(self.right_bound, value)

    @property
    def iv(self):
        """Information value contributed by this bucket.

        Computed as ``(non_event_rate - event_rate) * woe`` where
        ``woe = log(non_event_rate / event_rate)`` and each rate is the
        bucket's count divided by the corresponding total (totals below 1
        are treated as 1).

        Raises:
            AssertionError: If ``event_total`` or ``non_event_total`` has
                not been assigned.

        Note:
            If ``adjustment_factor`` is 0 and one class is absent, a rate is
            zero and the division / logarithm error from ``math`` propagates.
        """
        if self.event_total is None or self.non_event_total is None:
            raise AssertionError("Bucket's event_total or non_event_total has not been assigned")

        event_total = max(self.event_total, 1)
        non_event_total = max(self.non_event_total, 1)

        event_count = self.event_count
        non_event_count = self.non_event_count
        # Only EVENT records or only NON-EVENT records: shift both counts by
        # the adjustment factor so neither rate is zero.
        if event_count == 0 or non_event_count == 0:
            event_count += self.adjustment_factor
            non_event_count += self.adjustment_factor

        event_rate = 1.0 * event_count / event_total
        non_event_rate = 1.0 * non_event_count / non_event_total
        woe = math.log(non_event_rate / event_rate)
        return (non_event_rate - event_rate) * woe

    @property
    def gini(self):
        """Gini impurity of the bucket; 0 for an empty bucket."""
        total = self.total_count
        if total == 0:
            return 0

        event_share = 1.0 * self.event_count / total
        non_event_share = 1.0 * self.non_event_count / total
        return 1 - event_share ** 2 - non_event_share ** 2
|