From d2da7e56ab3de5377b890ecd98ce4949e7b3ebd4 Mon Sep 17 00:00:00 2001 From: Jhv Date: Tue, 4 Apr 2023 21:28:36 +0800 Subject: [PATCH] Function TFModelInitializerBuilder._set_discretized_features refactored with the following changes: removes a pair of brackets, making the intent slightly clearer Convert for loop into dictionary comprehension (dict-comprehension) --- .../lolly/tf_model_initializer_builder.py | 135 +++++++++--------- 1 file changed, 68 insertions(+), 67 deletions(-) diff --git a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/tf_model_initializer_builder.py b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/tf_model_initializer_builder.py index 2d0342551..8315e3c18 100644 --- a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/tf_model_initializer_builder.py +++ b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/tf_model_initializer_builder.py @@ -3,89 +3,90 @@ from .parsers import LollyModelFeaturesParser class TFModelInitializerBuilder: - def __init__(self, model_features_parser=LollyModelFeaturesParser()): - self._model_features_parser = model_features_parser + def __init__(self, model_features_parser=LollyModelFeaturesParser()): + self._model_features_parser = model_features_parser - def build(self, lolly_model_reader): - ''' - :param lolly_model_reader: LollyModelReader instance - :return: tf_model_initializer dictionary of the following format: - { - "features": { - "bias": 0.0, - "binary": { - # (feature name : feature weight) pairs - "feature_name_1": 0.0, - ... - "feature_nameN": 0.0 - }, - "discretized": { - # (feature name : index aligned lists of bin_boundaries and weights - "feature_name_1": { - "bin_boundaries": [1, ..., inf], - "weights": [0.0, ..., 0.0] - } - ... - "feature_name_K": { - "bin_boundaries": [1, ..., inf], - "weights": [0.0, ..., 0.0] + def build(self, lolly_model_reader): + ''' + :param lolly_model_reader: LollyModelReader instance + :return: tf_model_initializer dictionary of the following format: + { + "features": { + "bias": 0.0, + "binary": { + # (feature name : feature weight) pairs + "feature_name_1": 0.0, + ... + "feature_nameN": 0.0 + }, + "discretized": { + # (feature name : index aligned lists of bin_boundaries and weights + "feature_name_1": { + "bin_boundaries": [1, ..., inf], + "weights": [0.0, ..., 0.0] + } + ... + "feature_name_K": { + "bin_boundaries": [1, ..., inf], + "weights": [0.0, ..., 0.0] + } + } } } + ''' + tf_model_initializer = { + "features": {} } - } - ''' - tf_model_initializer = { - "features": {} - } - features = self._model_features_parser.parse(lolly_model_reader) - tf_model_initializer["features"]["bias"] = features["bias"] - self._set_discretized_features(features["discretized"], tf_model_initializer) + features = self._model_features_parser.parse(lolly_model_reader) + tf_model_initializer["features"]["bias"] = features["bias"] + self._set_discretized_features(features["discretized"], tf_model_initializer) - self._dedup_binary_features(features["binary"], features["discretized"]) - tf_model_initializer["features"]["binary"] = features["binary"] + self._dedup_binary_features(features["binary"], features["discretized"]) + tf_model_initializer["features"]["binary"] = features["binary"] - return tf_model_initializer + return tf_model_initializer - def _set_discretized_features(self, discretized_features, tf_model_initializer): - if len(discretized_features) == 0: - return + def _set_discretized_features(self, discretized_features, tf_model_initializer): + if len(discretized_features) == 0: + return - num_bins = max([len(bins) for bins in discretized_features.values()]) + num_bins = max(len(bins) for bins in discretized_features.values()) - bin_boundaries_and_weights = {} - for feature_name in discretized_features: - bin_boundaries_and_weights[feature_name] = self._extract_bin_boundaries_and_weights( - discretized_features[feature_name], num_bins) + bin_boundaries_and_weights = { + feature_name: self._extract_bin_boundaries_and_weights( + discretized_features[feature_name], num_bins) + for feature_name in discretized_features + } - tf_model_initializer["features"]["discretized"] = bin_boundaries_and_weights + tf_model_initializer["features"]["discretized"] = bin_boundaries_and_weights - def _dedup_binary_features(self, binary_features, discretized_features): - [binary_features.pop(feature_name) for feature_name in discretized_features] + def _dedup_binary_features(self, binary_features, discretized_features): + [binary_features.pop(feature_name) for feature_name in discretized_features] - def _extract_bin_boundaries_and_weights(self, discretized_feature_buckets, num_bins): - bin_boundary_weight_pairs = [] + def _extract_bin_boundaries_and_weights(self, discretized_feature_buckets, num_bins): + bin_boundary_weight_pairs = [] - for bucket in discretized_feature_buckets: - bin_boundary_weight_pairs.append([bucket[0], bucket[2]]) + for bucket in discretized_feature_buckets: + bin_boundary_weight_pairs.append([bucket[0], bucket[2]]) - # The default DBv2 HashingDiscretizer bin membership interval is (a, b] - # - # The Earlybird Lolly prediction engine discretizer bin membership interval is [a, b) - # - # Thus, convert (a, b] to [a, b) by inverting the bin boundaries. - for bin_boundary_weight_pair in bin_boundary_weight_pairs: - if bin_boundary_weight_pair[0] < float("inf"): - bin_boundary_weight_pair[0] *= -1 + # The default DBv2 HashingDiscretizer bin membership interval is (a, b] + # + # The Earlybird Lolly prediction engine discretizer bin membership interval is [a, b) + # + # Thus, convert (a, b] to [a, b) by inverting the bin boundaries. + for bin_boundary_weight_pair in bin_boundary_weight_pairs: + if bin_boundary_weight_pair[0] < float("inf"): + bin_boundary_weight_pair[0] *= -1 - while len(bin_boundary_weight_pairs) < num_bins: - bin_boundary_weight_pairs.append([float("inf"), float(0)]) + while len(bin_boundary_weight_pairs) < num_bins: + bin_boundary_weight_pairs.append([float("inf"), float(0)]) - bin_boundary_weight_pairs.sort(key=lambda bin_boundary_weight_pair: bin_boundary_weight_pair[0]) + bin_boundary_weight_pairs.sort(key=lambda bin_boundary_weight_pair: bin_boundary_weight_pair[0]) - bin_boundaries, weights = list(zip(*bin_boundary_weight_pairs)) + bin_boundaries, weights = list(zip(*bin_boundary_weight_pairs)) - return { - "bin_boundaries": bin_boundaries, - "weights": weights - } + return { + "bin_boundaries": bin_boundaries, + "weights": weights + }