diff --git a/src/python/twitter/deepbird/projects/timelines/configs/rectweet_earlybird/feature_config.py b/src/python/twitter/deepbird/projects/timelines/configs/rectweet_earlybird/feature_config.py index 8cfc83ae6..faec156c6 100644 --- a/src/python/twitter/deepbird/projects/timelines/configs/rectweet_earlybird/feature_config.py +++ b/src/python/twitter/deepbird/projects/timelines/configs/rectweet_earlybird/feature_config.py @@ -47,14 +47,8 @@ def get_feature_config(data_spec_path: str, label: str) -> FeatureConfigBuilder: ("extended_encoded_tweet_features.label_spam_hi_rcl_flag", "A"), ("extended_encoded_tweet_features.periscope_exists", "A"), ("extended_encoded_tweet_features.periscope_has_been_featured", "A"), - ( - "extended_encoded_tweet_features.periscope_is_currently_featured", - "A", - ), - ( - "extended_encoded_tweet_features.periscope_is_from_quality_source", - "A", - ), + ("extended_encoded_tweet_features.periscope_is_currently_featured","A"), + ("extended_encoded_tweet_features.periscope_is_from_quality_source", "A"), ("extended_encoded_tweet_features.periscope_is_live", "A"), ("extended_encoded_tweet_features.quote_count", "A"), ("extended_encoded_tweet_features.reply_count_v2", "A"), diff --git a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/score.py b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/score.py index 1ed012cd2..b018844c5 100644 --- a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/score.py +++ b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/lolly/score.py @@ -6,9 +6,6 @@ from .scorer import LollyModelScorer if __name__ == "__main__": lolly_model_reader = LollyModelReader(lolly_model_file_path=sys.argv[1]) - lolly_model_scorer = LollyModelScorer( - data_example_parser=DBv2DataExampleParser(lolly_model_reader) - ) - + lolly_model_scorer = LollyModelScorer(DBv2DataExampleParser(lolly_model_reader)) score = lolly_model_scorer.score(data_example=sys.argv[2]) print(score) diff --git a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/discretizer_builder.py b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/discretizer_builder.py index a2519d417..619f9306c 100644 --- a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/discretizer_builder.py +++ b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/discretizer_builder.py @@ -40,9 +40,7 @@ class TFModelDiscretizerBuilder(object): :return: a HashingDiscretizer instance. """ discretized_features = tf_model_initializer["features"]["discretized"] - max_bins = 0 - feature_ids = [] bin_vals = [] for feature_name in discretized_features: @@ -53,12 +51,9 @@ class TFModelDiscretizerBuilder(object): np.float(bin_boundary) for bin_boundary in bin_boundaries ] bin_vals.append(np_bin_boundaries) - max_bins = max(max_bins, len(np_bin_boundaries)) - feature_ids_np = np.array(feature_ids) bin_vals_np = np.array(bin_vals).flatten() - return HashingDiscretizer( feature_ids=feature_ids_np, bin_vals=bin_vals_np, diff --git a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/hashing_utils.py b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/hashing_utils.py index 233f91ebb..acb668587 100644 --- a/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/hashing_utils.py +++ b/src/python/twitter/deepbird/projects/timelines/scripts/models/earlybird/tf_model/hashing_utils.py @@ -30,4 +30,4 @@ def make_feature_id(name: str, num_bits: int) -> np.int64: def limit_bits(value: int, num_bits: int) -> int: """Limits the number of bits in the given value.""" - return value & ((2**num_bits) - 1) + return value & ((1< None: + """set weights for binary features""" + for feature_name, weight in binary_features.items(): feature_id = make_feature_id(feature_name, self.num_bits) initial_weights[feature_id][0] = weight @@ -38,6 +43,8 @@ class TFModelWeightsInitializerBuilder(object): initial_weights: np.ndarray, discretized_features: Dict[str, Dict[str, Any]], ) -> None: + """set weights for discretized features""" + for feature_name, discretized_feature in discretized_features.items(): feature_id = make_feature_id(feature_name, self.num_bits) for bin_idx, weight in enumerate(discretized_feature["weights"]): diff --git a/twml/twml/contrib/layers/factorization_machine.py b/twml/twml/contrib/layers/factorization_machine.py index a08d50efe..2484d0a8d 100644 --- a/twml/twml/contrib/layers/factorization_machine.py +++ b/twml/twml/contrib/layers/factorization_machine.py @@ -89,7 +89,7 @@ class FactorizationMachine(Layer): # 2**30 is 1GB. 2 * (2**30) is 2GB. dtype = tf.as_dtype(self.dtype) requested_size = input_shape[1] * self.num_latent_variables * dtype.size - if requested_size >= 2**31: + if requested_size >= (1<<31): raise ValueError( "Weight tensor can not be larger than 2GB. " % "Requested Dimensions(%d, %d) of type %s (%d bytes total)"( diff --git a/twml/twml/layers/full_sparse.py b/twml/twml/layers/full_sparse.py index 2d65739c9..27701ac49 100644 --- a/twml/twml/layers/full_sparse.py +++ b/twml/twml/layers/full_sparse.py @@ -187,7 +187,7 @@ class FullSparse(Layer): requested_size = ( math.ceil(float(split_dim) / num_partitions) * other_dim * dtype.size ) - if requested_size >= 2**31: + if requested_size >= 1 << 31: raise ValueError( "Weight tensor partitions cannot be larger than 2GB.\n" "Requested Dimensions(%d, %d) of type %s (%d bytes total) over %d partitions.\n"