mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-11-13 07:05:10 +01:00
replaced 2**n with 1<<n
This commit is contained in:
parent
458abe083e
commit
60c980c6b1
@ -47,14 +47,8 @@ def get_feature_config(data_spec_path: str, label: str) -> FeatureConfigBuilder:
|
||||
("extended_encoded_tweet_features.label_spam_hi_rcl_flag", "A"),
|
||||
("extended_encoded_tweet_features.periscope_exists", "A"),
|
||||
("extended_encoded_tweet_features.periscope_has_been_featured", "A"),
|
||||
(
|
||||
"extended_encoded_tweet_features.periscope_is_currently_featured",
|
||||
"A",
|
||||
),
|
||||
(
|
||||
"extended_encoded_tweet_features.periscope_is_from_quality_source",
|
||||
"A",
|
||||
),
|
||||
("extended_encoded_tweet_features.periscope_is_currently_featured","A"),
|
||||
("extended_encoded_tweet_features.periscope_is_from_quality_source", "A"),
|
||||
("extended_encoded_tweet_features.periscope_is_live", "A"),
|
||||
("extended_encoded_tweet_features.quote_count", "A"),
|
||||
("extended_encoded_tweet_features.reply_count_v2", "A"),
|
||||
|
@ -6,9 +6,6 @@ from .scorer import LollyModelScorer
|
||||
|
||||
if __name__ == "__main__":
|
||||
lolly_model_reader = LollyModelReader(lolly_model_file_path=sys.argv[1])
|
||||
lolly_model_scorer = LollyModelScorer(
|
||||
data_example_parser=DBv2DataExampleParser(lolly_model_reader)
|
||||
)
|
||||
|
||||
lolly_model_scorer = LollyModelScorer(DBv2DataExampleParser(lolly_model_reader))
|
||||
score = lolly_model_scorer.score(data_example=sys.argv[2])
|
||||
print(score)
|
||||
|
@ -40,9 +40,7 @@ class TFModelDiscretizerBuilder(object):
|
||||
:return: a HashingDiscretizer instance.
|
||||
"""
|
||||
discretized_features = tf_model_initializer["features"]["discretized"]
|
||||
|
||||
max_bins = 0
|
||||
|
||||
feature_ids = []
|
||||
bin_vals = []
|
||||
for feature_name in discretized_features:
|
||||
@ -53,12 +51,9 @@ class TFModelDiscretizerBuilder(object):
|
||||
np.float(bin_boundary) for bin_boundary in bin_boundaries
|
||||
]
|
||||
bin_vals.append(np_bin_boundaries)
|
||||
|
||||
max_bins = max(max_bins, len(np_bin_boundaries))
|
||||
|
||||
feature_ids_np = np.array(feature_ids)
|
||||
bin_vals_np = np.array(bin_vals).flatten()
|
||||
|
||||
return HashingDiscretizer(
|
||||
feature_ids=feature_ids_np,
|
||||
bin_vals=bin_vals_np,
|
||||
|
@ -30,4 +30,4 @@ def make_feature_id(name: str, num_bits: int) -> np.int64:
|
||||
def limit_bits(value: int, num_bits: int) -> int:
|
||||
"""Limits the number of bits in the given value."""
|
||||
|
||||
return value & ((2**num_bits) - 1)
|
||||
return value & ((1<<num_bits) - 1)
|
||||
|
@ -16,9 +16,10 @@ class TFModelWeightsInitializerBuilder(object):
|
||||
"""
|
||||
:return: (bias_initializer, weight_initializer)
|
||||
"""
|
||||
initial_weights = np.zeros((2**self.num_bits, 1))
|
||||
|
||||
initial_weights = np.zeros((1 << self.num_bits, 1))
|
||||
features = tf_model_initializer["features"]
|
||||
|
||||
self._set_binary_feature_weights(initial_weights, features["binary"])
|
||||
self._set_discretized_feature_weights(initial_weights, features["discretized"])
|
||||
|
||||
@ -27,8 +28,12 @@ class TFModelWeightsInitializerBuilder(object):
|
||||
), twml.contrib.initializers.PartitionConstant(initial_weights)
|
||||
|
||||
def _set_binary_feature_weights(
|
||||
self, initial_weights: np.ndarray, binary_features: Dict[str, float]
|
||||
self,
|
||||
initial_weights: np.ndarray,
|
||||
binary_features: Dict[str, float],
|
||||
) -> None:
|
||||
"""set weights for binary features"""
|
||||
|
||||
for feature_name, weight in binary_features.items():
|
||||
feature_id = make_feature_id(feature_name, self.num_bits)
|
||||
initial_weights[feature_id][0] = weight
|
||||
@ -38,6 +43,8 @@ class TFModelWeightsInitializerBuilder(object):
|
||||
initial_weights: np.ndarray,
|
||||
discretized_features: Dict[str, Dict[str, Any]],
|
||||
) -> None:
|
||||
"""set weights for discretized features"""
|
||||
|
||||
for feature_name, discretized_feature in discretized_features.items():
|
||||
feature_id = make_feature_id(feature_name, self.num_bits)
|
||||
for bin_idx, weight in enumerate(discretized_feature["weights"]):
|
||||
|
@ -89,7 +89,7 @@ class FactorizationMachine(Layer):
|
||||
# 2**30 is 1GB. 2 * (2**30) is 2GB.
|
||||
dtype = tf.as_dtype(self.dtype)
|
||||
requested_size = input_shape[1] * self.num_latent_variables * dtype.size
|
||||
if requested_size >= 2**31:
|
||||
if requested_size >= (1<<31):
|
||||
raise ValueError(
|
||||
"Weight tensor can not be larger than 2GB. "
|
||||
% "Requested Dimensions(%d, %d) of type %s (%d bytes total)"(
|
||||
|
@ -187,7 +187,7 @@ class FullSparse(Layer):
|
||||
requested_size = (
|
||||
math.ceil(float(split_dim) / num_partitions) * other_dim * dtype.size
|
||||
)
|
||||
if requested_size >= 2**31:
|
||||
if requested_size >= 1 << 31:
|
||||
raise ValueError(
|
||||
"Weight tensor partitions cannot be larger than 2GB.\n"
|
||||
"Requested Dimensions(%d, %d) of type %s (%d bytes total) over %d partitions.\n"
|
||||
|
Loading…
Reference in New Issue
Block a user