Compare commits

...

No commits in common. "ec83d01dcaebf369444d75ed04b3625a0a645eb9" and "7f90d0ca342b928b479b512ec51ac2c3821f5922" have entirely different histories.

32 changed files with 121 additions and 51 deletions

View File

@ -10,7 +10,7 @@ These are the main components of the Recommendation Algorithm included in this r
| Type | Component | Description |
|------------|------------|------------|
| Feature | [SimClusters](src/scala/com/twitter/simclusters_v2/README.md) | Community detection and sparse embeddings into those communities. |
| Feature | [simclusters-ann](simclusters-ann/README.md) | Community detection and sparse embeddings into those communities. |
| | [TwHIN](https://github.com/twitter/the-algorithm-ml/blob/main/projects/twhin/README.md) | Dense knowledge graph embeddings for Users and Tweets. |
| | [trust-and-safety-models](trust_and_safety_models/README.md) | Models for detecting NSFW or abusive content. |
| | [real-graph](src/scala/com/twitter/interaction_graph/README.md) | Model to predict likelihood of a Twitter User interacting with another User. |

View File

@ -24,6 +24,10 @@ class FollowRecommendationsServiceWarmupHandler @Inject() (warmup: ThriftWarmup)
extends Handler
with Logging {
/**
* this would need to be added to src/main/resources/client_whitelist.yml
* if we implement ClientId filtering in the future
*/
private val clientId = ClientId("thrift-warmup-client")
override def handle(): Unit = {
@ -44,7 +48,7 @@ class FollowRecommendationsServiceWarmupHandler @Inject() (warmup: ThriftWarmup)
RecommendationRequest(
clientContext = clientContext,
displayLocation = displayLocation,
displayContext = None,
displayContext = Some(DisplayContext.Profile(Profile(12L))),
maxResults = Some(3),
fetchPromotedContent = Some(false),
debugParams = Some(DebugParams(doNotLog = Some(true)))

View File

@ -25,7 +25,7 @@ class ServerController @Inject() (
.andThen(Service.mk(serverGetIntersectionHandler))
val getIntersection: Service[GetIntersection.Args, GfsIntersectionResponse] = { args =>
// TODO: Disable updateCache after HTL switch to use PresetIntersection endpoint.
// TODO(yqian): Disable updateCache after HTL switch to use PresetIntersection endpoint.
getIntersectionService(
GetIntersectionRequest.fromGfsIntersectionRequest(args.request, cacheable = true))
}
@ -35,7 +35,7 @@ class ServerController @Inject() (
GetPresetIntersection.Args,
GfsIntersectionResponse
] = { args =>
// TODO: Refactor after HTL switch to PresetIntersection
// TODO(yqian): Refactor after HTL switch to PresetIntersection
val cacheable = args.request.presetFeatureTypes == PresetFeatureTypes.HtlTwoHop
getIntersectionService(
GetIntersectionRequest.fromGfsPresetIntersectionRequest(args.request, cacheable))

View File

@ -28,7 +28,7 @@ class ServerGetIntersectionHandler @Inject() (
import ServerGetIntersectionHandler._
// TODO: Track all the stats based on PresetFeatureType and update the dashboard
// TODO(yqian): Track all the stats based on PresetFeatureType and update the dashboard
private val stats: StatsReceiver = statsReceiver.scope("srv").scope("get_intersection")
private val numCandidatesCount = stats.counter("total_num_candidates")
private val numCandidatesStat = stats.stat("num_candidates")

View File

@ -1,17 +1,13 @@
package com.twitter.graph_feature_service.server.handlers
import com.twitter.finatra.thrift.routing.ThriftWarmup
import com.twitter.graph_feature_service.thriftscala.EdgeType.FavoritedBy
import com.twitter.graph_feature_service.thriftscala.EdgeType.FollowedBy
import com.twitter.graph_feature_service.thriftscala.EdgeType.Following
import com.twitter.graph_feature_service.thriftscala.EdgeType.{FavoritedBy, FollowedBy, Following}
import com.twitter.graph_feature_service.thriftscala.Server.GetIntersection
import com.twitter.graph_feature_service.thriftscala.FeatureType
import com.twitter.graph_feature_service.thriftscala.GfsIntersectionRequest
import com.twitter.graph_feature_service.thriftscala.{FeatureType, GfsIntersectionRequest}
import com.twitter.inject.utils.Handler
import com.twitter.scrooge.Request
import com.twitter.util.logging.Logger
import javax.inject.Inject
import javax.inject.Singleton
import javax.inject.{Inject, Singleton}
import scala.util.Random
@Singleton
@ -19,8 +15,14 @@ class ServerWarmupHandler @Inject() (warmup: ThriftWarmup) extends Handler {
val logger: Logger = Logger("WarmupHandler")
// TODO: Add the testing accounts to warm-up the service.
private val testingAccounts: Array[Long] = Seq.empty.toArray
private val testingAccounts: Array[Long] = {
Seq(
12L, //jack
21447363L, // KATY PERRY
42562446L, // Stephen Curry
813286L // Barack Obama
).toArray
}
private def getRandomRequest: GfsIntersectionRequest = {
GfsIntersectionRequest(

View File

@ -108,7 +108,7 @@ object IntersectionValueCalculator {
}
/**
* TODO: for now it only computes intersection size. Will add more feature types (e.g., dot
* TODO(yaow): for now it only computes intersection size. Will add more feature types (e.g., dot
* product, maximum value).
*
* NOTE that this function assumes both x and y are SORTED arrays.

View File

@ -220,7 +220,30 @@ object HomeTweetTypePredicates {
_.getOrElse(ConversationModule2DisplayedTweetsFeature, false)),
("conversation_module_has_gap", _.getOrElse(ConversationModuleHasGapFeature, false)),
("served_in_recap_tweet_candidate_module_injection", _ => false),
("served_in_threaded_conversation_module", _ => false)
("served_in_threaded_conversation_module", _ => false),
(
"author_is_elon",
candidate =>
candidate
.getOrElse(AuthorIdFeature, None).contains(candidate.getOrElse(DDGStatsElonFeature, 0L))),
(
"author_is_power_user",
candidate =>
candidate
.getOrElse(AuthorIdFeature, None)
.exists(candidate.getOrElse(DDGStatsVitsFeature, Set.empty[Long]).contains)),
(
"author_is_democrat",
candidate =>
candidate
.getOrElse(AuthorIdFeature, None)
.exists(candidate.getOrElse(DDGStatsDemocratsFeature, Set.empty[Long]).contains)),
(
"author_is_republican",
candidate =>
candidate
.getOrElse(AuthorIdFeature, None)
.exists(candidate.getOrElse(DDGStatsRepublicansFeature, Set.empty[Long]).contains)),
)
val PredicateMap = CandidatePredicates.toMap

View File

@ -1,10 +1,12 @@
package com.twitter.home_mixer.functional_component.feature_hydrator
import com.twitter.config.yaml.YamlMap
import com.twitter.finagle.tracing.Annotation.BinaryAnnotation
import com.twitter.finagle.tracing.ForwardAnnotation
import com.twitter.home_mixer.model.HomeFeatures._
import com.twitter.home_mixer.model.request.DeviceContext.RequestContext
import com.twitter.home_mixer.model.request.HasDeviceContext
import com.twitter.home_mixer.param.HomeMixerInjectionNames.DDGStatsAuthors
import com.twitter.joinkey.context.RequestJoinKeyContext
import com.twitter.product_mixer.component_library.model.cursor.UrtOrderedCursor
import com.twitter.product_mixer.core.feature.Feature
@ -22,16 +24,22 @@ import com.twitter.snowflake.id.SnowflakeId
import com.twitter.stitch.Stitch
import java.util.UUID
import javax.inject.Inject
import javax.inject.Named
import javax.inject.Singleton
@Singleton
class RequestQueryFeatureHydrator[
Query <: PipelineQuery with HasPipelineCursor[UrtOrderedCursor] with HasDeviceContext] @Inject() (
) extends QueryFeatureHydrator[Query] {
@Named(DDGStatsAuthors) ddgStatsAuthors: YamlMap)
extends QueryFeatureHydrator[Query] {
override val features: Set[Feature[_, _]] = Set(
AccountAgeFeature,
ClientIdFeature,
DDGStatsDemocratsFeature,
DDGStatsRepublicansFeature,
DDGStatsElonFeature,
DDGStatsVitsFeature,
DeviceLanguageFeature,
GetInitialFeature,
GetMiddleFeature,
@ -51,6 +59,10 @@ class RequestQueryFeatureHydrator[
override val identifier: FeatureHydratorIdentifier = FeatureHydratorIdentifier("Request")
private val DarkRequestAnnotation = "clnt/has_dark_request"
private val Democrats = "democrats"
private val Republicans = "republicans"
private val Elon = "elon"
private val Vits = "vits"
// Convert Language code to ISO 639-3 format
private def getLanguageISOFormatByCode(languageCode: String): String =
@ -71,6 +83,16 @@ class RequestQueryFeatureHydrator[
val featureMap = FeatureMapBuilder()
.add(AccountAgeFeature, query.getOptionalUserId.flatMap(SnowflakeId.timeFromIdOpt))
.add(ClientIdFeature, query.clientContext.appId)
/**
* These author ID lists are used purely for metrics collection. We track how often we are
* serving Tweets from these authors and how often their tweets are being impressed by users.
* This helps us validate in our A/B experimentation platform that we do not ship changes
* that negatively impacts one group over others.
*/
.add(DDGStatsDemocratsFeature, ddgStatsAuthors.longSeq(Democrats).toSet)
.add(DDGStatsRepublicansFeature, ddgStatsAuthors.longSeq(Republicans).toSet)
.add(DDGStatsVitsFeature, ddgStatsAuthors.longSeq(Vits).toSet)
.add(DDGStatsElonFeature, ddgStatsAuthors.longValue(Elon))
.add(DeviceLanguageFeature, query.getLanguageCode.map(getLanguageISOFormatByCode))
.add(
GetInitialFeature,

View File

@ -176,6 +176,10 @@ object HomeFeatures {
override def personalDataTypes: Set[pd.PersonalDataType] = Set(pd.PersonalDataType.ClientType)
}
object CachedScoredTweetsFeature extends Feature[PipelineQuery, Seq[hmt.CachedScoredTweet]]
object DDGStatsElonFeature extends Feature[PipelineQuery, Long]
object DDGStatsVitsFeature extends Feature[PipelineQuery, Set[Long]]
object DDGStatsDemocratsFeature extends Feature[PipelineQuery, Set[Long]]
object DDGStatsRepublicansFeature extends Feature[PipelineQuery, Set[Long]]
object DeviceLanguageFeature extends Feature[PipelineQuery, Option[String]]
object DismissInfoFeature
extends FeatureWithDefaultOnFailure[PipelineQuery, Map[st.SuggestType, Option[DismissInfo]]] {

View File

@ -1,5 +1,18 @@
package com.twitter.home_mixer.module
import com.google.inject.Provides
import com.twitter.config.yaml.YamlMap
import com.twitter.home_mixer.param.HomeMixerInjectionNames.DDGStatsAuthors
import com.twitter.inject.TwitterModule
import javax.inject.Named
import javax.inject.Singleton
object HomeMixerResourcesModule extends TwitterModule {}
object HomeMixerResourcesModule extends TwitterModule {
private val AuthorsFile = "/config/authors.yml"
@Provides
@Singleton
@Named(DDGStatsAuthors)
def providesDDGStatsAuthors(): YamlMap = YamlMap.load(AuthorsFile)
}

View File

@ -4,6 +4,7 @@ object HomeMixerInjectionNames {
final val AuthorFeatureRepository = "AuthorFeatureRepository"
final val CandidateFeaturesScribeEventPublisher = "CandidateFeaturesScribeEventPublisher"
final val CommonFeaturesScribeEventPublisher = "CommonFeaturesScribeEventPublisher"
final val DDGStatsAuthors = "DDGStatsAuthors"
final val EarlybirdRepository = "EarlybirdRepository"
final val EngagementsReceivedByAuthorCache = "EngagementsReceivedByAuthorCache"
final val GraphTwoHopRepository = "GraphTwoHopRepository"

View File

@ -122,7 +122,7 @@ enum FullTypeId {
// TFT_TENSOR[TFT_INT32, TFT_UNKNOWN]
// is a Tensor of int32 element type and unknown shape.
//
// TODO: Define TFT_SHAPE and add more examples.
// TODO(mdan): Define TFT_SHAPE and add more examples.
TFT_TENSOR = 1000;
// Array (or tensorflow::TensorList in the variant type registry).
@ -178,7 +178,7 @@ enum FullTypeId {
// object (for now).
// The bool element type.
// TODO
// TODO(mdan): Quantized types, legacy representations (e.g. ref)
TFT_BOOL = 200;
// Integer element types.
TFT_UINT8 = 201;
@ -195,7 +195,7 @@ enum FullTypeId {
TFT_DOUBLE = 211;
TFT_BFLOAT16 = 215;
// Complex element types.
// TODO: Represent as TFT_COMPLEX[TFT_DOUBLE] instead?
// TODO(mdan): Represent as TFT_COMPLEX[TFT_DOUBLE] instead?
TFT_COMPLEX64 = 212;
TFT_COMPLEX128 = 213;
// The string element type.
@ -240,7 +240,7 @@ enum FullTypeId {
// ownership is in the true sense: "the op argument representing the lock is
// available".
// Mutex locks are the dynamic counterpart of control dependencies.
// TODO: Properly document this thing.
// TODO(mdan): Properly document this thing.
//
// Parametrization: TFT_MUTEX_LOCK[].
TFT_MUTEX_LOCK = 10202;
@ -271,6 +271,6 @@ message FullTypeDef {
oneof attr {
string s = 3;
int64 i = 4;
// TODO: list/tensor, map? Need to reconcile with TFT_RECORD, etc.
// TODO(mdan): list/tensor, map? Need to reconcile with TFT_RECORD, etc.
}
}

View File

@ -23,7 +23,7 @@ message FunctionDefLibrary {
// with a value. When a GraphDef has a call to a function, it must
// have binding for every attr defined in the signature.
//
// TODO:
// TODO(zhifengc):
// * device spec, etc.
message FunctionDef {
// The definition of the function's name, arguments, return values,

View File

@ -61,7 +61,7 @@ message NodeDef {
// one of the names from the corresponding OpDef's attr field).
// The values must have a type matching the corresponding OpDef
// attr's type field.
// TODO: Add some examples here showing best practices.
// TODO(josh11b): Add some examples here showing best practices.
map<string, AttrValue> attr = 5;
message ExperimentalDebugInfo {

View File

@ -96,7 +96,7 @@ message OpDef {
// Human-readable description.
string description = 4;
// TODO: bool is_optional?
// TODO(josh11b): bool is_optional?
// --- Constraints ---
// These constraints are only in effect if specified. Default is no
@ -139,7 +139,7 @@ message OpDef {
// taking input from multiple devices with a tree of aggregate ops
// that aggregate locally within each device (and possibly within
// groups of nearby devices) before communicating.
// TODO: Implement that optimization.
// TODO(josh11b): Implement that optimization.
bool is_aggregate = 16; // for things like add
// Other optimizations go here, like

View File

@ -53,7 +53,7 @@ message MemoryStats {
// Time/size stats recorded for a single execution of a graph node.
message NodeExecStats {
// TODO: Use some more compact form of node identity than
// TODO(tucker): Use some more compact form of node identity than
// the full string name. Either all processes should agree on a
// global id (cost_id?) for each node, or we should use a hash of
// the name.

View File

@ -16,7 +16,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/framewo
message TensorProto {
DataType dtype = 1;
// Shape of the tensor. TODO: sort out the 0-rank issues.
// Shape of the tensor. TODO(touts): sort out the 0-rank issues.
TensorShapeProto tensor_shape = 2;
// Only one of the representations below is set, one of "tensor_contents" and

View File

@ -532,7 +532,7 @@ message ConfigProto {
// We removed the flag client_handles_error_formatting. Marking the tag
// number as reserved.
// TODO: Should we just remove this tag so that it can be
// TODO(shikharagarwal): Should we just remove this tag so that it can be
// used in future for other purpose?
reserved 2;
@ -576,7 +576,7 @@ message ConfigProto {
// - If isolate_session_state is true, session states are isolated.
// - If isolate_session_state is false, session states are shared.
//
// TODO: Add a single API that consistently treats
// TODO(b/129330037): Add a single API that consistently treats
// isolate_session_state and ClusterSpec propagation.
bool share_session_state_in_clusterspec_propagation = 8;
@ -704,7 +704,7 @@ message ConfigProto {
// Options for a single Run() call.
message RunOptions {
// TODO Turn this into a TraceOptions proto which allows
// TODO(pbar) Turn this into a TraceOptions proto which allows
// tracing to be controlled in a more orthogonal manner?
enum TraceLevel {
NO_TRACE = 0;
@ -781,7 +781,7 @@ message RunMetadata {
repeated GraphDef partition_graphs = 3;
message FunctionGraphs {
// TODO: Include some sort of function/cache-key identifier?
// TODO(nareshmodi): Include some sort of function/cache-key identifier?
repeated GraphDef partition_graphs = 1;
GraphDef pre_optimization_graph = 2;

View File

@ -194,7 +194,7 @@ service CoordinationService {
// Report error to the task. RPC sets the receiving instance of coordination
// service agent to error state permanently.
// TODO: Consider splitting this into a different RPC service.
// TODO(b/195990880): Consider splitting this into a different RPC service.
rpc ReportErrorToAgent(ReportErrorToAgentRequest)
returns (ReportErrorToAgentResponse);

View File

@ -46,7 +46,7 @@ message DebugTensorWatch {
// are to be debugged, the callers of Session::Run() must use distinct
// debug_urls to make sure that the streamed or dumped events do not overlap
// among the invocations.
// TODO: More visible documentation of this in g3docs.
// TODO(cais): More visible documentation of this in g3docs.
repeated string debug_urls = 4;
// Do not error out if debug op creation fails (e.g., due to dtype

View File

@ -12,7 +12,7 @@ option java_package = "org.tensorflow.util";
option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobuf/for_core_protos_go_proto";
// Available modes for extracting debugging information from a Tensor.
// TODO: Document the detailed column names and semantics in a separate
// TODO(cais): Document the detailed column names and semantics in a separate
// markdown file once the implementation settles.
enum TensorDebugMode {
UNSPECIFIED = 0;
@ -223,7 +223,7 @@ message DebuggedDevice {
// A debugger-generated ID for the device. Guaranteed to be unique within
// the scope of the debugged TensorFlow program, including single-host and
// multi-host settings.
// TODO: Test the uniqueness guarantee in multi-host settings.
// TODO(cais): Test the uniqueness guarantee in multi-host settings.
int32 device_id = 2;
}
@ -264,7 +264,7 @@ message Execution {
// field with the DebuggedDevice messages.
repeated int32 output_tensor_device_ids = 9;
// TODO support, add more fields
// TODO(cais): When backporting to V1 Session.run() support, add more fields
// such as fetches and feeds.
}

View File

@ -7,7 +7,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobu
// Used to serialize and transmit tensorflow::Status payloads through
// grpc::Status `error_details` since grpc::Status lacks payload API.
// TODO: Use GRPC API once supported.
// TODO(b/204231601): Use GRPC API once supported.
message GrpcPayloadContainer {
map<string, bytes> payloads = 1;
}

View File

@ -172,7 +172,7 @@ message WaitQueueDoneRequest {
}
message WaitQueueDoneResponse {
// TODO: Consider adding NodeExecStats here to be able to
// TODO(nareshmodi): Consider adding NodeExecStats here to be able to
// propagate some stats.
}

View File

@ -94,7 +94,7 @@ message ExtendSessionRequest {
}
message ExtendSessionResponse {
// TODO: Return something about the operation?
// TODO(mrry): Return something about the operation?
// The new version number for the extended graph, to be used in the next call
// to ExtendSession.

View File

@ -176,7 +176,7 @@ message SavedBareConcreteFunction {
// allows the ConcreteFunction to be called with nest structure inputs. This
// field may not be populated. If this field is absent, the concrete function
// can only be called with flat inputs.
// TODO: support calling saved ConcreteFunction with structured
// TODO(b/169361281): support calling saved ConcreteFunction with structured
// inputs in C++ SavedModel API.
FunctionSpec function_spec = 4;
}

View File

@ -17,7 +17,7 @@ option go_package = "github.com/tensorflow/tensorflow/tensorflow/go/core/protobu
// Special header that is associated with a bundle.
//
// TODO: maybe in the future, we can add information about
// TODO(zongheng,zhifengc): maybe in the future, we can add information about
// which binary produced this checkpoint, timestamp, etc. Sometime, these can be
// valuable debugging information. And if needed, these can be used as defensive
// information ensuring reader (binary version) of the checkpoint and the writer

View File

@ -188,7 +188,7 @@ message DeregisterGraphRequest {
}
message DeregisterGraphResponse {
// TODO: Optionally add summary stats for the graph.
// TODO(mrry): Optionally add summary stats for the graph.
}
////////////////////////////////////////////////////////////////////////////////
@ -294,7 +294,7 @@ message RunGraphResponse {
// If the request asked for execution stats, the cost graph, or the partition
// graphs, these are returned here.
// TODO: Package these in a RunMetadata instead.
// TODO(suharshs): Package these in a RunMetadata instead.
StepStats step_stats = 2;
CostGraphDef cost_graph = 3;
repeated GraphDef partition_graph = 4;

View File

@ -13,5 +13,5 @@ message LogMetadata {
SamplingConfig sampling_config = 2;
// List of tags used to load the relevant MetaGraphDef from SavedModel.
repeated string saved_model_tags = 3;
// TODO: Add more metadata as mentioned in the bug.
// TODO(b/33279154): Add more metadata as mentioned in the bug.
}

View File

@ -58,7 +58,7 @@ message FileSystemStoragePathSourceConfig {
// A single servable name/base_path pair to monitor.
// DEPRECATED: Use 'servables' instead.
// TODO: Stop using these fields, and ultimately remove them here.
// TODO(b/30898016): Stop using these fields, and ultimately remove them here.
string servable_name = 1 [deprecated = true];
string base_path = 2 [deprecated = true];
@ -76,7 +76,7 @@ message FileSystemStoragePathSourceConfig {
// check for a version to appear later.)
// DEPRECATED: Use 'servable_versions_always_present' instead, which includes
// this behavior.
// TODO: Remove 2019-10-31 or later.
// TODO(b/30898016): Remove 2019-10-31 or later.
bool fail_if_zero_versions_at_startup = 4 [deprecated = true];
// If true, the servable is always expected to exist on the underlying

View File

@ -9,7 +9,7 @@ import "tensorflow_serving/config/logging_config.proto";
option cc_enable_arenas = true;
// The type of model.
// TODO: DEPRECATED.
// TODO(b/31336131): DEPRECATED.
enum ModelType {
MODEL_TYPE_UNSPECIFIED = 0 [deprecated = true];
TENSORFLOW = 1 [deprecated = true];
@ -31,7 +31,7 @@ message ModelConfig {
string base_path = 2;
// Type of model.
// TODO: DEPRECATED. Please use 'model_platform' instead.
// TODO(b/31336131): DEPRECATED. Please use 'model_platform' instead.
ModelType model_type = 3 [deprecated = true];
// Type of model (e.g. "tensorflow").

View File

@ -231,7 +231,7 @@ object TypeaheadEventCandidate {
/**
* Canonical TweetAnnotationQueryCandidate model. Always prefer this version over all other variants.
*
* TODO Remove score from the candidate and use a Feature instead
* TODO(jhara) Remove score from the candidate and use a Feature instead
*/
final class TweetAnnotationQueryCandidate private (
override val id: String,

View File

@ -8,6 +8,7 @@ import scala.collection.mutable.ListBuffer
* The helper class encodes and decodes tweet ids with tweetypie's card information
* when querying recos salsa library. Inside salsa library, all tweet ids are
* encoded with card information for the purpose of inline filtering.
* TODO (wenqih) change TweetIDMask to a mask interface for future extension
*/
class BipartiteGraphHelper(graph: BipartiteGraph) {
private val tweetIDMask = new TweetIDMask