mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-09-28 18:08:41 +02:00
206 lines
8.9 KiB
Thrift
206 lines
8.9 KiB
Thrift
|
# Target-language namespaces for the generated code.
# The `#@namespace` lines are comment-prefixed directives and must be kept
# verbatim (NOTE(review): presumably consumed by the Scala / Strato code
# generators -- confirm before editing them).
namespace java com.twitter.simclusters_v2.thriftjava
namespace py gen.twitter.simclusters_v2.identifier
#@namespace scala com.twitter.simclusters_v2.thriftscala
#@namespace strato com.twitter.simclusters_v2

# Provides online_store.ModelVersion, used by the embedding id structs below.
include "com/twitter/simclusters_v2/online_store.thrift"

/**
 * The uniform type for a SimClusters embedding.
 * All embeddings share the same underlying storage.
 * Warning: Every EmbeddingType should map to one and only one InternalId.
 *
 * Value numbers are grouped into reserved ranges per entity kind; see the
 * range comments inside the enum. The enum is annotated persisted = 'true',
 * so every existing name and number is kept exactly as it was.
 **/
enum EmbeddingType {
  // Reserve 1 - 99 for Tweet embeddings
  FavBasedTweet = 1, // Deprecated
  FollowBasedTweet = 2, // Deprecated
  LogFavBasedTweet = 3, // Production Version
  FavBasedTwistlyTweet = 10, // Deprecated
  LogFavBasedTwistlyTweet = 11, // Deprecated
  LogFavLongestL2EmbeddingTweet = 12, // Production Version

  // Tweet embeddings generated from non-fav events
  // Naming convention: {Event}{Score}BasedTweet
  // {Event}: The interaction event we use to build the tweet embeddings
  // {Score}: The score from user InterestedIn embeddings
  VideoPlayBack50LogFavBasedTweet = 21,
  RetweetLogFavBasedTweet = 22,
  ReplyLogFavBasedTweet = 23,
  PushOpenLogFavBasedTweet = 24,

  // [Experimental] Offline generated FavThroughRate-based Tweet Embedding
  Pop1000RankDecay11Tweet = 30,
  Pop10000RankDecay11Tweet = 31,
  OonPop1000RankDecayTweet = 32,

  // [Experimental] Offline generated production-like LogFavScore-based Tweet Embedding
  OfflineGeneratedLogFavBasedTweet = 40,

  // Reserve 51-59 for Ads Embedding
  LogFavBasedAdsTweet = 51, // Experimental embedding for ads tweet candidate
  LogFavClickBasedAdsTweet = 52, // Experimental embedding for ads tweet candidate

  // Reserve 60-69 for Evergreen content
  LogFavBasedEvergreenTweet = 60,
  LogFavBasedRealTimeTweet = 65,

  // Reserve 101 to 149 for Semantic Core Entity embeddings
  // NOTE(review): "Sematic" in the next two names is a misspelling of
  // "Semantic"; it is kept because renaming would change the generated
  // identifiers that callers reference.
  FavBasedSematicCoreEntity = 101, // Deprecated
  FollowBasedSematicCoreEntity = 102, // Deprecated
  FavBasedHashtagEntity = 103, // Deprecated
  FollowBasedHashtagEntity = 104, // Deprecated
  ProducerFavBasedSemanticCoreEntity = 105, // Deprecated
  ProducerFollowBasedSemanticCoreEntity = 106, // Deprecated
  FavBasedLocaleSemanticCoreEntity = 107, // Deprecated
  FollowBasedLocaleSemanticCoreEntity = 108, // Deprecated
  LogFavBasedLocaleSemanticCoreEntity = 109, // Deprecated
  LanguageFilteredProducerFavBasedSemanticCoreEntity = 110, // Deprecated
  LanguageFilteredFavBasedLocaleSemanticCoreEntity = 111, // Deprecated
  FavTfgTopic = 112, // TFG topic embedding built from fav-based user interestedIn
  LogFavTfgTopic = 113, // TFG topic embedding built from logfav-based user interestedIn
  FavInferredLanguageTfgTopic = 114, // TFG topic embedding built using inferred consumed languages
  FavBasedKgoApeTopic = 115, // topic embedding using fav-based aggregatable producer embedding of KGO seed accounts.
  LogFavBasedKgoApeTopic = 116, // topic embedding using log fav-based aggregatable producer embedding of KGO seed accounts.
  FavBasedOnboardingApeTopic = 117, // topic embedding using fav-based aggregatable producer embedding of onboarding seed accounts.
  LogFavBasedOnboardingApeTopic = 118, // topic embedding using log fav-based aggregatable producer embedding of onboarding seed accounts.
  LogFavApeBasedMuseTopic = 119, // Deprecated
  LogFavApeBasedMuseTopicExperiment = 120, // Deprecated

  // Reserved 201 - 299 for Producer embeddings (KnownFor)
  FavBasedProducer = 201,
  FollowBasedProducer = 202,
  AggregatableFavBasedProducer = 203, // fav-based aggregatable producer embedding.
  AggregatableLogFavBasedProducer = 204, // logfav-based aggregatable producer embedding.
  RelaxedAggregatableLogFavBasedProducer = 205, // logfav-based aggregatable producer embedding.
  AggregatableFollowBasedProducer = 206, // follow-based aggregatable producer embedding.
  // NOTE(review): 300 lies outside the 201 - 299 range stated above; the
  // value is kept unchanged.
  KnownFor = 300,

  // Reserved 301 - 399 for User InterestedIn embeddings
  FavBasedUserInterestedIn = 301,
  FollowBasedUserInterestedIn = 302,
  LogFavBasedUserInterestedIn = 303,
  RecentFollowBasedUserInterestedIn = 304, // interested-in embedding based on aggregating producer embeddings of recent follows
  FilteredUserInterestedIn = 305, // interested-in embedding used by twistly read path
  LogFavBasedUserInterestedInFromAPE = 306,
  FollowBasedUserInterestedInFromAPE = 307,
  TwiceUserInterestedIn = 308, // interested-in multi-embedding based on clustering producer embeddings of neighbors
  UnfilteredUserInterestedIn = 309,
  UserNextInterestedIn = 310, // next interested-in embedding generated from BeT

  // Denser User InterestedIn, generated by Producer embeddings.
  FavBasedUserInterestedInFromPE = 311,
  FollowBasedUserInterestedInFromPE = 312,
  LogFavBasedUserInterestedInFromPE = 313,
  FilteredUserInterestedInFromPE = 314, // interested-in embedding used by twistly read path

  // [Experimental] Denser User InterestedIn, generated by aggregating IIAPE embedding from AddressBook
  LogFavBasedUserInterestedMaxpoolingAddressBookFromIIAPE = 320,
  LogFavBasedUserInterestedAverageAddressBookFromIIAPE = 321,
  LogFavBasedUserInterestedBooktypeMaxpoolingAddressBookFromIIAPE = 322,
  LogFavBasedUserInterestedLargestDimMaxpoolingAddressBookFromIIAPE = 323,
  LogFavBasedUserInterestedLouvainMaxpoolingAddressBookFromIIAPE = 324,
  LogFavBasedUserInterestedConnectedMaxpoolingAddressBookFromIIAPE = 325,

  // Reserved 401 - 500 for Space embedding
  FavBasedApeSpace = 401, // DEPRECATED
  LogFavBasedListenerSpace = 402, // DEPRECATED
  LogFavBasedAPESpeakerSpace = 403, // DEPRECATED
  LogFavBasedUserInterestedInListenerSpace = 404, // DEPRECATED

  // Experimental, internal-only IDs
  ExperimentalThirtyDayRecentFollowBasedUserInterestedIn = 10000, // Like RecentFollowBasedUserInterestedIn, except limited to last 30 days
  ExperimentalLogFavLongestL2EmbeddingTweet = 10001, // DEPRECATED
}(persisted = 'true', hasPersonalData = 'false')

/**
 * The uniform type for a SimClusters MultiEmbedding.
 * Warning: Every MultiEmbeddingType should map to one and only one InternalId.
 *
 * Value numbers are grouped into reserved ranges; see the range comments
 * inside the enum. No value is currently defined in the 0 - 99 range.
 **/
enum MultiEmbeddingType {
  // Reserved 0 - 99 for Tweet based MultiEmbedding

  // Reserved 100 - 199 for Topic based MultiEmbedding
  LogFavApeBasedMuseTopic = 100, // Deprecated
  LogFavApeBasedMuseTopicExperiment = 101, // Deprecated

  // Reserved 301 - 399 for User InterestedIn embeddings
  TwiceUserInterestedIn = 301, // interested-in multi-embedding based on clustering producer embeddings of neighbors
}(persisted = 'true', hasPersonalData = 'true')

// Deprecated. Please use TopicId for future cases.
// Pairs an entity id with a language string.
struct LocaleEntityId {
  1: i64 entityId
  2: string language
}(persisted = 'true', hasPersonalData = 'false')

// Kind of engagement carried by UserEngagedTweetId.engagementType.
enum EngagementType {
  Favorite = 1,
  Retweet = 2,
}

// Identifier made of a tweet id, a user id and the type of engagement.
// Used as the userEngagedTweetId member of InternalId.
struct UserEngagedTweetId {
  1: i64 tweetId(personalDataType = 'TweetId')
  2: i64 userId(personalDataType = 'UserId')
  3: EngagementType engagementType(personalDataType = 'EventType')
}(persisted = 'true', hasPersonalData = 'true')

// Topic identifier: an entity id with optional language and country
// qualifiers. LocaleEntityId's comment names this struct as its replacement.
struct TopicId {
  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
  // 2-letter ISO 639-1 language code
  2: optional string language
  // 2-letter ISO 3166-1 alpha-2 country code
  3: optional string country
}(persisted = 'true', hasPersonalData = 'false')

// Same first three fields as TopicId, plus an integer subId.
// NOTE(review): the meaning of subId is not documented in this file --
// confirm with the owning team before relying on it.
struct TopicSubId {
  1: i64 entityId (personalDataType = 'SemanticcoreClassification')
  // 2-letter ISO 639-1 language code
  2: optional string language
  // 2-letter ISO 3166-1 alpha-2 country code
  3: optional string country
  4: i32 subId
}(persisted = 'true', hasPersonalData = 'true')

// Will be used for testing purposes in DDG 15536, 15534.
// A required user id with an optional language code.
struct UserWithLanguageId {
  1: required i64 userId(personalDataType = 'UserId')
  2: optional string langCode(personalDataType = 'InferredLanguage')
}(persisted = 'true', hasPersonalData = 'true')

/**
 * The internal identifier type: a union, so exactly one member is set.
 * When adding a new member, also add its ordering in
 * [[com.twitter.simclusters_v2.common.SimClustersEmbeddingId]].
 **/
union InternalId {
  1: i64 tweetId(personalDataType = 'TweetId')
  2: i64 userId(personalDataType = 'UserId')
  3: i64 entityId(personalDataType = 'SemanticcoreClassification')
  4: string hashtag(personalDataType = 'PublicTweetEntitiesAndMetadata')
  5: i32 clusterId
  6: LocaleEntityId localeEntityId(personalDataType = 'SemanticcoreClassification')
  7: UserEngagedTweetId userEngagedTweetId
  8: TopicId topicId
  9: TopicSubId topicSubId
  10: string spaceId
  11: UserWithLanguageId userWithLanguageId
}(persisted = 'true', hasPersonalData = 'true')

/**
 * A uniform identifier type for all kinds of SimClusters based embeddings.
 * All three fields are required: the embedding type, the model version
 * (declared in online_store.thrift) and the internal id.
 **/
struct SimClustersEmbeddingId {
  1: required EmbeddingType embeddingType
  2: required online_store.ModelVersion modelVersion
  3: required InternalId internalId
}(persisted = 'true', hasPersonalData = 'true')

/**
 * A uniform identifier type for multiple SimClusters embeddings.
 * Same layout as SimClustersEmbeddingId, except that embeddingType is a
 * MultiEmbeddingType. All three fields are required.
 **/
struct SimClustersMultiEmbeddingId {
  1: required MultiEmbeddingType embeddingType
  2: required online_store.ModelVersion modelVersion
  3: required InternalId internalId
}(persisted = 'true', hasPersonalData = 'true')