Delete recos-injector directory
This commit is contained in:
parent
0ae3f56f0f
commit
70f6c18f0b
|
@ -1 +0,0 @@
|
|||
# This prevents SQ query from grabbing //:all since it traverses up once to find a BUILD (DPB-14048)
|
|
@ -1,10 +0,0 @@
|
|||
; See http://go/CONFIG.ini
|
||||
|
||||
[jira]
|
||||
project: SD
|
||||
|
||||
[docbird]
|
||||
project_name = recos-injector
|
||||
|
||||
[kite]
|
||||
project: recos-injector
|
|
@ -1,40 +0,0 @@
|
|||
# Recos-Injector
|
||||
|
||||
Recos-Injector is a streaming event processor used to build input streams for GraphJet-based services. It is a general-purpose tool that consumes arbitrary incoming event streams (e.g., Fav, RT, Follow, client_events, etc.), applies filtering, and combines and publishes cleaned up events to corresponding GraphJet services. Each GraphJet-based service subscribes to a dedicated Kafka topic, and Recos-Injector enables GraphJet-based services to consume any event they want.
|
||||
|
||||
## How to run Recos-Injector server tests
|
||||
|
||||
You can run tests by using the following command from your project's root directory:
|
||||
|
||||
$ bazel build recos-injector/...
|
||||
$ bazel test recos-injector/...
|
||||
|
||||
## How to run recos-injector-server in development on a local machine
|
||||
|
||||
The simplest way to stand up a service is to run it locally. To run
|
||||
recos-injector-server in development mode, compile the project and then
|
||||
execute it with `bazel run`:
|
||||
|
||||
$ bazel build recos-injector/server:bin
|
||||
$ bazel run recos-injector/server:bin
|
||||
|
||||
A tunnel can be set up in order for downstream queries to work properly.
|
||||
Upon successful server startup, try to `curl` its admin endpoint in another
|
||||
terminal:
|
||||
|
||||
$ curl -s localhost:9990/admin/ping
|
||||
pong
|
||||
|
||||
Run `curl -s localhost:9990/admin` to see a list of all available admin endpoints.
|
||||
|
||||
## Querying Recos-Injector server from a Scala console
|
||||
|
||||
Recos-Injector does not have a Thrift endpoint. Instead, it reads Event Bus and Kafka queues and writes to the Recos-Injector Kafka.
|
||||
|
||||
## Generating a package for deployment
|
||||
|
||||
To package your service into a zip file for deployment, run:
|
||||
|
||||
$ bazel bundle recos-injector/server:bin --bundle-jvm-archive=zip
|
||||
|
||||
If the command is successful, a file named `dist/recos-injector-server.zip` will be created.
|
|
@ -1,43 +0,0 @@
|
|||
target(
|
||||
name = "server",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector",
|
||||
],
|
||||
)
|
||||
|
||||
test_suite(
|
||||
name = "tests",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"recos-injector/server/src/test/scala/com/twitter/recosinjector",
|
||||
],
|
||||
)
|
||||
|
||||
jvm_binary(
|
||||
name = "bin",
|
||||
basename = "recos-injector-server",
|
||||
main = "com.twitter.recosinjector.Main",
|
||||
platform = "java11",
|
||||
runtime_platform = "java11",
|
||||
tags = [
|
||||
"bazel-compatible:migrated",
|
||||
],
|
||||
dependencies = [
|
||||
":server",
|
||||
"3rdparty/jvm/org/slf4j:slf4j-jdk14",
|
||||
],
|
||||
)
|
||||
|
||||
jvm_app(
|
||||
name = "bundle",
|
||||
basename = "recos-injector",
|
||||
binary = ":bin",
|
||||
bundles = [bundle(
|
||||
fileset = ["config/*"],
|
||||
owning_target = "recos-injector/server/config:files",
|
||||
)],
|
||||
tags = [
|
||||
"bazel-compatible:migrated",
|
||||
],
|
||||
)
|
|
@ -1,20 +0,0 @@
|
|||
resources(
|
||||
sources = [
|
||||
"!*.pyc",
|
||||
"!BUILD*",
|
||||
"*",
|
||||
],
|
||||
tags = ["bazel-compatible"],
|
||||
)
|
||||
|
||||
# Created for Bazel compatibility.
|
||||
# In Bazel, loose files must be part of a target to be included into a bundle.
|
||||
# See also http://go/bazel-compatibility/bundle_does_not_match_any_files
|
||||
files(
|
||||
name = "files",
|
||||
sources = [
|
||||
"!BUILD",
|
||||
"**/*",
|
||||
],
|
||||
tags = ["bazel-compatible"],
|
||||
)
|
|
@ -1,7 +0,0 @@
|
|||
[Configs]
|
||||
DCS = all
|
||||
ROLE = recos-injector
|
||||
JOB = recos-injector
|
||||
ENV = prod
|
||||
PACKAGE = recos-injector-release
|
||||
PATH = recos-injector
|
|
@ -1,11 +0,0 @@
|
|||
tweet_event_transformer_user_tweet_entity_edges:
|
||||
comment: "Enables the generation of UserTweetEntity edges in tweet event transformer"
|
||||
default_availability: 0
|
||||
|
||||
enable_emit_tweet_edge_from_reply:
|
||||
comment: "Decides when processing a Reply edge, whether to generate a Tweet edge for it as well"
|
||||
default_availability: 0
|
||||
|
||||
enable_unfavorite_edge:
|
||||
comment: "Decides when processing a UnfavoriteEvent from Timeline events, whether to process unfav edges"
|
||||
default_availability: 0
|
|
@ -1,40 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = [
|
||||
"bazel-compatible",
|
||||
],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/io/netty:netty4-tcnative-boringssl-static",
|
||||
"3rdparty/jvm/org/apache/thrift:libthrift",
|
||||
"eventbus/client",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/authentication",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/client",
|
||||
"finagle-internal/mtls/src/main/scala/com/twitter/finagle/mtls/server",
|
||||
"finagle/finagle-core/src/main",
|
||||
"finagle/finagle-http/src/main/scala",
|
||||
"finagle/finagle-stats",
|
||||
"finagle/finagle-thriftmux",
|
||||
"recos-injector/server/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/decider",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/edges",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/event_processors",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/publishers",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/uua_processors",
|
||||
"src/thrift/com/twitter/clientapp/gen:clientapp-scala",
|
||||
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:events-scala",
|
||||
"src/thrift/com/twitter/tweetypie:tweet-scala",
|
||||
"thrift-web-forms",
|
||||
"twitter-server-internal",
|
||||
"twitter-server/server/src/main/scala",
|
||||
"twitter-server/slf4j-jdk14/src/main/scala/com/twitter/server/logging",
|
||||
"util/util-app",
|
||||
"util/util-logging/src/main/scala",
|
||||
"util/util-stats/src/main/scala",
|
||||
],
|
||||
)
|
|
@ -1,213 +0,0 @@
|
|||
package com.twitter.recosinjector
|
||||
|
||||
import com.twitter.app.Flag
|
||||
import com.twitter.finagle.http.HttpMuxer
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.ElfOwlFilter
|
||||
import com.twitter.recosinjector.clients.Gizmoduck
|
||||
import com.twitter.recosinjector.clients.RecosHoseEntitiesCache
|
||||
import com.twitter.recosinjector.clients.SocialGraph
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.recosinjector.clients.UrlResolver
|
||||
import com.twitter.recosinjector.config._
|
||||
import com.twitter.recosinjector.edges.SocialWriteEventToUserUserGraphBuilder
|
||||
import com.twitter.recosinjector.edges.TimelineEventToUserTweetEntityGraphBuilder
|
||||
import com.twitter.recosinjector.edges.TweetEventToUserTweetEntityGraphBuilder
|
||||
import com.twitter.recosinjector.edges.TweetEventToUserUserGraphBuilder
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserVideoGraphBuilder
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserAdGraphBuilder
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserTweetGraphPlusBuilder
|
||||
import com.twitter.recosinjector.edges.UserTweetEntityEdgeBuilder
|
||||
import com.twitter.recosinjector.event_processors.SocialWriteEventProcessor
|
||||
import com.twitter.recosinjector.event_processors.TimelineEventProcessor
|
||||
import com.twitter.recosinjector.event_processors.TweetEventProcessor
|
||||
import com.twitter.recosinjector.publishers.KafkaEventPublisher
|
||||
import com.twitter.recosinjector.uua_processors.UnifiedUserActionProcessor
|
||||
import com.twitter.recosinjector.uua_processors.UnifiedUserActionsConsumer
|
||||
import com.twitter.server.logging.{Logging => JDK14Logging}
|
||||
import com.twitter.server.Deciderable
|
||||
import com.twitter.server.TwitterServer
|
||||
import com.twitter.socialgraph.thriftscala.WriteEvent
|
||||
import com.twitter.timelineservice.thriftscala.{Event => TimelineEvent}
|
||||
import com.twitter.tweetypie.thriftscala.TweetEvent
|
||||
import com.twitter.util.Await
|
||||
import com.twitter.util.Duration
|
||||
import java.util.concurrent.TimeUnit
|
||||
|
||||
object Main extends TwitterServer with JDK14Logging with Deciderable { self =>
|
||||
|
||||
implicit val stats: StatsReceiver = statsReceiver
|
||||
|
||||
private val dataCenter: Flag[String] = flag("service.cluster", "atla", "Data Center")
|
||||
private val serviceRole: Flag[String] = flag("service.role", "Service Role")
|
||||
private val serviceEnv: Flag[String] = flag("service.env", "Service Env")
|
||||
private val serviceName: Flag[String] = flag("service.name", "Service Name")
|
||||
private val shardId = flag("shardId", 0, "Shard ID")
|
||||
private val numShards = flag("numShards", 1, "Number of shards for this service")
|
||||
private val truststoreLocation =
|
||||
flag[String]("truststore_location", "", "Truststore file location")
|
||||
|
||||
def main(): Unit = {
|
||||
val serviceIdentifier = ServiceIdentifier(
|
||||
role = serviceRole(),
|
||||
service = serviceName(),
|
||||
environment = serviceEnv(),
|
||||
zone = dataCenter()
|
||||
)
|
||||
println("ServiceIdentifier = " + serviceIdentifier.toString)
|
||||
log.info("ServiceIdentifier = " + serviceIdentifier.toString)
|
||||
|
||||
val shard = shardId()
|
||||
val numOfShards = numShards()
|
||||
val environment = serviceEnv()
|
||||
|
||||
implicit val config: DeployConfig = {
|
||||
environment match {
|
||||
case "prod" => ProdConfig(serviceIdentifier)(stats)
|
||||
case "staging" | "devel" => StagingConfig(serviceIdentifier)
|
||||
case env => throw new Exception(s"Unknown environment $env")
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize the config and wait for initialization to finish
|
||||
Await.ready(config.init())
|
||||
|
||||
log.info(
|
||||
"Starting Recos Injector: environment %s, clientId %s",
|
||||
environment,
|
||||
config.recosInjectorThriftClientId
|
||||
)
|
||||
log.info("Starting shard Id: %d of %d shards...".format(shard, numOfShards))
|
||||
|
||||
// Client wrappers
|
||||
val cache = new RecosHoseEntitiesCache(config.recosInjectorCoreSvcsCacheClient)
|
||||
val gizmoduck = new Gizmoduck(config.userStore)
|
||||
val socialGraph = new SocialGraph(config.socialGraphIdStore)
|
||||
val tweetypie = new Tweetypie(config.tweetyPieStore)
|
||||
val urlResolver = new UrlResolver(config.urlInfoStore)
|
||||
|
||||
// Edge builders
|
||||
val userTweetEntityEdgeBuilder = new UserTweetEntityEdgeBuilder(cache, urlResolver)
|
||||
|
||||
// Publishers
|
||||
val kafkaEventPublisher = KafkaEventPublisher(
|
||||
"/s/kafka/recommendations:kafka-tls",
|
||||
config.outputKafkaTopicPrefix,
|
||||
config.recosInjectorThriftClientId,
|
||||
truststoreLocation())
|
||||
|
||||
// Message Builders
|
||||
val socialWriteToUserUserMessageBuilder =
|
||||
new SocialWriteEventToUserUserGraphBuilder()(
|
||||
statsReceiver.scope("SocialWriteEventToUserUserGraphBuilder")
|
||||
)
|
||||
|
||||
val timelineToUserTweetEntityMessageBuilder = new TimelineEventToUserTweetEntityGraphBuilder(
|
||||
userTweetEntityEdgeBuilder = userTweetEntityEdgeBuilder
|
||||
)(statsReceiver.scope("TimelineEventToUserTweetEntityGraphBuilder"))
|
||||
|
||||
val tweetEventToUserTweetEntityGraphBuilder = new TweetEventToUserTweetEntityGraphBuilder(
|
||||
userTweetEntityEdgeBuilder = userTweetEntityEdgeBuilder,
|
||||
tweetCreationStore = config.tweetCreationStore,
|
||||
decider = config.recosInjectorDecider
|
||||
)(statsReceiver.scope("TweetEventToUserTweetEntityGraphBuilder"))
|
||||
|
||||
val socialWriteEventProcessor = new SocialWriteEventProcessor(
|
||||
eventBusStreamName = s"recos_injector_social_write_event_$environment",
|
||||
thriftStruct = WriteEvent,
|
||||
serviceIdentifier = serviceIdentifier,
|
||||
kafkaEventPublisher = kafkaEventPublisher,
|
||||
userUserGraphTopic = KafkaEventPublisher.UserUserTopic,
|
||||
userUserGraphMessageBuilder = socialWriteToUserUserMessageBuilder
|
||||
)(statsReceiver.scope("SocialWriteEventProcessor"))
|
||||
|
||||
val tweetToUserUserMessageBuilder = new TweetEventToUserUserGraphBuilder()(
|
||||
statsReceiver.scope("TweetEventToUserUserGraphBuilder")
|
||||
)
|
||||
|
||||
val unifiedUserActionToUserVideoGraphBuilder = new UnifiedUserActionToUserVideoGraphBuilder(
|
||||
userTweetEntityEdgeBuilder = userTweetEntityEdgeBuilder
|
||||
)(statsReceiver.scope("UnifiedUserActionToUserVideoGraphBuilder"))
|
||||
|
||||
val unifiedUserActionToUserAdGraphBuilder = new UnifiedUserActionToUserAdGraphBuilder(
|
||||
userTweetEntityEdgeBuilder = userTweetEntityEdgeBuilder
|
||||
)(statsReceiver.scope("UnifiedUserActionToUserAdGraphBuilder"))
|
||||
|
||||
val unifiedUserActionToUserTweetGraphPlusBuilder =
|
||||
new UnifiedUserActionToUserTweetGraphPlusBuilder(
|
||||
userTweetEntityEdgeBuilder = userTweetEntityEdgeBuilder
|
||||
)(statsReceiver.scope("UnifiedUserActionToUserTweetGraphPlusBuilder"))
|
||||
|
||||
// Processors
|
||||
val tweetEventProcessor = new TweetEventProcessor(
|
||||
eventBusStreamName = s"recos_injector_tweet_events_$environment",
|
||||
thriftStruct = TweetEvent,
|
||||
serviceIdentifier = serviceIdentifier,
|
||||
userUserGraphMessageBuilder = tweetToUserUserMessageBuilder,
|
||||
userUserGraphTopic = KafkaEventPublisher.UserUserTopic,
|
||||
userTweetEntityGraphMessageBuilder = tweetEventToUserTweetEntityGraphBuilder,
|
||||
userTweetEntityGraphTopic = KafkaEventPublisher.UserTweetEntityTopic,
|
||||
kafkaEventPublisher = kafkaEventPublisher,
|
||||
socialGraph = socialGraph,
|
||||
tweetypie = tweetypie,
|
||||
gizmoduck = gizmoduck
|
||||
)(statsReceiver.scope("TweetEventProcessor"))
|
||||
|
||||
val timelineEventProcessor = new TimelineEventProcessor(
|
||||
eventBusStreamName = s"recos_injector_timeline_events_prototype_$environment",
|
||||
thriftStruct = TimelineEvent,
|
||||
serviceIdentifier = serviceIdentifier,
|
||||
kafkaEventPublisher = kafkaEventPublisher,
|
||||
userTweetEntityGraphTopic = KafkaEventPublisher.UserTweetEntityTopic,
|
||||
userTweetEntityGraphMessageBuilder = timelineToUserTweetEntityMessageBuilder,
|
||||
decider = config.recosInjectorDecider,
|
||||
gizmoduck = gizmoduck,
|
||||
tweetypie = tweetypie
|
||||
)(statsReceiver.scope("TimelineEventProcessor"))
|
||||
|
||||
val eventBusProcessors = Seq(
|
||||
timelineEventProcessor,
|
||||
socialWriteEventProcessor,
|
||||
tweetEventProcessor
|
||||
)
|
||||
|
||||
val uuaProcessor = new UnifiedUserActionProcessor(
|
||||
gizmoduck = gizmoduck,
|
||||
tweetypie = tweetypie,
|
||||
kafkaEventPublisher = kafkaEventPublisher,
|
||||
userVideoGraphTopic = KafkaEventPublisher.UserVideoTopic,
|
||||
userVideoGraphBuilder = unifiedUserActionToUserVideoGraphBuilder,
|
||||
userAdGraphTopic = KafkaEventPublisher.UserAdTopic,
|
||||
userAdGraphBuilder = unifiedUserActionToUserAdGraphBuilder,
|
||||
userTweetGraphPlusTopic = KafkaEventPublisher.UserTweetPlusTopic,
|
||||
userTweetGraphPlusBuilder = unifiedUserActionToUserTweetGraphPlusBuilder)(
|
||||
statsReceiver.scope("UnifiedUserActionProcessor"))
|
||||
|
||||
val uuaConsumer = new UnifiedUserActionsConsumer(uuaProcessor, truststoreLocation())
|
||||
|
||||
// Start-up init and graceful shutdown setup
|
||||
|
||||
// wait a bit for services to be ready
|
||||
Thread.sleep(5000L)
|
||||
|
||||
log.info("Starting the event processors")
|
||||
eventBusProcessors.foreach(_.start())
|
||||
|
||||
log.info("Starting the uua processors")
|
||||
uuaConsumer.atLeastOnceProcessor.start()
|
||||
|
||||
this.addAdminRoute(ElfOwlFilter.getPostbackRoute())
|
||||
|
||||
onExit {
|
||||
log.info("Shutting down the event processors")
|
||||
eventBusProcessors.foreach(_.stop())
|
||||
log.info("Shutting down the uua processors")
|
||||
uuaConsumer.atLeastOnceProcessor.close()
|
||||
log.info("done exit")
|
||||
}
|
||||
|
||||
// Wait on the thriftServer so that shutdownTimeout is respected.
|
||||
Await.result(adminHttpServer)
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"finagle/finagle-memcached/src/main/scala",
|
||||
"finagle/finagle-stats",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/base",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
|
||||
"servo/repo/src/main/scala",
|
||||
"src/thrift/com/twitter/gizmoduck:thrift-scala",
|
||||
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
|
||||
"src/thrift/com/twitter/recos:recos-internal-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:service-scala",
|
||||
"stitch/stitch-tweetypie/src/main/scala",
|
||||
"util/util-logging/src/main/scala",
|
||||
],
|
||||
)
|
|
@ -1,26 +0,0 @@
|
|||
package com.twitter.recosinjector.clients
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.gizmoduck.thriftscala.User
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
|
||||
class Gizmoduck(
|
||||
userStore: ReadableStore[Long, User]
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val log = Logger()
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
|
||||
def getUser(userId: Long): Future[Option[User]] = {
|
||||
userStore
|
||||
.get(userId)
|
||||
.rescue {
|
||||
case e =>
|
||||
stats.scope("getUserFailure").counter(e.getClass.getSimpleName).incr()
|
||||
log.error(s"Failed with message ${e.toString}")
|
||||
Future.None
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,137 +0,0 @@
|
|||
package com.twitter.recosinjector.clients
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.io.Buf
|
||||
import com.twitter.recos.internal.thriftscala.{RecosHoseEntities, RecosHoseEntity}
|
||||
import com.twitter.servo.cache.ThriftSerializer
|
||||
import com.twitter.util.{Duration, Future, Time}
|
||||
import org.apache.thrift.protocol.TBinaryProtocol
|
||||
|
||||
case class CacheEntityEntry(
|
||||
cachePrefix: String,
|
||||
hashedEntityId: Int,
|
||||
entity: String) {
|
||||
val fullKey: String = cachePrefix + hashedEntityId
|
||||
}
|
||||
|
||||
object RecosHoseEntitiesCache {
|
||||
val EntityTTL: Duration = 30.hours
|
||||
val EntitiesSerializer =
|
||||
new ThriftSerializer[RecosHoseEntities](RecosHoseEntities, new TBinaryProtocol.Factory())
|
||||
|
||||
val HashtagPrefix: String = "h"
|
||||
val UrlPrefix: String = "u"
|
||||
}
|
||||
|
||||
/**
|
||||
* A cache layer to store entities.
|
||||
* Graph services like user_tweet_entity_graph and user_url_graph store user interactions with
|
||||
* entities in a tweet, such as HashTags and URLs. These entities are string values that can be
|
||||
* potentially very big. Therefore, we instead store a hashed id in the graph edge, and keep a
|
||||
* (hashedId -> entity) mapping in this cache. The actual entity values can be recovered
|
||||
* by the graph service at serving time using this cache.
|
||||
*/
|
||||
class RecosHoseEntitiesCache(client: Client) {
|
||||
import RecosHoseEntitiesCache._
|
||||
|
||||
private def isEntityWithinTTL(entity: RecosHoseEntity, ttlInMillis: Long): Boolean = {
|
||||
entity.timestamp.exists(timestamp => Time.now.inMilliseconds - timestamp <= ttlInMillis)
|
||||
}
|
||||
|
||||
/**
|
||||
* Add a new RecosHoseEntity into RecosHoseEntities
|
||||
*/
|
||||
private def updateRecosHoseEntities(
|
||||
existingEntitiesOpt: Option[RecosHoseEntities],
|
||||
newEntityString: String,
|
||||
stats: StatsReceiver
|
||||
): RecosHoseEntities = {
|
||||
val existingEntities = existingEntitiesOpt.map(_.entities).getOrElse(Nil)
|
||||
|
||||
// Discard expired and duplicate existing entities
|
||||
val validExistingEntities = existingEntities
|
||||
.filter(entity => isEntityWithinTTL(entity, EntityTTL.inMillis))
|
||||
.filter(_.entity != newEntityString)
|
||||
|
||||
val newRecosHoseEntity = RecosHoseEntity(newEntityString, Some(Time.now.inMilliseconds))
|
||||
RecosHoseEntities(validExistingEntities :+ newRecosHoseEntity)
|
||||
}
|
||||
|
||||
private def getRecosHoseEntitiesCache(
|
||||
cacheEntries: Seq[CacheEntityEntry],
|
||||
stats: StatsReceiver
|
||||
): Future[Map[String, Option[RecosHoseEntities]]] = {
|
||||
client
|
||||
.get(cacheEntries.map(_.fullKey))
|
||||
.map(_.map {
|
||||
case (cacheKey, buf) =>
|
||||
val recosHoseEntitiesTry = EntitiesSerializer.from(Buf.ByteArray.Owned.extract(buf))
|
||||
if (recosHoseEntitiesTry.isThrow) {
|
||||
stats.counter("cache_get_deserialization_failure").incr()
|
||||
}
|
||||
cacheKey -> recosHoseEntitiesTry.toOption
|
||||
})
|
||||
.onSuccess { _ => stats.counter("get_cache_success").incr() }
|
||||
.onFailure { ex =>
|
||||
stats.scope("get_cache_failure").counter(ex.getClass.getSimpleName).incr()
|
||||
}
|
||||
}
|
||||
|
||||
private def putRecosHoseEntitiesCache(
|
||||
cacheKey: String,
|
||||
recosHoseEntities: RecosHoseEntities,
|
||||
stats: StatsReceiver
|
||||
): Unit = {
|
||||
val serialized = EntitiesSerializer.to(recosHoseEntities)
|
||||
if (serialized.isThrow) {
|
||||
stats.counter("cache_put_serialization_failure").incr()
|
||||
}
|
||||
serialized.toOption.map { bytes =>
|
||||
client
|
||||
.set(cacheKey, 0, EntityTTL.fromNow, Buf.ByteArray.Owned(bytes))
|
||||
.onSuccess { _ => stats.counter("put_cache_success").incr() }
|
||||
.onFailure { ex =>
|
||||
stats.scope("put_cache_failure").counter(ex.getClass.getSimpleName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Store a list of new entities into the cache by their cacheKeys, and remove expired/invalid
|
||||
* values in the existing cache entries at the same time
|
||||
*/
|
||||
def updateEntitiesCache(
|
||||
newCacheEntries: Seq[CacheEntityEntry],
|
||||
stats: StatsReceiver
|
||||
): Future[Unit] = {
|
||||
stats.counter("update_cache_request").incr()
|
||||
getRecosHoseEntitiesCache(newCacheEntries, stats)
|
||||
.map { existingCacheEntries =>
|
||||
newCacheEntries.foreach { newCacheEntry =>
|
||||
val fullKey = newCacheEntry.fullKey
|
||||
val existingRecosHoseEntities = existingCacheEntries.get(fullKey).flatten
|
||||
stats.stat("num_existing_entities").add(existingRecosHoseEntities.size)
|
||||
if (existingRecosHoseEntities.isEmpty) {
|
||||
stats.counter("existing_entities_empty").incr()
|
||||
}
|
||||
|
||||
val updatedRecosHoseEntities = updateRecosHoseEntities(
|
||||
existingRecosHoseEntities,
|
||||
newCacheEntry.entity,
|
||||
stats
|
||||
)
|
||||
stats.stat("num_updated_entities").add(updatedRecosHoseEntities.entities.size)
|
||||
|
||||
if (updatedRecosHoseEntities.entities.nonEmpty) {
|
||||
putRecosHoseEntitiesCache(fullKey, updatedRecosHoseEntities, stats)
|
||||
}
|
||||
}
|
||||
}
|
||||
.onSuccess { _ => stats.counter("update_cache_success").incr() }
|
||||
.onFailure { ex =>
|
||||
stats.scope("update_cache_failure").counter(ex.getClass.getSimpleName).incr()
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,80 +0,0 @@
|
|||
package com.twitter.recosinjector.clients
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.socialgraph.thriftscala._
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
|
||||
class SocialGraph(
|
||||
socialGraphIdStore: ReadableStore[IdsRequest, IdsResult]
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
import SocialGraph._
|
||||
private val log = Logger()
|
||||
|
||||
private val followedByNotMutedByStats = statsReceiver.scope("followedByNotMutedBy")
|
||||
|
||||
private def fetchIdsFromSocialGraph(
|
||||
userId: Long,
|
||||
ids: Seq[Long],
|
||||
relationshipTypes: Map[RelationshipType, Boolean],
|
||||
lookupContext: Option[LookupContext] = IncludeInactiveUnionLookupContext,
|
||||
stats: StatsReceiver
|
||||
): Future[Seq[Long]] = {
|
||||
if (ids.isEmpty) {
|
||||
stats.counter("fetchIdsEmpty").incr()
|
||||
Future.Nil
|
||||
} else {
|
||||
val relationships = relationshipTypes.map {
|
||||
case (relationshipType, hasRelationship) =>
|
||||
SrcRelationship(
|
||||
source = userId,
|
||||
relationshipType = relationshipType,
|
||||
hasRelationship = hasRelationship,
|
||||
targets = Some(ids)
|
||||
)
|
||||
}.toSeq
|
||||
val idsRequest = IdsRequest(
|
||||
relationships = relationships,
|
||||
pageRequest = SelectAllPageRequest,
|
||||
context = lookupContext
|
||||
)
|
||||
socialGraphIdStore
|
||||
.get(idsRequest)
|
||||
.map { _.map(_.ids).getOrElse(Nil) }
|
||||
.rescue {
|
||||
case e =>
|
||||
stats.scope("fetchIdsFailure").counter(e.getClass.getSimpleName).incr()
|
||||
log.error(s"Failed with message ${e.toString}")
|
||||
Future.Nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// which of the users in candidates follow userId and have not muted userId
|
||||
def followedByNotMutedBy(userId: Long, candidates: Seq[Long]): Future[Seq[Long]] = {
|
||||
fetchIdsFromSocialGraph(
|
||||
userId,
|
||||
candidates,
|
||||
FollowedByNotMutedRelationships,
|
||||
IncludeInactiveLookupContext,
|
||||
followedByNotMutedByStats
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object SocialGraph {
|
||||
val SelectAllPageRequest = Some(PageRequest(selectAll = Some(true)))
|
||||
|
||||
val IncludeInactiveLookupContext = Some(LookupContext(includeInactive = true))
|
||||
val IncludeInactiveUnionLookupContext = Some(
|
||||
LookupContext(includeInactive = true, performUnion = Some(true))
|
||||
)
|
||||
|
||||
val FollowedByNotMutedRelationships: Map[RelationshipType, Boolean] = Map(
|
||||
RelationshipType.FollowedBy -> true,
|
||||
RelationshipType.MutedBy -> false
|
||||
)
|
||||
}
|
|
@ -1,30 +0,0 @@
|
|||
package com.twitter.recosinjector.clients
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.stitch.tweetypie.TweetyPie.{TweetyPieException, TweetyPieResult}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.tweetypie.thriftscala.Tweet
|
||||
import com.twitter.util.Future
|
||||
|
||||
class Tweetypie(
|
||||
tweetyPieStore: ReadableStore[Long, TweetyPieResult]
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
private val failureStats = stats.scope("getTweetFailure")
|
||||
|
||||
def getTweet(tweetId: Long): Future[Option[Tweet]] = {
|
||||
tweetyPieStore
|
||||
.get(tweetId)
|
||||
.map { _.map(_.tweet) }
|
||||
.rescue {
|
||||
case e: TweetyPieException =>
|
||||
// Usually results from trying to query a protected or unsafe tweet
|
||||
failureStats.scope("TweetyPieException").counter(e.result.tweetState.toString).incr()
|
||||
Future.None
|
||||
case e =>
|
||||
failureStats.counter(e.getClass.getSimpleName).incr()
|
||||
Future.None
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,105 +0,0 @@
|
|||
package com.twitter.recosinjector.clients
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.util.DefaultTimer
|
||||
import com.twitter.frigate.common.util.{SnowflakeUtils, UrlInfo}
|
||||
import com.twitter.storehaus.{FutureOps, ReadableStore}
|
||||
import com.twitter.util.{Duration, Future, Timer}
|
||||
|
||||
class UrlResolver(
|
||||
urlInfoStore: ReadableStore[String, UrlInfo]
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val EmptyFutureMap = Future.value(Map.empty[String, String])
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
private val twitterResolvedUrlCounter = stats.counter("twitterResolvedUrl")
|
||||
private val resolvedUrlCounter = stats.counter("resolvedUrl")
|
||||
private val noResolvedUrlCounter = stats.counter("noResolvedUrl")
|
||||
|
||||
private val numNoDelayCounter = stats.counter("urlResolver_no_delay")
|
||||
private val numDelayCounter = stats.counter("urlResolver_delay")
|
||||
|
||||
implicit val timer: Timer = DefaultTimer
|
||||
|
||||
/**
|
||||
* Get the resolved URL map of the input raw URLs
|
||||
*
|
||||
* @param rawUrls list of raw URLs to query
|
||||
* @return map of raw URL to resolved URL
|
||||
*/
|
||||
def getResolvedUrls(rawUrls: Set[String]): Future[Map[String, String]] = {
|
||||
FutureOps
|
||||
.mapCollect(urlInfoStore.multiGet[String](rawUrls))
|
||||
.map { resolvedUrlsMap =>
|
||||
resolvedUrlsMap.flatMap {
|
||||
case (
|
||||
url,
|
||||
Some(
|
||||
UrlInfo(
|
||||
Some(resolvedUrl),
|
||||
Some(_),
|
||||
Some(domain),
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
Some(_),
|
||||
_,
|
||||
_,
|
||||
_,
|
||||
_))) =>
|
||||
if (domain == "Twitter") { // Filter out Twitter based URLs
|
||||
twitterResolvedUrlCounter.incr()
|
||||
None
|
||||
} else {
|
||||
resolvedUrlCounter.incr()
|
||||
Some(url -> resolvedUrl)
|
||||
}
|
||||
case _ =>
|
||||
noResolvedUrlCounter.incr()
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get resolved url maps given a list of urls, grouping urls that point to the same webpage
|
||||
*/
|
||||
def getResolvedUrls(urls: Seq[String], tweetId: Long): Future[Map[String, String]] = {
|
||||
if (urls.isEmpty) {
|
||||
EmptyFutureMap
|
||||
} else {
|
||||
Future
|
||||
.sleep(getUrlResolverDelayDuration(tweetId))
|
||||
.before(getResolvedUrls(urls.toSet))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a tweet, return the amount of delay needed before attempting to resolve the Urls
|
||||
*/
|
||||
private def getUrlResolverDelayDuration(
|
||||
tweetId: Long
|
||||
): Duration = {
|
||||
val urlResolverDelaySinceCreation = 12.seconds
|
||||
val urlResolverDelayDuration = 4.seconds
|
||||
val noDelay = 0.seconds
|
||||
|
||||
// Check whether the tweet was created more than the specified delay duration before now.
|
||||
// If the tweet ID is not based on Snowflake, this is false, and the delay is applied.
|
||||
val isCreatedBeforeDelayThreshold = SnowflakeUtils
|
||||
.tweetCreationTime(tweetId)
|
||||
.map(_.untilNow)
|
||||
.exists(_ > urlResolverDelaySinceCreation)
|
||||
|
||||
if (isCreatedBeforeDelayThreshold) {
|
||||
numNoDelayCounter.incr()
|
||||
noDelay
|
||||
} else {
|
||||
numDelayCounter.incr()
|
||||
urlResolverDelayDuration
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -1,36 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/bijection:scrooge",
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"abdecider",
|
||||
"decider/src/main/scala",
|
||||
"finagle/finagle-memcached/src/main/scala",
|
||||
"finagle/finagle-stats",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/util",
|
||||
"hermit/hermit-core:store",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/common",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/gizmoduck",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/store/tweetypie",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/decider",
|
||||
"scribelib/validators/src/main/scala/com/twitter/scribelib/validators",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache",
|
||||
"src/scala/com/twitter/storehaus_internal/memcache/config",
|
||||
"src/scala/com/twitter/storehaus_internal/util",
|
||||
"src/thrift/com/twitter/gizmoduck:thrift-java",
|
||||
"src/thrift/com/twitter/gizmoduck:thrift-scala",
|
||||
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/spam/rtf:safety-level-scala",
|
||||
"src/thrift/com/twitter/tweetypie:service-scala",
|
||||
"stitch/stitch-core",
|
||||
"stitch/stitch-socialgraph",
|
||||
"stitch/stitch-storehaus/src/main/scala",
|
||||
"stitch/stitch-tweetypie/src/main/scala",
|
||||
"util/util-hashing/src/main/scala",
|
||||
"util/util-logging/src/main/scala",
|
||||
],
|
||||
)
|
|
@ -1,23 +0,0 @@
|
|||
package com.twitter.recosinjector.config
|
||||
|
||||
import com.twitter.finagle.memcached.Client
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.storehaus_internal.memcache.MemcacheStore
|
||||
import com.twitter.storehaus_internal.util.{ClientName, ZkEndPoint}
|
||||
|
||||
trait CacheConfig {
|
||||
implicit def statsReceiver: StatsReceiver
|
||||
|
||||
def serviceIdentifier: ServiceIdentifier
|
||||
|
||||
def recosInjectorCoreSvcsCacheDest: String
|
||||
|
||||
val recosInjectorCoreSvcsCacheClient: Client = MemcacheStore.memcachedClient(
|
||||
name = ClientName("memcache-recos-injector"),
|
||||
dest = ZkEndPoint(recosInjectorCoreSvcsCacheDest),
|
||||
statsReceiver = statsReceiver,
|
||||
serviceIdentifier = serviceIdentifier
|
||||
)
|
||||
|
||||
}
|
|
@ -1,41 +0,0 @@
|
|||
package com.twitter.recosinjector.config
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.frigate.common.store.TweetCreationTimeMHStore
|
||||
import com.twitter.frigate.common.util.UrlInfo
|
||||
import com.twitter.gizmoduck.thriftscala.User
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
import com.twitter.socialgraph.thriftscala.{IdsRequest, IdsResult}
|
||||
import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.util.Future
|
||||
|
||||
trait Config { self =>
|
||||
implicit def statsReceiver: StatsReceiver
|
||||
|
||||
// ReadableStores
|
||||
def tweetyPieStore: ReadableStore[Long, TweetyPieResult]
|
||||
|
||||
def userStore: ReadableStore[Long, User]
|
||||
|
||||
def socialGraphIdStore: ReadableStore[IdsRequest, IdsResult]
|
||||
|
||||
def urlInfoStore: ReadableStore[String, UrlInfo]
|
||||
|
||||
// Manhattan stores
|
||||
def tweetCreationStore: TweetCreationTimeMHStore
|
||||
|
||||
// Decider
|
||||
def recosInjectorDecider: RecosInjectorDecider
|
||||
|
||||
// Constants
|
||||
def recosInjectorThriftClientId: ClientId
|
||||
|
||||
def serviceIdentifier: ServiceIdentifier
|
||||
|
||||
def outputKafkaTopicPrefix: String
|
||||
|
||||
def init(): Future[Unit] = Future.Done
|
||||
}
|
|
@ -1,215 +0,0 @@
|
|||
package com.twitter.recosinjector.config
|
||||
|
||||
import com.twitter.bijection.scrooge.BinaryScalaCodec
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.client.ClientRegistry
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.TweetCreationTimeMHStore
|
||||
import com.twitter.frigate.common.util.Finagle._
|
||||
import com.twitter.frigate.common.util.{UrlInfo, UrlInfoInjection, UrlResolver}
|
||||
import com.twitter.gizmoduck.thriftscala.{LookupContext, QueryFields, User, UserService}
|
||||
import com.twitter.hermit.store.common.{ObservedCachedReadableStore, ObservedMemcachedReadableStore}
|
||||
import com.twitter.hermit.store.gizmoduck.GizmoduckUserStore
|
||||
import com.twitter.hermit.store.tweetypie.TweetyPieStore
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.pink_floyd.thriftscala.{ClientIdentifier, Storer}
|
||||
import com.twitter.socialgraph.thriftscala.{IdsRequest, SocialGraphService}
|
||||
import com.twitter.spam.rtf.thriftscala.SafetyLevel
|
||||
import com.twitter.stitch.socialgraph.SocialGraph
|
||||
import com.twitter.stitch.storehaus.ReadableStoreOfStitch
|
||||
import com.twitter.stitch.tweetypie.TweetyPie.TweetyPieResult
|
||||
import com.twitter.storage.client.manhattan.kv.{
|
||||
ManhattanKVClient,
|
||||
ManhattanKVClientMtlsParams,
|
||||
ManhattanKVEndpointBuilder
|
||||
}
|
||||
import com.twitter.storehaus.ReadableStore
|
||||
import com.twitter.tweetypie.thriftscala.{GetTweetOptions, TweetService}
|
||||
import com.twitter.util.Future
|
||||
|
||||
/*
|
||||
* Any finagle clients should not be defined as lazy. If defined lazy,
|
||||
* ClientRegistry.expAllRegisteredClientsResolved() call in init will not ensure that the clients
|
||||
* are active before thrift endpoint is active. We want the clients to be active, because zookeeper
|
||||
* resolution triggered by first request(s) might result in the request(s) failing.
|
||||
*/
|
||||
trait DeployConfig extends Config with CacheConfig {
|
||||
implicit def statsReceiver: StatsReceiver
|
||||
|
||||
def log: Logger
|
||||
|
||||
// Clients
|
||||
val gizmoduckClient = new UserService.FinagledClient(
|
||||
readOnlyThriftService(
|
||||
"gizmoduck",
|
||||
"/s/gizmoduck/gizmoduck",
|
||||
statsReceiver,
|
||||
recosInjectorThriftClientId,
|
||||
requestTimeout = 450.milliseconds,
|
||||
mTLSServiceIdentifier = Some(serviceIdentifier)
|
||||
)
|
||||
)
|
||||
val tweetyPieClient = new TweetService.FinagledClient(
|
||||
readOnlyThriftService(
|
||||
"tweetypie",
|
||||
"/s/tweetypie/tweetypie",
|
||||
statsReceiver,
|
||||
recosInjectorThriftClientId,
|
||||
requestTimeout = 450.milliseconds,
|
||||
mTLSServiceIdentifier = Some(serviceIdentifier)
|
||||
)
|
||||
)
|
||||
|
||||
val sgsClient = new SocialGraphService.FinagledClient(
|
||||
readOnlyThriftService(
|
||||
"socialgraph",
|
||||
"/s/socialgraph/socialgraph",
|
||||
statsReceiver,
|
||||
recosInjectorThriftClientId,
|
||||
requestTimeout = 450.milliseconds,
|
||||
mTLSServiceIdentifier = Some(serviceIdentifier)
|
||||
)
|
||||
)
|
||||
|
||||
val pinkStoreClient = new Storer.FinagledClient(
|
||||
readOnlyThriftService(
|
||||
"pink_store",
|
||||
"/s/spiderduck/pink-store",
|
||||
statsReceiver,
|
||||
recosInjectorThriftClientId,
|
||||
requestTimeout = 450.milliseconds,
|
||||
mTLSServiceIdentifier = Some(serviceIdentifier)
|
||||
)
|
||||
)
|
||||
|
||||
// Stores
|
||||
private val _gizmoduckStore = {
|
||||
val queryFields: Set[QueryFields] = Set(
|
||||
QueryFields.Discoverability,
|
||||
QueryFields.Labels,
|
||||
QueryFields.Safety
|
||||
)
|
||||
val context: LookupContext = LookupContext(
|
||||
includeDeactivated = true,
|
||||
safetyLevel = Some(SafetyLevel.Recommendations)
|
||||
)
|
||||
|
||||
GizmoduckUserStore(
|
||||
client = gizmoduckClient,
|
||||
queryFields = queryFields,
|
||||
context = context,
|
||||
statsReceiver = statsReceiver
|
||||
)
|
||||
}
|
||||
|
||||
override val userStore: ReadableStore[Long, User] = {
|
||||
// memcache based cache
|
||||
ObservedMemcachedReadableStore.fromCacheClient(
|
||||
backingStore = _gizmoduckStore,
|
||||
cacheClient = recosInjectorCoreSvcsCacheClient,
|
||||
ttl = 2.hours
|
||||
)(
|
||||
valueInjection = BinaryScalaCodec(User),
|
||||
statsReceiver = statsReceiver.scope("UserStore"),
|
||||
keyToString = { k: Long =>
|
||||
s"usri/$k"
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* TweetyPie store, used to fetch tweet objects when unavailable, and also as a source of
|
||||
* tweet SafetyLevel filtering.
|
||||
* Note: we do NOT cache TweetyPie calls, as it makes tweet SafetyLevel filtering less accurate.
|
||||
* TweetyPie QPS is < 20K/cluster.
|
||||
* More info is here:
|
||||
* https://cgit.twitter.biz/source/tree/src/thrift/com/twitter/spam/rtf/safety_level.thrift
|
||||
*/
|
||||
override val tweetyPieStore: ReadableStore[Long, TweetyPieResult] = {
|
||||
val getTweetOptions = Some(
|
||||
GetTweetOptions(
|
||||
includeCards = true,
|
||||
safetyLevel = Some(SafetyLevel.RecosWritePath)
|
||||
)
|
||||
)
|
||||
TweetyPieStore(
|
||||
tweetyPieClient,
|
||||
getTweetOptions,
|
||||
convertExceptionsToNotFound = false // Do not suppress TweetyPie errors. Leave it to caller
|
||||
)
|
||||
}
|
||||
|
||||
private val _urlInfoStore = {
|
||||
//Initialize pink store client, for parsing url
|
||||
UrlResolver(
|
||||
pinkStoreClient,
|
||||
statsReceiver.scope("urlFetcher"),
|
||||
clientId = ClientIdentifier.Recoshose
|
||||
)
|
||||
}
|
||||
|
||||
override val urlInfoStore: ReadableStore[String, UrlInfo] = {
|
||||
// memcache based cache
|
||||
val memcachedStore = ObservedMemcachedReadableStore.fromCacheClient(
|
||||
backingStore = _urlInfoStore,
|
||||
cacheClient = recosInjectorCoreSvcsCacheClient,
|
||||
ttl = 2.hours
|
||||
)(
|
||||
valueInjection = UrlInfoInjection,
|
||||
statsReceiver = statsReceiver.scope("UrlInfoStore"),
|
||||
keyToString = { k: String =>
|
||||
s"uisri/$k"
|
||||
}
|
||||
)
|
||||
|
||||
ObservedCachedReadableStore.from(
|
||||
memcachedStore,
|
||||
ttl = 1.minutes,
|
||||
maxKeys = 1e5.toInt,
|
||||
windowSize = 10000L,
|
||||
cacheName = "url_store_in_proc_cache"
|
||||
)(statsReceiver.scope("url_store_in_proc_cache"))
|
||||
}
|
||||
|
||||
override val socialGraphIdStore = ReadableStoreOfStitch { idsRequest: IdsRequest =>
|
||||
SocialGraph(sgsClient).ids(idsRequest)
|
||||
}
|
||||
|
||||
/**
|
||||
* MH Store for updating the last time user created a tweet
|
||||
*/
|
||||
val tweetCreationStore: TweetCreationTimeMHStore = {
|
||||
val client = ManhattanKVClient(
|
||||
appId = "recos_tweet_creation_info",
|
||||
dest = "/s/manhattan/omega.native-thrift",
|
||||
mtlsParams = ManhattanKVClientMtlsParams(serviceIdentifier)
|
||||
)
|
||||
|
||||
val endpoint = ManhattanKVEndpointBuilder(client)
|
||||
.defaultMaxTimeout(700.milliseconds)
|
||||
.statsReceiver(
|
||||
statsReceiver
|
||||
.scope(serviceIdentifier.zone)
|
||||
.scope(serviceIdentifier.environment)
|
||||
.scope("recos_injector_tweet_creation_info_store")
|
||||
)
|
||||
.build()
|
||||
|
||||
val dataset = if (serviceIdentifier.environment == "prod") {
|
||||
"recos_injector_tweet_creation_info"
|
||||
} else {
|
||||
"recos_injector_tweet_creation_info_staging"
|
||||
}
|
||||
|
||||
new TweetCreationTimeMHStore(
|
||||
cluster = serviceIdentifier.zone,
|
||||
endpoint = endpoint,
|
||||
dataset = dataset,
|
||||
writeTtl = Some(14.days),
|
||||
statsReceiver.scope("recos_injector_tweet_creation_info_store")
|
||||
)
|
||||
}
|
||||
|
||||
// wait for all serversets to populate
|
||||
override def init(): Future[Unit] = ClientRegistry.expAllRegisteredClientsResolved().unit
|
||||
}
|
|
@ -1,29 +0,0 @@
|
|||
package com.twitter.recosinjector.config
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
|
||||
case class ProdConfig(
|
||||
override val serviceIdentifier: ServiceIdentifier
|
||||
)(implicit val statsReceiver: StatsReceiver) extends {
|
||||
// Due to trait initialization logic in Scala, any abstract members declared in Config or
|
||||
// DeployConfig should be declared in this block. Otherwise the abstract member might initialize
|
||||
// to null if invoked before before object creation finishing.
|
||||
|
||||
val recosInjectorThriftClientId = ClientId("recos-injector.prod")
|
||||
|
||||
val outputKafkaTopicPrefix = "recos_injector"
|
||||
|
||||
val log = Logger("ProdConfig")
|
||||
|
||||
val recosInjectorCoreSvcsCacheDest = "/srv#/prod/local/cache/recos_metadata"
|
||||
|
||||
val recosInjectorDecider = RecosInjectorDecider(
|
||||
isProd = true,
|
||||
dataCenter = serviceIdentifier.zone
|
||||
)
|
||||
|
||||
} with DeployConfig
|
|
@ -1,33 +0,0 @@
|
|||
package com.twitter.recosinjector.config
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
|
||||
case class StagingConfig(
|
||||
override val serviceIdentifier: ServiceIdentifier
|
||||
)(
|
||||
implicit val statsReceiver: StatsReceiver)
|
||||
extends {
|
||||
// Due to trait initialization logic in Scala, any abstract members declared in Config or
|
||||
// DeployConfig should be declared in this block. Otherwise the abstract member might initialize
|
||||
// to null if invoked before before object creation finishing.
|
||||
|
||||
val recosInjectorThriftClientId = ClientId("recos-injector.staging")
|
||||
|
||||
val outputKafkaTopicPrefix = "staging_recos_injector"
|
||||
|
||||
val log = Logger("StagingConfig")
|
||||
|
||||
val recosInjectorCoreSvcsCacheDest = "/srv#/test/local/cache/twemcache_recos"
|
||||
|
||||
val recosInjectorDecider = RecosInjectorDecider(
|
||||
isProd = false,
|
||||
dataCenter = serviceIdentifier.zone
|
||||
)
|
||||
|
||||
val abDeciderLoggerNode = "staging_abdecider_scribe"
|
||||
|
||||
} with DeployConfig
|
|
@ -1,7 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"decider/src/main/scala",
|
||||
],
|
||||
)
|
|
@ -1,33 +0,0 @@
|
|||
package com.twitter.recosinjector.decider
|
||||
|
||||
import com.twitter.decider.{Decider, DeciderFactory, RandomRecipient, Recipient}
|
||||
|
||||
case class RecosInjectorDecider(isProd: Boolean, dataCenter: String) {
|
||||
lazy val decider: Decider = DeciderFactory(
|
||||
Some("config/decider.yml"),
|
||||
Some(getOverlayPath(isProd, dataCenter))
|
||||
)()
|
||||
|
||||
private def getOverlayPath(isProd: Boolean, dataCenter: String): String = {
|
||||
if (isProd) {
|
||||
s"/usr/local/config/overlays/recos-injector/recos-injector/prod/$dataCenter/decider_overlay.yml"
|
||||
} else {
|
||||
s"/usr/local/config/overlays/recos-injector/recos-injector/staging/$dataCenter/decider_overlay.yml"
|
||||
}
|
||||
}
|
||||
|
||||
def getDecider: Decider = decider
|
||||
|
||||
def isAvailable(feature: String, recipient: Option[Recipient]): Boolean = {
|
||||
decider.isAvailable(feature, recipient)
|
||||
}
|
||||
|
||||
def isAvailable(feature: String): Boolean = isAvailable(feature, Some(RandomRecipient))
|
||||
}
|
||||
|
||||
object RecosInjectorDeciderConstants {
|
||||
val TweetEventTransformerUserTweetEntityEdgesDecider =
|
||||
"tweet_event_transformer_user_tweet_entity_edges"
|
||||
val EnableEmitTweetEdgeFromReply = "enable_emit_tweet_edge_from_reply"
|
||||
val EnableUnfavoriteEdge = "enable_unfavorite_edge"
|
||||
}
|
|
@ -1,23 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/storehaus:core",
|
||||
"finagle/finagle-stats",
|
||||
"frigate/frigate-common/src/main/scala/com/twitter/frigate/common/store",
|
||||
"hermit/hermit-core/src/main/scala/com/twitter/hermit/predicate/socialgraph",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/decider",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/filters",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/publishers",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/util",
|
||||
"src/scala/com/twitter/recos/util:recos-util",
|
||||
"src/thrift/com/twitter/recos:recos-injector-scala",
|
||||
"src/thrift/com/twitter/recos:recos-internal-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:events-scala",
|
||||
"src/thrift/com/twitter/tweetypie:tweet-scala",
|
||||
],
|
||||
)
|
|
@ -1,87 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.recos.internal.thriftscala.RecosHoseMessage
|
||||
import com.twitter.recos.recos_injector.thriftscala.{Features, UserTweetAuthorGraphMessage}
|
||||
import com.twitter.recos.util.Action.Action
|
||||
import com.twitter.recosinjector.util.TweetDetails
|
||||
import scala.collection.Map
|
||||
|
||||
trait Edge {
|
||||
// RecosHoseMessage is the thrift struct that the graphs consume.
|
||||
def convertToRecosHoseMessage: RecosHoseMessage
|
||||
|
||||
// UserTweetAuthorGraphMessage is the thrift struct that user_tweet_author_graph consumes.
|
||||
def convertToUserTweetAuthorGraphMessage: UserTweetAuthorGraphMessage
|
||||
}
|
||||
|
||||
/**
|
||||
* Edge corresponding to UserTweetEntityEdge.
|
||||
* It captures user-tweet interactions: Create, Like, Retweet, Reply etc.
|
||||
*/
|
||||
case class UserTweetEntityEdge(
|
||||
sourceUser: Long,
|
||||
targetTweet: Long,
|
||||
action: Action,
|
||||
cardInfo: Option[Byte],
|
||||
metadata: Option[Long],
|
||||
entitiesMap: Option[Map[Byte, Seq[Int]]],
|
||||
tweetDetails: Option[TweetDetails])
|
||||
extends Edge {
|
||||
|
||||
override def convertToRecosHoseMessage: RecosHoseMessage = {
|
||||
RecosHoseMessage(
|
||||
leftId = sourceUser,
|
||||
rightId = targetTweet,
|
||||
action = action.id.toByte,
|
||||
card = cardInfo,
|
||||
entities = entitiesMap,
|
||||
edgeMetadata = metadata
|
||||
)
|
||||
}
|
||||
|
||||
private def getFeatures(tweetDetails: TweetDetails): Features = {
|
||||
Features(
|
||||
hasPhoto = Some(tweetDetails.hasPhoto),
|
||||
hasVideo = Some(tweetDetails.hasVideo),
|
||||
hasUrl = Some(tweetDetails.hasUrl),
|
||||
hasHashtag = Some(tweetDetails.hasHashtag)
|
||||
)
|
||||
}
|
||||
|
||||
override def convertToUserTweetAuthorGraphMessage: UserTweetAuthorGraphMessage = {
|
||||
UserTweetAuthorGraphMessage(
|
||||
leftId = sourceUser,
|
||||
rightId = targetTweet,
|
||||
action = action.id.toByte,
|
||||
card = cardInfo,
|
||||
authorId = tweetDetails.flatMap(_.authorId),
|
||||
features = tweetDetails.map(getFeatures)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Edge corresponding to UserUserGraph.
|
||||
* It captures user-user interactions: Follow, Mention, Mediatag.
|
||||
*/
|
||||
case class UserUserEdge(
|
||||
sourceUser: Long,
|
||||
targetUser: Long,
|
||||
action: Action,
|
||||
metadata: Option[Long])
|
||||
extends Edge {
|
||||
override def convertToRecosHoseMessage: RecosHoseMessage = {
|
||||
RecosHoseMessage(
|
||||
leftId = sourceUser,
|
||||
rightId = targetUser,
|
||||
action = action.id.toByte,
|
||||
edgeMetadata = metadata
|
||||
)
|
||||
}
|
||||
|
||||
override def convertToUserTweetAuthorGraphMessage: UserTweetAuthorGraphMessage = {
|
||||
throw new RuntimeException(
|
||||
"convertToUserTweetAuthorGraphMessage not implemented in UserUserEdge.")
|
||||
}
|
||||
|
||||
}
|
|
@ -1,82 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.base.Stats.track
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* This is the generic interface that converts incoming Events (ex. TweetEvent, FavEvent, etc)
|
||||
* into Edge for a specific output graph. It applies the following flow:
|
||||
*
|
||||
* event -> update event stats -> build edges -> filter edges
|
||||
*
|
||||
* Top-level statistics are provided for each step, such as latency and number of events
|
||||
*/
|
||||
trait EventToMessageBuilder[Event, E <: Edge] {
|
||||
implicit val statsReceiver: StatsReceiver
|
||||
|
||||
private lazy val processEventStats = statsReceiver.scope("process_event")
|
||||
private lazy val numEventsStats = statsReceiver.counter("num_process_event")
|
||||
private lazy val rejectEventStats = statsReceiver.counter("num_reject_event")
|
||||
private lazy val buildEdgesStats = statsReceiver.scope("build")
|
||||
private lazy val numAllEdgesStats = buildEdgesStats.counter("num_all_edges")
|
||||
private lazy val filterEdgesStats = statsReceiver.scope("filter")
|
||||
private lazy val numValidEdgesStats = statsReceiver.counter("num_valid_edges")
|
||||
private lazy val numRecosHoseMessageStats = statsReceiver.counter("num_RecosHoseMessage")
|
||||
|
||||
/**
|
||||
* Given an incoming event, process and convert it into a sequence of RecosHoseMessages
|
||||
* @param event
|
||||
* @return
|
||||
*/
|
||||
def processEvent(event: Event): Future[Seq[Edge]] = {
|
||||
track(processEventStats) {
|
||||
shouldProcessEvent(event).flatMap {
|
||||
case true =>
|
||||
numEventsStats.incr()
|
||||
updateEventStatus(event)
|
||||
for {
|
||||
allEdges <- track(buildEdgesStats)(buildEdges(event))
|
||||
filteredEdges <- track(filterEdgesStats)(filterEdges(event, allEdges))
|
||||
} yield {
|
||||
numAllEdgesStats.incr(allEdges.size)
|
||||
numValidEdgesStats.incr(filteredEdges.size)
|
||||
numRecosHoseMessageStats.incr(filteredEdges.size)
|
||||
filteredEdges
|
||||
}
|
||||
case false =>
|
||||
rejectEventStats.incr()
|
||||
Future.Nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Pre-process filter that determines whether the given event should be used to build edges.
|
||||
* @param event
|
||||
* @return
|
||||
*/
|
||||
def shouldProcessEvent(event: Event): Future[Boolean]
|
||||
|
||||
/**
|
||||
* Update cache/event logging related to the specific event.
|
||||
* By default, no action will be taken. Override when necessary
|
||||
* @param event
|
||||
*/
|
||||
def updateEventStatus(event: Event): Unit = {}
|
||||
|
||||
/**
|
||||
* Given an event, extract info and build a sequence of edges
|
||||
* @param event
|
||||
* @return
|
||||
*/
|
||||
def buildEdges(event: Event): Future[Seq[E]]
|
||||
|
||||
/**
|
||||
* Given a sequence of edges, filter and return the valid edges
|
||||
* @param event
|
||||
* @param edges
|
||||
* @return
|
||||
*/
|
||||
def filterEdges(event: Event, edges: Seq[E]): Future[Seq[E]]
|
||||
}
|
|
@ -1,73 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.socialgraph.thriftscala.{
|
||||
Action => SocialGraphAction,
|
||||
FollowGraphEvent,
|
||||
FollowType,
|
||||
WriteEvent
|
||||
}
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Converts a WriteEvent to UserUserGraph's messages, including Mention and Mediatag messages
|
||||
*/
|
||||
class SocialWriteEventToUserUserGraphBuilder()(override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[WriteEvent, UserUserEdge] {
|
||||
private val followOrFrictionlessFollowCounter =
|
||||
statsReceiver.counter("num_follow_or_frictionless")
|
||||
private val notFollowOrFrictionlessFollowCounter =
|
||||
statsReceiver.counter("num_not_follow_or_frictionless")
|
||||
private val followEdgeCounter = statsReceiver.counter("num_follow_edge")
|
||||
|
||||
/**
|
||||
* For now, we are only interested in Follow events
|
||||
*/
|
||||
override def shouldProcessEvent(event: WriteEvent): Future[Boolean] = {
|
||||
event.action match {
|
||||
case SocialGraphAction.Follow | SocialGraphAction.FrictionlessFollow =>
|
||||
followOrFrictionlessFollowCounter.incr()
|
||||
Future(true)
|
||||
case _ =>
|
||||
notFollowOrFrictionlessFollowCounter.incr()
|
||||
Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine whether a Follow event is valid/error free.
|
||||
*/
|
||||
private def isValidFollowEvent(followEvent: FollowGraphEvent): Boolean = {
|
||||
followEvent.followType match {
|
||||
case Some(FollowType.NormalFollow) | Some(FollowType.FrictionlessFollow) =>
|
||||
followEvent.result.validationError.isEmpty
|
||||
case _ =>
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(event: WriteEvent): Future[Seq[UserUserEdge]] = {
|
||||
val userUserEdges = event.follow
|
||||
.map(_.collect {
|
||||
case followEvent if isValidFollowEvent(followEvent) =>
|
||||
val sourceUserId = followEvent.result.request.source
|
||||
val targetUserId = followEvent.result.request.target
|
||||
followEdgeCounter.incr()
|
||||
UserUserEdge(
|
||||
sourceUserId,
|
||||
targetUserId,
|
||||
Action.Follow,
|
||||
Some(System.currentTimeMillis())
|
||||
)
|
||||
}).getOrElse(Nil)
|
||||
Future(userUserEdges)
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: WriteEvent,
|
||||
edges: Seq[UserUserEdge]
|
||||
): Future[Seq[UserUserEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,60 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.TweetFavoriteEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
class TimelineEventToUserTweetEntityGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[TweetFavoriteEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
private val numFavEdgeCounter = statsReceiver.counter("num_favorite_edge")
|
||||
private val numUnfavEdgeCounter = statsReceiver.counter("num_unfavorite_edge")
|
||||
|
||||
override def shouldProcessEvent(event: TweetFavoriteEventDetails): Future[Boolean] = {
|
||||
Future(true)
|
||||
}
|
||||
|
||||
override def buildEdges(details: TweetFavoriteEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val engagement = details.userTweetEngagement
|
||||
val tweetDetails = engagement.tweetDetails
|
||||
|
||||
val entitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = engagement.tweetId,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
|
||||
entitiesMapFut
|
||||
.map { entitiesMap =>
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = engagement.engageUserId,
|
||||
targetTweet = engagement.tweetId,
|
||||
action = engagement.action,
|
||||
metadata = engagement.engagementTimeMillis,
|
||||
cardInfo = engagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = entitiesMap,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
}
|
||||
.map { edge =>
|
||||
edge match {
|
||||
case fav if fav.action == Action.Favorite =>
|
||||
numFavEdgeCounter.incr()
|
||||
case unfav if unfav.action == Action.Unfavorite =>
|
||||
numUnfavEdgeCounter.incr()
|
||||
case _ =>
|
||||
}
|
||||
Seq(edge)
|
||||
}
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: TweetFavoriteEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,54 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.TweetFavoriteEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
class TimelineEventToUserTweetGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[TweetFavoriteEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
override def shouldProcessEvent(event: TweetFavoriteEventDetails): Future[Boolean] = {
|
||||
Future(true)
|
||||
}
|
||||
|
||||
override def buildEdges(details: TweetFavoriteEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val engagement = details.userTweetEngagement
|
||||
|
||||
engagement.action match {
|
||||
case Action.Favorite =>
|
||||
val tweetDetails = engagement.tweetDetails
|
||||
|
||||
val entitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = engagement.tweetId,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
|
||||
entitiesMapFut
|
||||
.map { entitiesMap =>
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = engagement.engageUserId,
|
||||
targetTweet = engagement.tweetId,
|
||||
action = engagement.action,
|
||||
metadata = engagement.engagementTimeMillis,
|
||||
cardInfo = engagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = entitiesMap,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
}
|
||||
.map(Seq(_))
|
||||
|
||||
case _ => Future.Nil
|
||||
}
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: TweetFavoriteEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,343 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.TweetCreationTimeMHStore
|
||||
import com.twitter.frigate.common.util.SnowflakeUtils
|
||||
import com.twitter.recos.internal.thriftscala.{RecosUserTweetInfo, TweetType}
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDeciderConstants
|
||||
import com.twitter.recosinjector.util.TweetCreateEventDetails
|
||||
import com.twitter.util.{Future, Time}
|
||||
|
||||
class TweetEventToUserTweetEntityGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder,
|
||||
tweetCreationStore: TweetCreationTimeMHStore,
|
||||
decider: RecosInjectorDecider
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[TweetCreateEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
// TweetCreationStore counters
|
||||
private val lastTweetTimeNotInMh = statsReceiver.counter("last_tweet_time_not_in_mh")
|
||||
private val tweetCreationStoreInserts = statsReceiver.counter("tweet_creation_store_inserts")
|
||||
|
||||
private val numInvalidActionCounter = statsReceiver.counter("num_invalid_tweet_action")
|
||||
|
||||
private val numTweetEdgesCounter = statsReceiver.counter("num_tweet_edge")
|
||||
private val numRetweetEdgesCounter = statsReceiver.counter("num_retweet_edge")
|
||||
private val numReplyEdgesCounter = statsReceiver.counter("num_reply_edge")
|
||||
private val numQuoteEdgesCounter = statsReceiver.counter("num_quote_edge")
|
||||
private val numIsMentionedEdgesCounter = statsReceiver.counter("num_isMentioned_edge")
|
||||
private val numIsMediataggedEdgesCounter = statsReceiver.counter("num_isMediatagged_edge")
|
||||
|
||||
private val numIsDecider = statsReceiver.counter("num_decider_enabled")
|
||||
private val numIsNotDecider = statsReceiver.counter("num_decider_not_enabled")
|
||||
|
||||
override def shouldProcessEvent(event: TweetCreateEventDetails): Future[Boolean] = {
|
||||
val isDecider = decider.isAvailable(
|
||||
RecosInjectorDeciderConstants.TweetEventTransformerUserTweetEntityEdgesDecider
|
||||
)
|
||||
if (isDecider) {
|
||||
numIsDecider.incr()
|
||||
Future(true)
|
||||
} else {
|
||||
numIsNotDecider.incr()
|
||||
Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build edges Reply event. Reply event emits 2 edges:
|
||||
* author -> Reply -> SourceTweetId
|
||||
* author -> Tweet -> ReplyId
|
||||
* Do not associate entities in reply tweet to the source tweet
|
||||
*/
|
||||
private def buildReplyEdge(event: TweetCreateEventDetails) = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
val authorId = userTweetEngagement.engageUserId
|
||||
|
||||
val replyEdgeFut = event.sourceTweetDetails
|
||||
.map { sourceTweetDetails =>
|
||||
val sourceTweetId = sourceTweetDetails.tweet.id
|
||||
val sourceTweetEntitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = sourceTweetId,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
|
||||
sourceTweetEntitiesMapFut.map { sourceTweetEntitiesMap =>
|
||||
val replyEdge = UserTweetEntityEdge(
|
||||
sourceUser = authorId,
|
||||
targetTweet = sourceTweetId,
|
||||
action = Action.Reply,
|
||||
metadata = Some(userTweetEngagement.tweetId),
|
||||
cardInfo = Some(sourceTweetDetails.cardInfo.toByte),
|
||||
entitiesMap = sourceTweetEntitiesMap,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
numReplyEdgesCounter.incr()
|
||||
Some(replyEdge)
|
||||
}
|
||||
}.getOrElse(Future.None)
|
||||
|
||||
val tweetCreationEdgeFut =
|
||||
if (decider.isAvailable(RecosInjectorDeciderConstants.EnableEmitTweetEdgeFromReply)) {
|
||||
getAndUpdateLastTweetCreationTime(
|
||||
authorId = authorId,
|
||||
tweetId = userTweetEngagement.tweetId,
|
||||
tweetType = TweetType.Reply
|
||||
).map { lastTweetTime =>
|
||||
val edge = UserTweetEntityEdge(
|
||||
sourceUser = authorId,
|
||||
targetTweet = userTweetEngagement.tweetId,
|
||||
action = Action.Tweet,
|
||||
metadata = lastTweetTime,
|
||||
cardInfo = userTweetEngagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = None,
|
||||
tweetDetails = userTweetEngagement.tweetDetails
|
||||
)
|
||||
numTweetEdgesCounter.incr()
|
||||
Some(edge)
|
||||
}
|
||||
} else {
|
||||
Future.None
|
||||
}
|
||||
|
||||
Future.join(replyEdgeFut, tweetCreationEdgeFut).map {
|
||||
case (replyEdgeOpt, tweetCreationEdgeOpt) =>
|
||||
tweetCreationEdgeOpt.toSeq ++ replyEdgeOpt.toSeq
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Retweet UTEG edge: author -> RT -> SourceTweetId.
|
||||
*/
|
||||
private def buildRetweetEdge(event: TweetCreateEventDetails) = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
val tweetId = userTweetEngagement.tweetId
|
||||
|
||||
event.sourceTweetDetails
|
||||
.map { sourceTweetDetails =>
|
||||
val sourceTweetId = sourceTweetDetails.tweet.id // Id of the tweet being Retweeted
|
||||
val sourceTweetEntitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = sourceTweetId,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
|
||||
sourceTweetEntitiesMapFut.map { sourceTweetEntitiesMap =>
|
||||
val edge = UserTweetEntityEdge(
|
||||
sourceUser = userTweetEngagement.engageUserId,
|
||||
targetTweet = sourceTweetId,
|
||||
action = Action.Retweet,
|
||||
metadata = Some(tweetId), // metadata is the tweetId
|
||||
cardInfo = Some(sourceTweetDetails.cardInfo.toByte),
|
||||
entitiesMap = sourceTweetEntitiesMap,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
numRetweetEdgesCounter.incr()
|
||||
Seq(edge)
|
||||
}
|
||||
}.getOrElse(Future.Nil)
|
||||
}
|
||||
|
||||
/**
|
||||
* Build edges for a Quote event. Quote tweet emits 2 edges:
|
||||
* 1. A quote social proof: author -> Quote -> SourceTweetId
|
||||
* 2. A tweet creation edge: author -> Tweet -> QuoteTweetId
|
||||
*/
|
||||
private def buildQuoteEdges(
|
||||
event: TweetCreateEventDetails
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
val tweetId = userTweetEngagement.tweetId
|
||||
val authorId = userTweetEngagement.engageUserId
|
||||
|
||||
// do not associate entities in quote tweet to the source tweet,
|
||||
// but associate entities to quote tweet in tweet creation event
|
||||
val quoteTweetEdgeFut = event.sourceTweetDetails
|
||||
.map { sourceTweetDetails =>
|
||||
val sourceTweetId = sourceTweetDetails.tweet.id // Id of the tweet being quoted
|
||||
val sourceTweetEntitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = sourceTweetId,
|
||||
tweetDetails = event.sourceTweetDetails
|
||||
)
|
||||
|
||||
sourceTweetEntitiesMapFut.map { sourceTweetEntitiesMap =>
|
||||
val edge = UserTweetEntityEdge(
|
||||
sourceUser = authorId,
|
||||
targetTweet = sourceTweetId,
|
||||
action = Action.Quote,
|
||||
metadata = Some(tweetId), // metadata is tweetId
|
||||
cardInfo = Some(sourceTweetDetails.cardInfo.toByte), // cardInfo of the source tweet
|
||||
entitiesMap = sourceTweetEntitiesMap,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
numQuoteEdgesCounter.incr()
|
||||
Seq(edge)
|
||||
}
|
||||
}.getOrElse(Future.Nil)
|
||||
|
||||
val tweetCreationEdgeFut = getAndUpdateLastTweetCreationTime(
|
||||
authorId = authorId,
|
||||
tweetId = tweetId,
|
||||
tweetType = TweetType.Quote
|
||||
).map { lastTweetTime =>
|
||||
val metadata = lastTweetTime
|
||||
val cardInfo = userTweetEngagement.tweetDetails.map(_.cardInfo.toByte)
|
||||
val edge = UserTweetEntityEdge(
|
||||
sourceUser = authorId,
|
||||
targetTweet = tweetId,
|
||||
action = Action.Tweet,
|
||||
metadata = metadata,
|
||||
cardInfo = cardInfo,
|
||||
entitiesMap = None,
|
||||
tweetDetails = userTweetEngagement.tweetDetails
|
||||
)
|
||||
numTweetEdgesCounter.incr()
|
||||
Seq(edge)
|
||||
}
|
||||
|
||||
Future.join(quoteTweetEdgeFut, tweetCreationEdgeFut).map {
|
||||
case (quoteEdge, creationEdge) =>
|
||||
quoteEdge ++ creationEdge
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build edges for a Tweet event. A Tweet emits 3 tyes edges:
|
||||
* 1. A tweet creation edge: author -> Tweet -> TweetId
|
||||
* 2. IsMentioned edges: mentionedUserId -> IsMentioned -> TweetId
|
||||
* 3. IsMediatagged edges: mediataggedUserId -> IsMediatagged -> TweetId
|
||||
*/
|
||||
private def buildTweetEdges(event: TweetCreateEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
val tweetDetails = userTweetEngagement.tweetDetails
|
||||
val tweetId = userTweetEngagement.tweetId
|
||||
val authorId = userTweetEngagement.engageUserId
|
||||
|
||||
val cardInfo = tweetDetails.map(_.cardInfo.toByte)
|
||||
|
||||
val entitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = tweetId,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
|
||||
val lastTweetTimeFut = getAndUpdateLastTweetCreationTime(
|
||||
authorId = authorId,
|
||||
tweetId = tweetId,
|
||||
tweetType = TweetType.Tweet
|
||||
)
|
||||
|
||||
Future.join(entitiesMapFut, lastTweetTimeFut).map {
|
||||
case (entitiesMap, lastTweetTime) =>
|
||||
val tweetCreationEdge = UserTweetEntityEdge(
|
||||
sourceUser = authorId,
|
||||
targetTweet = tweetId,
|
||||
action = Action.Tweet,
|
||||
metadata = lastTweetTime,
|
||||
cardInfo = cardInfo,
|
||||
entitiesMap = entitiesMap,
|
||||
tweetDetails = userTweetEngagement.tweetDetails
|
||||
)
|
||||
numTweetEdgesCounter.incr()
|
||||
|
||||
val isMentionedEdges = event.validMentionUserIds
|
||||
.map(_.map { mentionedUserId =>
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = mentionedUserId,
|
||||
targetTweet = tweetId,
|
||||
action = Action.IsMentioned,
|
||||
metadata = Some(tweetId),
|
||||
cardInfo = cardInfo,
|
||||
entitiesMap = entitiesMap,
|
||||
tweetDetails = userTweetEngagement.tweetDetails
|
||||
)
|
||||
}).getOrElse(Nil)
|
||||
numIsMentionedEdgesCounter.incr(isMentionedEdges.size)
|
||||
|
||||
val isMediataggedEdges = event.validMediatagUserIds
|
||||
.map(_.map { mediataggedUserId =>
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = mediataggedUserId,
|
||||
targetTweet = tweetId,
|
||||
action = Action.IsMediaTagged,
|
||||
metadata = Some(tweetId),
|
||||
cardInfo = cardInfo,
|
||||
entitiesMap = entitiesMap,
|
||||
tweetDetails = userTweetEngagement.tweetDetails
|
||||
)
|
||||
}).getOrElse(Nil)
|
||||
numIsMediataggedEdgesCounter.incr(isMediataggedEdges.size)
|
||||
|
||||
Seq(tweetCreationEdge) ++ isMentionedEdges ++ isMediataggedEdges
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* For a given user, read the user's last time tweeted from the MH store, and
|
||||
* write the new tweet time into the MH store before returning.
|
||||
* Note this function is async, so the MH write operations will continue to execute on its own.
|
||||
* This might create a read/write race condition, but it's expected.
|
||||
*/
|
||||
private def getAndUpdateLastTweetCreationTime(
|
||||
authorId: Long,
|
||||
tweetId: Long,
|
||||
tweetType: TweetType
|
||||
): Future[Option[Long]] = {
|
||||
val newTweetInfo = RecosUserTweetInfo(
|
||||
authorId,
|
||||
tweetId,
|
||||
tweetType,
|
||||
SnowflakeUtils.tweetCreationTime(tweetId).map(_.inMillis).getOrElse(Time.now.inMillis)
|
||||
)
|
||||
|
||||
tweetCreationStore
|
||||
.get(authorId)
|
||||
.map(_.map { previousTweetInfoSeq =>
|
||||
val lastTweetTime = previousTweetInfoSeq
|
||||
.filter(info => info.tweetType == TweetType.Tweet || info.tweetType == TweetType.Quote)
|
||||
.map(_.tweetTimestamp)
|
||||
.sortBy(-_)
|
||||
.headOption // Fetch the latest time user Tweeted or Quoted
|
||||
.getOrElse(
|
||||
Time.Bottom.inMillis
|
||||
) // Last tweet time never recorded in MH, default to oldest point in time
|
||||
|
||||
if (lastTweetTime == Time.Bottom.inMillis) lastTweetTimeNotInMh.incr()
|
||||
lastTweetTime
|
||||
})
|
||||
.ensure {
|
||||
tweetCreationStore
|
||||
.put(authorId, newTweetInfo)
|
||||
.onSuccess(_ => tweetCreationStoreInserts.incr())
|
||||
.onFailure { e =>
|
||||
statsReceiver.counter("write_failed_with_ex:" + e.getClass.getName).incr()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(event: TweetCreateEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
userTweetEngagement.action match {
|
||||
case Action.Reply =>
|
||||
buildReplyEdge(event)
|
||||
case Action.Retweet =>
|
||||
buildRetweetEdge(event)
|
||||
case Action.Tweet =>
|
||||
buildTweetEdges(event)
|
||||
case Action.Quote =>
|
||||
buildQuoteEdges(event)
|
||||
case _ =>
|
||||
numInvalidActionCounter.incr()
|
||||
Future.Nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: TweetCreateEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges) // No filtering for now. Add more if needed
|
||||
}
|
||||
}
|
|
@ -1,88 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.store.TweetCreationTimeMHStore
|
||||
import com.twitter.frigate.common.util.SnowflakeUtils
|
||||
import com.twitter.recos.internal.thriftscala.RecosUserTweetInfo
|
||||
import com.twitter.recos.internal.thriftscala.TweetType
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDeciderConstants
|
||||
import com.twitter.recosinjector.util.TweetCreateEventDetails
|
||||
import com.twitter.util.Future
|
||||
import com.twitter.util.Time
|
||||
|
||||
class TweetEventToUserTweetGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder,
|
||||
tweetCreationStore: TweetCreationTimeMHStore,
|
||||
decider: RecosInjectorDecider
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[TweetCreateEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
private val numRetweetEdgesCounter = statsReceiver.counter("num_retweet_edge")
|
||||
private val numIsDecider = statsReceiver.counter("num_decider_enabled")
|
||||
private val numIsNotDecider = statsReceiver.counter("num_decider_not_enabled")
|
||||
|
||||
override def shouldProcessEvent(event: TweetCreateEventDetails): Future[Boolean] = {
|
||||
val isDecider = decider.isAvailable(
|
||||
RecosInjectorDeciderConstants.TweetEventTransformerUserTweetEntityEdgesDecider
|
||||
)
|
||||
if (isDecider) {
|
||||
numIsDecider.incr()
|
||||
Future(true)
|
||||
} else {
|
||||
numIsNotDecider.incr()
|
||||
Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Build a Retweet edge: author -> RT -> SourceTweetId.
|
||||
*/
|
||||
private def buildRetweetEdge(event: TweetCreateEventDetails) = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
val tweetId = userTweetEngagement.tweetId
|
||||
|
||||
event.sourceTweetDetails
|
||||
.map { sourceTweetDetails =>
|
||||
val sourceTweetId = sourceTweetDetails.tweet.id // Id of the tweet being Retweeted
|
||||
val sourceTweetEntitiesMapFut = userTweetEntityEdgeBuilder.getEntitiesMapAndUpdateCache(
|
||||
tweetId = sourceTweetId,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
|
||||
sourceTweetEntitiesMapFut.map { sourceTweetEntitiesMap =>
|
||||
val edge = UserTweetEntityEdge(
|
||||
sourceUser = userTweetEngagement.engageUserId,
|
||||
targetTweet = sourceTweetId,
|
||||
action = Action.Retweet,
|
||||
metadata = Some(tweetId), // metadata is the tweetId
|
||||
cardInfo = Some(sourceTweetDetails.cardInfo.toByte),
|
||||
entitiesMap = sourceTweetEntitiesMap,
|
||||
tweetDetails = Some(sourceTweetDetails)
|
||||
)
|
||||
numRetweetEdgesCounter.incr()
|
||||
Seq(edge)
|
||||
}
|
||||
}.getOrElse(Future.Nil)
|
||||
}
|
||||
|
||||
override def buildEdges(event: TweetCreateEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val userTweetEngagement = event.userTweetEngagement
|
||||
userTweetEngagement.action match {
|
||||
case Action.Retweet =>
|
||||
buildRetweetEdge(event)
|
||||
case _ =>
|
||||
Future.Nil
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: TweetCreateEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges) // No filtering for now. Add more if needed
|
||||
}
|
||||
}
|
|
@ -1,65 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.TweetCreateEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Given a tweet creation event, parse for UserUserGraph edges. Specifically, when a new tweet is
|
||||
* created, extract the valid mentioned and mediatagged users in the tweet and create edges for them
|
||||
*/
|
||||
class TweetEventToUserUserGraphBuilder(
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[TweetCreateEventDetails, UserUserEdge] {
|
||||
private val tweetOrQuoteEventCounter = statsReceiver.counter("num_tweet_or_quote_event")
|
||||
private val nonTweetOrQuoteEventCounter = statsReceiver.counter("num_non_tweet_or_quote_event")
|
||||
private val mentionEdgeCounter = statsReceiver.counter("num_mention_edge")
|
||||
private val mediatagEdgeCounter = statsReceiver.counter("num_mediatag_edge")
|
||||
|
||||
override def shouldProcessEvent(event: TweetCreateEventDetails): Future[Boolean] = {
|
||||
// For user interactions, only new tweets and quotes are considered (no replies or retweets)
|
||||
event.userTweetEngagement.action match {
|
||||
case Action.Tweet | Action.Quote =>
|
||||
tweetOrQuoteEventCounter.incr()
|
||||
Future(true)
|
||||
case _ =>
|
||||
nonTweetOrQuoteEventCounter.incr()
|
||||
Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(event: TweetCreateEventDetails): Future[Seq[UserUserEdge]] = {
|
||||
val mentionEdges = event.validMentionUserIds
|
||||
.map(_.map { mentionUserId =>
|
||||
UserUserEdge(
|
||||
sourceUser = event.userTweetEngagement.engageUserId,
|
||||
targetUser = mentionUserId,
|
||||
action = Action.Mention,
|
||||
metadata = Some(System.currentTimeMillis())
|
||||
)
|
||||
}).getOrElse(Nil)
|
||||
|
||||
val mediatagEdges = event.validMediatagUserIds
|
||||
.map(_.map { mediatagUserId =>
|
||||
UserUserEdge(
|
||||
sourceUser = event.userTweetEngagement.engageUserId,
|
||||
targetUser = mediatagUserId,
|
||||
action = Action.MediaTag,
|
||||
metadata = Some(System.currentTimeMillis())
|
||||
)
|
||||
}).getOrElse(Nil)
|
||||
|
||||
mentionEdgeCounter.incr(mentionEdges.size)
|
||||
mediatagEdgeCounter.incr(mediatagEdges.size)
|
||||
Future(mentionEdges ++ mediatagEdges)
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: TweetCreateEventDetails,
|
||||
edges: Seq[UserUserEdge]
|
||||
): Future[Seq[UserUserEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,44 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.UuaEngagementEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
class UnifiedUserActionToUserAdGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[UuaEngagementEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
override def shouldProcessEvent(event: UuaEngagementEventDetails): Future[Boolean] = {
|
||||
event.userTweetEngagement.action match {
|
||||
case Action.Click | Action.VideoPlayback75 | Action.Favorite => Future(true)
|
||||
case _ => Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(details: UuaEngagementEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val engagement = details.userTweetEngagement
|
||||
val tweetDetails = engagement.tweetDetails
|
||||
|
||||
Future.value(
|
||||
Seq(
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = engagement.engageUserId,
|
||||
targetTweet = engagement.tweetId,
|
||||
action = engagement.action,
|
||||
metadata = engagement.engagementTimeMillis,
|
||||
cardInfo = engagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = None,
|
||||
tweetDetails = tweetDetails
|
||||
)))
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: UuaEngagementEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,51 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.UuaEngagementEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
class UnifiedUserActionToUserTweetGraphPlusBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[UuaEngagementEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
override def shouldProcessEvent(event: UuaEngagementEventDetails): Future[Boolean] = {
|
||||
event.userTweetEngagement.action match {
|
||||
case Action.Click | Action.VideoQualityView => Future(true)
|
||||
case Action.Favorite | Action.Retweet | Action.Share => Future(true)
|
||||
case Action.NotificationOpen | Action.EmailClick => Future(true)
|
||||
case Action.Quote | Action.Reply => Future(true)
|
||||
case Action.TweetNotInterestedIn | Action.TweetNotRelevant | Action.TweetSeeFewer |
|
||||
Action.TweetReport | Action.TweetMuteAuthor | Action.TweetBlockAuthor =>
|
||||
Future(true)
|
||||
case _ => Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(details: UuaEngagementEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val engagement = details.userTweetEngagement
|
||||
val tweetDetails = engagement.tweetDetails
|
||||
|
||||
Future
|
||||
.value(
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = engagement.engageUserId,
|
||||
targetTweet = engagement.tweetId,
|
||||
action = engagement.action,
|
||||
metadata = engagement.engagementTimeMillis,
|
||||
cardInfo = engagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = None,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
).map(Seq(_))
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: UuaEngagementEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,56 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.util.UuaEngagementEventDetails
|
||||
import com.twitter.util.Future
|
||||
|
||||
class UnifiedUserActionToUserVideoGraphBuilder(
|
||||
userTweetEntityEdgeBuilder: UserTweetEntityEdgeBuilder
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventToMessageBuilder[UuaEngagementEventDetails, UserTweetEntityEdge] {
|
||||
|
||||
private val numVideoPlayback50EdgeCounter = statsReceiver.counter("num_video_playback50_edge")
|
||||
private val numUnVideoPlayback50Counter = statsReceiver.counter("num_non_video_playback50_edge")
|
||||
|
||||
override def shouldProcessEvent(event: UuaEngagementEventDetails): Future[Boolean] = {
|
||||
event.userTweetEngagement.action match {
|
||||
case Action.VideoPlayback50 => Future(true)
|
||||
case _ => Future(false)
|
||||
}
|
||||
}
|
||||
|
||||
override def buildEdges(details: UuaEngagementEventDetails): Future[Seq[UserTweetEntityEdge]] = {
|
||||
val engagement = details.userTweetEngagement
|
||||
val tweetDetails = engagement.tweetDetails
|
||||
|
||||
Future
|
||||
.value(
|
||||
UserTweetEntityEdge(
|
||||
sourceUser = engagement.engageUserId,
|
||||
targetTweet = engagement.tweetId,
|
||||
action = engagement.action,
|
||||
metadata = engagement.engagementTimeMillis,
|
||||
cardInfo = engagement.tweetDetails.map(_.cardInfo.toByte),
|
||||
entitiesMap = None,
|
||||
tweetDetails = tweetDetails
|
||||
)
|
||||
).map { edge =>
|
||||
edge match {
|
||||
case videoPlayback50 if videoPlayback50.action == Action.VideoPlayback50 =>
|
||||
numVideoPlayback50EdgeCounter.incr()
|
||||
case _ =>
|
||||
numUnVideoPlayback50Counter.incr()
|
||||
}
|
||||
Seq(edge)
|
||||
}
|
||||
}
|
||||
|
||||
override def filterEdges(
|
||||
event: UuaEngagementEventDetails,
|
||||
edges: Seq[UserTweetEntityEdge]
|
||||
): Future[Seq[UserTweetEntityEdge]] = {
|
||||
Future(edges)
|
||||
}
|
||||
}
|
|
@ -1,80 +0,0 @@
|
|||
package com.twitter.recosinjector.edges
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.graphjet.algorithms.RecommendationType
|
||||
import com.twitter.recosinjector.clients.CacheEntityEntry
|
||||
import com.twitter.recosinjector.clients.RecosHoseEntitiesCache
|
||||
import com.twitter.recosinjector.clients.UrlResolver
|
||||
import com.twitter.recosinjector.util.TweetDetails
|
||||
import com.twitter.util.Future
|
||||
import scala.collection.Map
|
||||
import scala.util.hashing.MurmurHash3
|
||||
|
||||
class UserTweetEntityEdgeBuilder(
|
||||
cache: RecosHoseEntitiesCache,
|
||||
urlResolver: UrlResolver
|
||||
)(
|
||||
implicit val stats: StatsReceiver) {
|
||||
|
||||
def getHashedEntities(entities: Seq[String]): Seq[Int] = {
|
||||
entities.map(MurmurHash3.stringHash)
|
||||
}
|
||||
|
||||
/**
|
||||
* Given the entities and their corresponding hashedIds, store the hashId->entity mapping into a
|
||||
* cache.
|
||||
* This is because UTEG edges only store the hashIds, and relies on the cache values to
|
||||
* recover the actual entities. This allows us to store integer values instead of string in the
|
||||
* edges to save space.
|
||||
*/
|
||||
private def storeEntitiesInCache(
|
||||
urlEntities: Seq[String],
|
||||
urlHashIds: Seq[Int]
|
||||
): Future[Unit] = {
|
||||
val urlCacheEntries = urlHashIds.zip(urlEntities).map {
|
||||
case (hashId, url) =>
|
||||
CacheEntityEntry(RecosHoseEntitiesCache.UrlPrefix, hashId, url)
|
||||
}
|
||||
cache.updateEntitiesCache(
|
||||
newCacheEntries = urlCacheEntries,
|
||||
stats = stats.scope("urlCache")
|
||||
)
|
||||
}
|
||||
|
||||
/**
|
||||
* Return an entity mapping from GraphJet recType -> hash(entity)
|
||||
*/
|
||||
private def getEntitiesMap(
|
||||
urlHashIds: Seq[Int]
|
||||
) = {
|
||||
val entitiesMap = Seq(
|
||||
RecommendationType.URL.getValue.toByte -> urlHashIds
|
||||
).collect {
|
||||
case (keys, ids) if ids.nonEmpty => keys -> ids
|
||||
}.toMap
|
||||
if (entitiesMap.isEmpty) None else Some(entitiesMap)
|
||||
}
|
||||
|
||||
def getEntitiesMapAndUpdateCache(
|
||||
tweetId: Long,
|
||||
tweetDetails: Option[TweetDetails]
|
||||
): Future[Option[Map[Byte, Seq[Int]]]] = {
|
||||
val resolvedUrlFut = urlResolver
|
||||
.getResolvedUrls(
|
||||
urls = tweetDetails.flatMap(_.urls).getOrElse(Nil),
|
||||
tweetId = tweetId
|
||||
).map(_.values.toSeq)
|
||||
|
||||
resolvedUrlFut.map { resolvedUrls =>
|
||||
val urlEntities = resolvedUrls
|
||||
val urlHashIds = getHashedEntities(urlEntities)
|
||||
|
||||
// Async call to cache
|
||||
storeEntitiesInCache(
|
||||
urlEntities = urlEntities,
|
||||
urlHashIds = urlHashIds
|
||||
)
|
||||
getEntitiesMap(urlHashIds)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,20 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"eventbus/client",
|
||||
"recos-injector/server/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/decider",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/edges",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/publishers",
|
||||
"src/thrift/com/twitter/clientapp/gen:clientapp-scala",
|
||||
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:events-scala",
|
||||
"src/thrift/com/twitter/tweetypie:tweet-scala",
|
||||
],
|
||||
)
|
|
@ -1,60 +0,0 @@
|
|||
package com.twitter.recosinjector.event_processors
|
||||
|
||||
import com.twitter.eventbus.client.{EventBusSubscriber, EventBusSubscriberBuilder}
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.logging.Logger
|
||||
import com.twitter.scrooge.{ThriftStruct, ThriftStructCodec}
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Main processor class that handles incoming EventBus events, which take forms of a ThriftStruct.
|
||||
* This class is responsible for setting up the EventBus streams, and provides a processEvent()
|
||||
* where child classes can decide what to do with incoming events
|
||||
*/
|
||||
trait EventBusProcessor[Event <: ThriftStruct] {
|
||||
private val log = Logger()
|
||||
|
||||
implicit def statsReceiver: StatsReceiver
|
||||
|
||||
/**
|
||||
* Full name of the EventBus stream this processor listens to
|
||||
*/
|
||||
val eventBusStreamName: String
|
||||
|
||||
/**
|
||||
* the thriftStruct definition of the objects passed in from the EventBus streams, such as
|
||||
* TweetEvent, WriteEvent, etc.
|
||||
*/
|
||||
val thriftStruct: ThriftStructCodec[Event]
|
||||
|
||||
val serviceIdentifier: ServiceIdentifier
|
||||
|
||||
def processEvent(event: Event): Future[Unit]
|
||||
|
||||
private def getEventBusSubscriberBuilder: EventBusSubscriberBuilder[Event] =
|
||||
EventBusSubscriberBuilder()
|
||||
.subscriberId(eventBusStreamName)
|
||||
.serviceIdentifier(serviceIdentifier)
|
||||
.thriftStruct(thriftStruct)
|
||||
.numThreads(8)
|
||||
.fromAllZones(true) // Receives traffic from all data centers
|
||||
.skipToLatest(false) // Ensures we don't miss out on events during restart
|
||||
.statsReceiver(statsReceiver)
|
||||
|
||||
// lazy val ensures the subscriber is only initialized when start() is called
|
||||
private lazy val eventBusSubscriber = getEventBusSubscriberBuilder.build(processEvent)
|
||||
|
||||
def start(): EventBusSubscriber[Event] = eventBusSubscriber
|
||||
|
||||
def stop(): Unit = {
|
||||
eventBusSubscriber
|
||||
.close()
|
||||
.onSuccess { _ =>
|
||||
log.info(s"EventBus processor ${this.getClass.getSimpleName} is stopped")
|
||||
}
|
||||
.onFailure { ex: Throwable =>
|
||||
log.error(ex, s"Exception while stopping EventBus processor ${this.getClass.getSimpleName}")
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,33 +0,0 @@
|
|||
package com.twitter.recosinjector.event_processors
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recosinjector.edges.{EventToMessageBuilder, UserUserEdge}
|
||||
import com.twitter.recosinjector.publishers.KafkaEventPublisher
|
||||
import com.twitter.scrooge.ThriftStructCodec
|
||||
import com.twitter.socialgraph.thriftscala.WriteEvent
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* This processor listens to events from social graphs services. In particular, a major use case is
|
||||
* to listen to user-user follow events.
|
||||
*/
|
||||
class SocialWriteEventProcessor(
|
||||
override val eventBusStreamName: String,
|
||||
override val thriftStruct: ThriftStructCodec[WriteEvent],
|
||||
override val serviceIdentifier: ServiceIdentifier,
|
||||
kafkaEventPublisher: KafkaEventPublisher,
|
||||
userUserGraphTopic: String,
|
||||
userUserGraphMessageBuilder: EventToMessageBuilder[WriteEvent, UserUserEdge]
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventBusProcessor[WriteEvent] {
|
||||
|
||||
override def processEvent(event: WriteEvent): Future[Unit] = {
|
||||
userUserGraphMessageBuilder.processEvent(event).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher.publish(edge.convertToRecosHoseMessage, userUserGraphTopic)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,150 +0,0 @@
|
|||
package com.twitter.recosinjector.event_processors
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recosinjector.clients.Gizmoduck
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDecider
|
||||
import com.twitter.recosinjector.decider.RecosInjectorDeciderConstants
|
||||
import com.twitter.recosinjector.edges.TimelineEventToUserTweetEntityGraphBuilder
|
||||
import com.twitter.recosinjector.filters.TweetFilter
|
||||
import com.twitter.recosinjector.filters.UserFilter
|
||||
import com.twitter.recosinjector.publishers.KafkaEventPublisher
|
||||
import com.twitter.recosinjector.util.TweetDetails
|
||||
import com.twitter.recosinjector.util.TweetFavoriteEventDetails
|
||||
import com.twitter.recosinjector.util.UserTweetEngagement
|
||||
import com.twitter.scrooge.ThriftStructCodec
|
||||
import com.twitter.timelineservice.thriftscala.FavoriteEvent
|
||||
import com.twitter.timelineservice.thriftscala.UnfavoriteEvent
|
||||
import com.twitter.timelineservice.thriftscala.{Event => TimelineEvent}
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Processor for Timeline events, such as Favorite (liking) tweets
|
||||
*/
|
||||
class TimelineEventProcessor(
|
||||
override val eventBusStreamName: String,
|
||||
override val thriftStruct: ThriftStructCodec[TimelineEvent],
|
||||
override val serviceIdentifier: ServiceIdentifier,
|
||||
kafkaEventPublisher: KafkaEventPublisher,
|
||||
userTweetEntityGraphTopic: String,
|
||||
userTweetEntityGraphMessageBuilder: TimelineEventToUserTweetEntityGraphBuilder,
|
||||
decider: RecosInjectorDecider,
|
||||
gizmoduck: Gizmoduck,
|
||||
tweetypie: Tweetypie
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventBusProcessor[TimelineEvent] {
|
||||
|
||||
private val processEventDeciderCounter = statsReceiver.counter("num_process_timeline_event")
|
||||
private val numFavoriteEventCounter = statsReceiver.counter("num_favorite_event")
|
||||
private val numUnFavoriteEventCounter = statsReceiver.counter("num_unfavorite_event")
|
||||
private val numNotFavoriteEventCounter = statsReceiver.counter("num_not_favorite_event")
|
||||
|
||||
private val numSelfFavoriteCounter = statsReceiver.counter("num_self_favorite_event")
|
||||
private val numNullCastTweetCounter = statsReceiver.counter("num_null_cast_tweet")
|
||||
private val numTweetFailSafetyLevelCounter = statsReceiver.counter("num_fail_tweetypie_safety")
|
||||
private val numFavoriteUserUnsafeCounter = statsReceiver.counter("num_favorite_user_unsafe")
|
||||
private val engageUserFilter = new UserFilter(gizmoduck)(statsReceiver.scope("engage_user"))
|
||||
private val tweetFilter = new TweetFilter(tweetypie)
|
||||
|
||||
private val numProcessFavorite = statsReceiver.counter("num_process_favorite")
|
||||
private val numNoProcessFavorite = statsReceiver.counter("num_no_process_favorite")
|
||||
|
||||
private def getFavoriteEventDetails(
|
||||
favoriteEvent: FavoriteEvent
|
||||
): TweetFavoriteEventDetails = {
|
||||
|
||||
val engagement = UserTweetEngagement(
|
||||
engageUserId = favoriteEvent.userId,
|
||||
engageUser = favoriteEvent.user,
|
||||
action = Action.Favorite,
|
||||
engagementTimeMillis = Some(favoriteEvent.eventTimeMs),
|
||||
tweetId = favoriteEvent.tweetId, // the tweet, or source tweet if target tweet is a retweet
|
||||
tweetDetails = favoriteEvent.tweet.map(TweetDetails) // tweet always exists
|
||||
)
|
||||
TweetFavoriteEventDetails(userTweetEngagement = engagement)
|
||||
}
|
||||
|
||||
private def getUnfavoriteEventDetails(
|
||||
unfavoriteEvent: UnfavoriteEvent
|
||||
): TweetFavoriteEventDetails = {
|
||||
val engagement = UserTweetEngagement(
|
||||
engageUserId = unfavoriteEvent.userId,
|
||||
engageUser = unfavoriteEvent.user,
|
||||
action = Action.Unfavorite,
|
||||
engagementTimeMillis = Some(unfavoriteEvent.eventTimeMs),
|
||||
tweetId = unfavoriteEvent.tweetId, // the tweet, or source tweet if target tweet is a retweet
|
||||
tweetDetails = unfavoriteEvent.tweet.map(TweetDetails) // tweet always exists
|
||||
)
|
||||
TweetFavoriteEventDetails(userTweetEngagement = engagement)
|
||||
}
|
||||
|
||||
private def shouldProcessFavoriteEvent(event: TweetFavoriteEventDetails): Future[Boolean] = {
|
||||
val engagement = event.userTweetEngagement
|
||||
val engageUserId = engagement.engageUserId
|
||||
val tweetId = engagement.tweetId
|
||||
val authorIdOpt = engagement.tweetDetails.flatMap(_.authorId)
|
||||
|
||||
val isSelfFavorite = authorIdOpt.contains(engageUserId)
|
||||
val isNullCastTweet = engagement.tweetDetails.forall(_.isNullCastTweet)
|
||||
val isEngageUserSafeFut = engageUserFilter.filterByUserId(engageUserId)
|
||||
val isTweetPassSafetyFut = tweetFilter.filterForTweetypieSafetyLevel(tweetId)
|
||||
|
||||
Future.join(isEngageUserSafeFut, isTweetPassSafetyFut).map {
|
||||
case (isEngageUserSafe, isTweetPassSafety) =>
|
||||
if (isSelfFavorite) numSelfFavoriteCounter.incr()
|
||||
if (isNullCastTweet) numNullCastTweetCounter.incr()
|
||||
if (!isEngageUserSafe) numFavoriteUserUnsafeCounter.incr()
|
||||
if (!isTweetPassSafety) numTweetFailSafetyLevelCounter.incr()
|
||||
|
||||
!isSelfFavorite && !isNullCastTweet && isEngageUserSafe && isTweetPassSafety
|
||||
}
|
||||
}
|
||||
|
||||
private def processFavoriteEvent(favoriteEvent: FavoriteEvent): Future[Unit] = {
|
||||
val eventDetails = getFavoriteEventDetails(favoriteEvent)
|
||||
shouldProcessFavoriteEvent(eventDetails).map {
|
||||
case true =>
|
||||
numProcessFavorite.incr()
|
||||
// Convert the event for UserTweetEntityGraph
|
||||
userTweetEntityGraphMessageBuilder.processEvent(eventDetails).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher.publish(edge.convertToRecosHoseMessage, userTweetEntityGraphTopic)
|
||||
}
|
||||
}
|
||||
case false =>
|
||||
numNoProcessFavorite.incr()
|
||||
}
|
||||
}
|
||||
|
||||
private def processUnFavoriteEvent(unFavoriteEvent: UnfavoriteEvent): Future[Unit] = {
|
||||
if (decider.isAvailable(RecosInjectorDeciderConstants.EnableUnfavoriteEdge)) {
|
||||
val eventDetails = getUnfavoriteEventDetails(unFavoriteEvent)
|
||||
// Convert the event for UserTweetEntityGraph
|
||||
userTweetEntityGraphMessageBuilder.processEvent(eventDetails).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher.publish(edge.convertToRecosHoseMessage, userTweetEntityGraphTopic)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Future.Unit
|
||||
}
|
||||
}
|
||||
|
||||
override def processEvent(event: TimelineEvent): Future[Unit] = {
|
||||
processEventDeciderCounter.incr()
|
||||
event match {
|
||||
case TimelineEvent.Favorite(favoriteEvent: FavoriteEvent) =>
|
||||
numFavoriteEventCounter.incr()
|
||||
processFavoriteEvent(favoriteEvent)
|
||||
case TimelineEvent.Unfavorite(unFavoriteEvent: UnfavoriteEvent) =>
|
||||
numUnFavoriteEventCounter.incr()
|
||||
processUnFavoriteEvent(unFavoriteEvent)
|
||||
case _ =>
|
||||
numNotFavoriteEventCounter.incr()
|
||||
Future.Unit
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,256 +0,0 @@
|
|||
package com.twitter.recosinjector.event_processors
|
||||
|
||||
import com.twitter.finagle.mtls.authentication.ServiceIdentifier
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.frigate.common.util.SnowflakeUtils
|
||||
import com.twitter.gizmoduck.thriftscala.User
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recos.util.Action.Action
|
||||
import com.twitter.recosinjector.clients.Gizmoduck
|
||||
import com.twitter.recosinjector.clients.SocialGraph
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.recosinjector.edges.TweetEventToUserTweetEntityGraphBuilder
|
||||
import com.twitter.recosinjector.edges.TweetEventToUserUserGraphBuilder
|
||||
import com.twitter.recosinjector.filters.TweetFilter
|
||||
import com.twitter.recosinjector.filters.UserFilter
|
||||
import com.twitter.recosinjector.publishers.KafkaEventPublisher
|
||||
import com.twitter.recosinjector.util.TweetCreateEventDetails
|
||||
import com.twitter.recosinjector.util.TweetDetails
|
||||
import com.twitter.recosinjector.util.UserTweetEngagement
|
||||
import com.twitter.scrooge.ThriftStructCodec
|
||||
import com.twitter.tweetypie.thriftscala.Tweet
|
||||
import com.twitter.tweetypie.thriftscala.TweetCreateEvent
|
||||
import com.twitter.tweetypie.thriftscala.TweetEvent
|
||||
import com.twitter.tweetypie.thriftscala.TweetEventData
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Event processor for tweet_events EventBus stream from Tweetypie. This stream provides all the
|
||||
* key events related to a new tweet, like Creation, Retweet, Quote Tweet, and Replying.
|
||||
* It also carries the entities/metadata information in a tweet, including
|
||||
* @ Mention, HashTag, MediaTag, URL, etc.
|
||||
*/
|
||||
class TweetEventProcessor(
|
||||
override val eventBusStreamName: String,
|
||||
override val thriftStruct: ThriftStructCodec[TweetEvent],
|
||||
override val serviceIdentifier: ServiceIdentifier,
|
||||
userUserGraphMessageBuilder: TweetEventToUserUserGraphBuilder,
|
||||
userUserGraphTopic: String,
|
||||
userTweetEntityGraphMessageBuilder: TweetEventToUserTweetEntityGraphBuilder,
|
||||
userTweetEntityGraphTopic: String,
|
||||
kafkaEventPublisher: KafkaEventPublisher,
|
||||
socialGraph: SocialGraph,
|
||||
gizmoduck: Gizmoduck,
|
||||
tweetypie: Tweetypie
|
||||
)(
|
||||
override implicit val statsReceiver: StatsReceiver)
|
||||
extends EventBusProcessor[TweetEvent] {
|
||||
|
||||
private val tweetCreateEventCounter = statsReceiver.counter("num_tweet_create_events")
|
||||
private val nonTweetCreateEventCounter = statsReceiver.counter("num_non_tweet_create_events")
|
||||
|
||||
private val tweetActionStats = statsReceiver.scope("tweet_action")
|
||||
private val numUrlCounter = statsReceiver.counter("num_tweet_url")
|
||||
private val numMediaUrlCounter = statsReceiver.counter("num_tweet_media_url")
|
||||
private val numHashTagCounter = statsReceiver.counter("num_tweet_hashtag")
|
||||
|
||||
private val numMentionsCounter = statsReceiver.counter("num_tweet_mention")
|
||||
private val numMediatagCounter = statsReceiver.counter("num_tweet_mediatag")
|
||||
private val numValidMentionsCounter = statsReceiver.counter("num_tweet_valid_mention")
|
||||
private val numValidMediatagCounter = statsReceiver.counter("num_tweet_valid_mediatag")
|
||||
|
||||
private val numNullCastTweetCounter = statsReceiver.counter("num_null_cast_tweet")
|
||||
private val numNullCastSourceTweetCounter = statsReceiver.counter("num_null_cast_source_tweet")
|
||||
private val numTweetFailSafetyLevelCounter = statsReceiver.counter("num_fail_tweetypie_safety")
|
||||
private val numAuthorUnsafeCounter = statsReceiver.counter("num_author_unsafe")
|
||||
private val numProcessTweetCounter = statsReceiver.counter("num_process_tweet")
|
||||
private val numNoProcessTweetCounter = statsReceiver.counter("num_no_process_tweet")
|
||||
|
||||
private val selfRetweetCounter = statsReceiver.counter("num_retweets_self")
|
||||
|
||||
private val engageUserFilter = new UserFilter(gizmoduck)(statsReceiver.scope("author_user"))
|
||||
private val tweetFilter = new TweetFilter(tweetypie)
|
||||
|
||||
private def trackTweetCreateEventStats(details: TweetCreateEventDetails): Unit = {
|
||||
tweetActionStats.counter(details.userTweetEngagement.action.toString).incr()
|
||||
|
||||
details.userTweetEngagement.tweetDetails.foreach { tweetDetails =>
|
||||
tweetDetails.mentionUserIds.foreach(mention => numMentionsCounter.incr(mention.size))
|
||||
tweetDetails.mediatagUserIds.foreach(mediatag => numMediatagCounter.incr(mediatag.size))
|
||||
tweetDetails.urls.foreach(urls => numUrlCounter.incr(urls.size))
|
||||
tweetDetails.mediaUrls.foreach(mediaUrls => numMediaUrlCounter.incr(mediaUrls.size))
|
||||
tweetDetails.hashtags.foreach(hashtags => numHashTagCounter.incr(hashtags.size))
|
||||
}
|
||||
|
||||
details.validMentionUserIds.foreach(mentions => numValidMentionsCounter.incr(mentions.size))
|
||||
details.validMediatagUserIds.foreach(mediatags => numValidMediatagCounter.incr(mediatags.size))
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a created tweet, return what type of tweet it is, i.e. Tweet, Retweet, Quote, or Reply。
|
||||
* Retweet, Quote, or Reply are responsive actions to a source tweet, so for these tweets,
|
||||
* we also return the tweet id and author of the source tweet (ex. the tweet being retweeted).
|
||||
*/
|
||||
private def getTweetAction(tweetDetails: TweetDetails): Action = {
|
||||
(tweetDetails.replySourceId, tweetDetails.retweetSourceId, tweetDetails.quoteSourceId) match {
|
||||
case (Some(_), _, _) =>
|
||||
Action.Reply
|
||||
case (_, Some(_), _) =>
|
||||
Action.Retweet
|
||||
case (_, _, Some(_)) =>
|
||||
Action.Quote
|
||||
case _ =>
|
||||
Action.Tweet
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a list of mentioned users and mediatagged users in the tweet, return the users who
|
||||
* actually follow the source user.
|
||||
*/
|
||||
private def getFollowedByIds(
|
||||
sourceUserId: Long,
|
||||
mentionUserIds: Option[Seq[Long]],
|
||||
mediatagUserIds: Option[Seq[Long]]
|
||||
): Future[Seq[Long]] = {
|
||||
val uniqueEntityUserIds =
|
||||
(mentionUserIds.getOrElse(Nil) ++ mediatagUserIds.getOrElse(Nil)).distinct
|
||||
if (uniqueEntityUserIds.isEmpty) {
|
||||
Future.Nil
|
||||
} else {
|
||||
socialGraph.followedByNotMutedBy(sourceUserId, uniqueEntityUserIds)
|
||||
}
|
||||
}
|
||||
|
||||
private def getSourceTweet(tweetDetails: TweetDetails): Future[Option[Tweet]] = {
|
||||
tweetDetails.sourceTweetId match {
|
||||
case Some(sourceTweetId) =>
|
||||
tweetypie.getTweet(sourceTweetId)
|
||||
case _ =>
|
||||
Future.None
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Extract and return the details when the source user created a new tweet.
|
||||
*/
|
||||
private def getTweetDetails(
|
||||
tweet: Tweet,
|
||||
engageUser: User
|
||||
): Future[TweetCreateEventDetails] = {
|
||||
val tweetDetails = TweetDetails(tweet)
|
||||
|
||||
val action = getTweetAction(tweetDetails)
|
||||
val tweetCreationTimeMillis = SnowflakeUtils.tweetCreationTime(tweet.id).map(_.inMilliseconds)
|
||||
val engageUserId = engageUser.id
|
||||
val userTweetEngagement = UserTweetEngagement(
|
||||
engageUserId = engageUserId,
|
||||
engageUser = Some(engageUser),
|
||||
action = action,
|
||||
engagementTimeMillis = tweetCreationTimeMillis,
|
||||
tweetId = tweet.id,
|
||||
tweetDetails = Some(tweetDetails)
|
||||
)
|
||||
|
||||
val sourceTweetFut = getSourceTweet(tweetDetails)
|
||||
val followedByIdsFut = getFollowedByIds(
|
||||
engageUserId,
|
||||
tweetDetails.mentionUserIds,
|
||||
tweetDetails.mediatagUserIds
|
||||
)
|
||||
|
||||
Future.join(followedByIdsFut, sourceTweetFut).map {
|
||||
case (followedByIds, sourceTweet) =>
|
||||
TweetCreateEventDetails(
|
||||
userTweetEngagement = userTweetEngagement,
|
||||
validEntityUserIds = followedByIds,
|
||||
sourceTweetDetails = sourceTweet.map(TweetDetails)
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Exclude any Retweets of one's own tweets
|
||||
*/
|
||||
private def isEventSelfRetweet(tweetEvent: TweetCreateEventDetails): Boolean = {
|
||||
(tweetEvent.userTweetEngagement.action == Action.Retweet) &&
|
||||
tweetEvent.userTweetEngagement.tweetDetails.exists(
|
||||
_.sourceTweetUserId.contains(
|
||||
tweetEvent.userTweetEngagement.engageUserId
|
||||
))
|
||||
}
|
||||
|
||||
private def isTweetPassSafetyFilter(tweetEvent: TweetCreateEventDetails): Future[Boolean] = {
|
||||
tweetEvent.userTweetEngagement.action match {
|
||||
case Action.Reply | Action.Retweet | Action.Quote =>
|
||||
tweetEvent.userTweetEngagement.tweetDetails
|
||||
.flatMap(_.sourceTweetId).map { sourceTweetId =>
|
||||
tweetFilter.filterForTweetypieSafetyLevel(sourceTweetId)
|
||||
}.getOrElse(Future(false))
|
||||
case Action.Tweet =>
|
||||
tweetFilter.filterForTweetypieSafetyLevel(tweetEvent.userTweetEngagement.tweetId)
|
||||
}
|
||||
}
|
||||
|
||||
private def shouldProcessTweetEvent(event: TweetCreateEventDetails): Future[Boolean] = {
|
||||
val engagement = event.userTweetEngagement
|
||||
val engageUserId = engagement.engageUserId
|
||||
|
||||
val isNullCastTweet = engagement.tweetDetails.forall(_.isNullCastTweet)
|
||||
val isNullCastSourceTweet = event.sourceTweetDetails.exists(_.isNullCastTweet)
|
||||
val isSelfRetweet = isEventSelfRetweet(event)
|
||||
val isEngageUserSafeFut = engageUserFilter.filterByUserId(engageUserId)
|
||||
val isTweetPassSafetyFut = isTweetPassSafetyFilter(event)
|
||||
|
||||
Future.join(isEngageUserSafeFut, isTweetPassSafetyFut).map {
|
||||
case (isEngageUserSafe, isTweetPassSafety) =>
|
||||
if (isNullCastTweet) numNullCastTweetCounter.incr()
|
||||
if (isNullCastSourceTweet) numNullCastSourceTweetCounter.incr()
|
||||
if (!isEngageUserSafe) numAuthorUnsafeCounter.incr()
|
||||
if (isSelfRetweet) selfRetweetCounter.incr()
|
||||
if (!isTweetPassSafety) numTweetFailSafetyLevelCounter.incr()
|
||||
|
||||
!isNullCastTweet &&
|
||||
!isNullCastSourceTweet &&
|
||||
!isSelfRetweet &&
|
||||
isEngageUserSafe &&
|
||||
isTweetPassSafety
|
||||
}
|
||||
}
|
||||
|
||||
override def processEvent(event: TweetEvent): Future[Unit] = {
|
||||
event.data match {
|
||||
case TweetEventData.TweetCreateEvent(event: TweetCreateEvent) =>
|
||||
getTweetDetails(
|
||||
tweet = event.tweet,
|
||||
engageUser = event.user
|
||||
).flatMap { eventWithDetails =>
|
||||
tweetCreateEventCounter.incr()
|
||||
|
||||
shouldProcessTweetEvent(eventWithDetails).map {
|
||||
case true =>
|
||||
numProcessTweetCounter.incr()
|
||||
trackTweetCreateEventStats(eventWithDetails)
|
||||
// Convert the event for UserUserGraph
|
||||
userUserGraphMessageBuilder.processEvent(eventWithDetails).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher.publish(edge.convertToRecosHoseMessage, userUserGraphTopic)
|
||||
}
|
||||
}
|
||||
// Convert the event for UserTweetEntityGraph
|
||||
userTweetEntityGraphMessageBuilder.processEvent(eventWithDetails).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher
|
||||
.publish(edge.convertToRecosHoseMessage, userTweetEntityGraphTopic)
|
||||
}
|
||||
}
|
||||
case false =>
|
||||
numNoProcessTweetCounter.incr()
|
||||
}
|
||||
}
|
||||
case _ =>
|
||||
nonTweetCreateEventCounter.incr()
|
||||
Future.Unit
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,10 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"finagle/finagle-stats",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"src/thrift/com/twitter/gizmoduck:thrift-scala",
|
||||
],
|
||||
)
|
|
@ -1,34 +0,0 @@
|
|||
package com.twitter.recosinjector.filters
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.util.Future
|
||||
|
||||
/**
|
||||
* Filters tweets that are null cast, i.e. tweet is not delivered to a user's followers,
|
||||
* not shown in the user's timeline, and does not appear in search results.
|
||||
* They are mainly ads tweets.
|
||||
*/
|
||||
class NullCastTweetFilter(
|
||||
tweetypie: Tweetypie
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
private val requests = stats.counter("requests")
|
||||
private val filtered = stats.counter("filtered")
|
||||
|
||||
// Return Future(True) to keep the Tweet.
|
||||
def filter(tweetId: Long): Future[Boolean] = {
|
||||
requests.incr()
|
||||
tweetypie
|
||||
.getTweet(tweetId)
|
||||
.map { tweetOpt =>
|
||||
// If the null cast bit is Some(true), drop the tweet.
|
||||
val isNullCastTweet = tweetOpt.flatMap(_.coreData).exists(_.nullcast)
|
||||
if (isNullCastTweet) {
|
||||
filtered.incr()
|
||||
}
|
||||
!isNullCastTweet
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,31 +0,0 @@
|
|||
package com.twitter.recosinjector.filters
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.util.Future
|
||||
|
||||
class TweetFilter(
|
||||
tweetypie: Tweetypie
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
private val requests = stats.counter("requests")
|
||||
private val filtered = stats.counter("filtered")
|
||||
|
||||
/**
|
||||
* Query Tweetypie to see if we can fetch a tweet object successfully. TweetyPie applies a safety
|
||||
* filter and will not return the tweet object if the filter does not pass.
|
||||
*/
|
||||
def filterForTweetypieSafetyLevel(tweetId: Long): Future[Boolean] = {
|
||||
requests.incr()
|
||||
tweetypie
|
||||
.getTweet(tweetId)
|
||||
.map {
|
||||
case Some(_) =>
|
||||
true
|
||||
case _ =>
|
||||
filtered.incr()
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,69 +0,0 @@
|
|||
package com.twitter.recosinjector.filters
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.gizmoduck.thriftscala.{LabelValue, User}
|
||||
import com.twitter.recosinjector.clients.Gizmoduck
|
||||
import com.twitter.util.Future
|
||||
|
||||
class UserFilter(
|
||||
gizmoduck: Gizmoduck
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
private val stats = statsReceiver.scope(this.getClass.getSimpleName)
|
||||
private val requests = stats.counter("requests")
|
||||
private val filtered = stats.counter("filtered")
|
||||
|
||||
private def isUnsafe(user: User): Boolean =
|
||||
user.safety.exists { s =>
|
||||
s.deactivated || s.suspended || s.restricted || s.nsfwUser || s.nsfwAdmin || s.isProtected
|
||||
}
|
||||
|
||||
private def hasNsfwHighPrecisionLabel(user: User): Boolean =
|
||||
user.labels.exists {
|
||||
_.labels.exists(_.labelValue == LabelValue.NsfwHighPrecision)
|
||||
}
|
||||
|
||||
/**
|
||||
* NOTE: This will by-pass Gizmoduck's safety level, and might allow invalid users to pass filter.
|
||||
* Consider using filterByUserId instead.
|
||||
* Return true if the user is valid, otherwise return false.
|
||||
* It will first attempt to use the user object provided by the caller, and will call Gizmoduck
|
||||
* to back fill if the caller does not provide it. This helps reduce Gizmoduck traffic.
|
||||
*/
|
||||
def filterByUser(
|
||||
userId: Long,
|
||||
userOpt: Option[User] = None
|
||||
): Future[Boolean] = {
|
||||
requests.incr()
|
||||
val userFut = userOpt match {
|
||||
case Some(user) => Future(Some(user))
|
||||
case _ => gizmoduck.getUser(userId)
|
||||
}
|
||||
|
||||
userFut.map(_.exists { user =>
|
||||
val isValidUser = !isUnsafe(user) && !hasNsfwHighPrecisionLabel(user)
|
||||
if (!isValidUser) filtered.incr()
|
||||
isValidUser
|
||||
})
|
||||
}
|
||||
|
||||
/**
|
||||
* Given a userId, return true if the user is valid. This id done in 2 steps:
|
||||
* 1. Applying Gizmoduck's safety level while querying for the user from Gizmoduck
|
||||
* 2. If a user passes Gizmoduck's safety level, check its specific user status
|
||||
*/
|
||||
def filterByUserId(userId: Long): Future[Boolean] = {
|
||||
requests.incr()
|
||||
gizmoduck
|
||||
.getUser(userId)
|
||||
.map { userOpt =>
|
||||
val isValidUser = userOpt.exists { user =>
|
||||
!(isUnsafe(user) || hasNsfwHighPrecisionLabel(user))
|
||||
}
|
||||
if (!isValidUser) {
|
||||
filtered.incr()
|
||||
}
|
||||
isValidUser
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/org/apache/thrift:libthrift",
|
||||
"finatra-internal/messaging/kafka/src/main/scala",
|
||||
"servo/repo/src/main/scala",
|
||||
"src/thrift/com/twitter/recos:recos-injector-scala",
|
||||
"src/thrift/com/twitter/recos:recos-internal-scala",
|
||||
],
|
||||
)
|
|
@ -1,54 +0,0 @@
|
|||
package com.twitter.recosinjector.publishers
|
||||
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finagle.thrift.ClientId
|
||||
import com.twitter.finatra.kafka.producers.FinagleKafkaProducerBuilder
|
||||
import com.twitter.finatra.kafka.serde.ScalaSerdes
|
||||
import com.twitter.recos.internal.thriftscala.RecosHoseMessage
|
||||
import org.apache.kafka.clients.CommonClientConfigs
|
||||
import org.apache.kafka.clients.producer.ProducerRecord
|
||||
import org.apache.kafka.common.config.SaslConfigs
|
||||
import org.apache.kafka.common.config.SslConfigs
|
||||
import org.apache.kafka.common.security.auth.SecurityProtocol
|
||||
import org.apache.kafka.common.serialization.StringSerializer
|
||||
|
||||
case class KafkaEventPublisher(
|
||||
kafkaDest: String,
|
||||
outputKafkaTopicPrefix: String,
|
||||
clientId: ClientId,
|
||||
truststoreLocation: String) {
|
||||
|
||||
private val producer = FinagleKafkaProducerBuilder[String, RecosHoseMessage]()
|
||||
.dest(kafkaDest)
|
||||
.clientId(clientId.name)
|
||||
.keySerializer(new StringSerializer)
|
||||
.valueSerializer(ScalaSerdes.Thrift[RecosHoseMessage].serializer)
|
||||
.withConfig(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, SecurityProtocol.SASL_SSL.toString)
|
||||
.withConfig(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, truststoreLocation)
|
||||
.withConfig(SaslConfigs.SASL_MECHANISM, SaslConfigs.GSSAPI_MECHANISM)
|
||||
.withConfig(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, "kafka")
|
||||
.withConfig(SaslConfigs.SASL_KERBEROS_SERVER_NAME, "kafka")
|
||||
// Use Native Kafka Client
|
||||
.buildClient()
|
||||
|
||||
def publish(
|
||||
message: RecosHoseMessage,
|
||||
topic: String
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver
|
||||
): Unit = {
|
||||
val topicName = s"${outputKafkaTopicPrefix}_$topic"
|
||||
// Kafka Producer is thread-safe. No extra Future-pool protect.
|
||||
producer.send(new ProducerRecord(topicName, message))
|
||||
statsReceiver.counter(topicName + "_written_msg_success").incr()
|
||||
}
|
||||
}
|
||||
|
||||
object KafkaEventPublisher {
|
||||
// Kafka topics available for publishing
|
||||
val UserVideoTopic = "user_video"
|
||||
val UserTweetEntityTopic = "user_tweet_entity"
|
||||
val UserUserTopic = "user_user"
|
||||
val UserAdTopic = "user_tweet"
|
||||
val UserTweetPlusTopic = "user_tweet_plus"
|
||||
}
|
|
@ -1,12 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"3rdparty/jvm/com/twitter/graphjet",
|
||||
"finagle/finagle-stats",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"src/scala/com/twitter/recos/util:recos-util",
|
||||
"src/thrift/com/twitter/tweetypie:tweet-scala",
|
||||
],
|
||||
)
|
|
@ -1,126 +0,0 @@
|
|||
package com.twitter.recosinjector.util
|
||||
|
||||
import com.twitter.frigate.common.base.TweetUtil
|
||||
import com.twitter.gizmoduck.thriftscala.User
|
||||
import com.twitter.recos.util.Action.Action
|
||||
import com.twitter.tweetypie.thriftscala.Tweet
|
||||
|
||||
/**
|
||||
* This is used to store information about a newly created tweet
|
||||
* @param validEntityUserIds For users mentioned or mediatagged in the tweet, these follow the
|
||||
* engage user and only they are are considered valid
|
||||
* @param sourceTweetDetails For Reply, Quote, or RT, source tweet is the tweet being actioned on
|
||||
*/
|
||||
case class TweetCreateEventDetails(
|
||||
userTweetEngagement: UserTweetEngagement,
|
||||
validEntityUserIds: Seq[Long],
|
||||
sourceTweetDetails: Option[TweetDetails]) {
|
||||
// A mention is only valid if the mentioned user follows the source user
|
||||
val validMentionUserIds: Option[Seq[Long]] = {
|
||||
userTweetEngagement.tweetDetails.flatMap(_.mentionUserIds.map(_.intersect(validEntityUserIds)))
|
||||
}
|
||||
|
||||
// A mediatag is only valid if the mediatagged user follows the source user
|
||||
val validMediatagUserIds: Option[Seq[Long]] = {
|
||||
userTweetEngagement.tweetDetails.flatMap(_.mediatagUserIds.map(_.intersect(validEntityUserIds)))
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Stores information about a favorite/unfav engagement.
|
||||
* NOTE: This could either be Likes, or UNLIKEs (i.e. when user cancels the Like)
|
||||
* @param userTweetEngagement the engagement details
|
||||
*/
|
||||
case class TweetFavoriteEventDetails(
|
||||
userTweetEngagement: UserTweetEngagement)
|
||||
|
||||
/**
|
||||
* Stores information about a unified user action engagement.
|
||||
* @param userTweetEngagement the engagement details
|
||||
*/
|
||||
case class UuaEngagementEventDetails(
|
||||
userTweetEngagement: UserTweetEngagement)
|
||||
|
||||
/**
|
||||
* Details about a user-tweet engagement, like when a user tweeted/liked a tweet
|
||||
* @param engageUserId User that engaged with the tweet
|
||||
* @param action The action the user took on the tweet
|
||||
* @param tweetId The type of engagement the user took on the tweet
|
||||
*/
|
||||
case class UserTweetEngagement(
|
||||
engageUserId: Long,
|
||||
engageUser: Option[User],
|
||||
action: Action,
|
||||
engagementTimeMillis: Option[Long],
|
||||
tweetId: Long,
|
||||
tweetDetails: Option[TweetDetails])
|
||||
|
||||
/**
|
||||
* Helper class that decomposes a tweet object and provides related details about this tweet
|
||||
*/
|
||||
case class TweetDetails(tweet: Tweet) {
|
||||
val authorId: Option[Long] = tweet.coreData.map(_.userId)
|
||||
|
||||
val urls: Option[Seq[String]] = tweet.urls.map(_.map(_.url))
|
||||
|
||||
val mediaUrls: Option[Seq[String]] = tweet.media.map(_.map(_.expandedUrl))
|
||||
|
||||
val hashtags: Option[Seq[String]] = tweet.hashtags.map(_.map(_.text))
|
||||
|
||||
// mentionUserIds include reply user ids at the beginning of a tweet
|
||||
val mentionUserIds: Option[Seq[Long]] = tweet.mentions.map(_.flatMap(_.userId))
|
||||
|
||||
val mediatagUserIds: Option[Seq[Long]] = tweet.mediaTags.map {
|
||||
_.tagMap.flatMap {
|
||||
case (_, mediaTag) => mediaTag.flatMap(_.userId)
|
||||
}.toSeq
|
||||
}
|
||||
|
||||
val replySourceId: Option[Long] = tweet.coreData.flatMap(_.reply.flatMap(_.inReplyToStatusId))
|
||||
val replyUserId: Option[Long] = tweet.coreData.flatMap(_.reply.map(_.inReplyToUserId))
|
||||
|
||||
val retweetSourceId: Option[Long] = tweet.coreData.flatMap(_.share.map(_.sourceStatusId))
|
||||
val retweetUserId: Option[Long] = tweet.coreData.flatMap(_.share.map(_.sourceUserId))
|
||||
|
||||
val quoteSourceId: Option[Long] = tweet.quotedTweet.map(_.tweetId)
|
||||
val quoteUserId: Option[Long] = tweet.quotedTweet.map(_.userId)
|
||||
val quoteTweetUrl: Option[String] = tweet.quotedTweet.flatMap(_.permalink.map(_.shortUrl))
|
||||
|
||||
//If the tweet is retweet/reply/quote, this is the tweet that the new tweet responds to
|
||||
val (sourceTweetId, sourceTweetUserId) = {
|
||||
(replySourceId, retweetSourceId, quoteSourceId) match {
|
||||
case (Some(replyId), _, _) =>
|
||||
(Some(replyId), replyUserId)
|
||||
case (_, Some(retweetId), _) =>
|
||||
(Some(retweetId), retweetUserId)
|
||||
case (_, _, Some(quoteId)) =>
|
||||
(Some(quoteId), quoteUserId)
|
||||
case _ =>
|
||||
(None, None)
|
||||
}
|
||||
}
|
||||
|
||||
// Boolean information
|
||||
val hasPhoto: Boolean = TweetUtil.containsPhotoTweet(tweet)
|
||||
|
||||
val hasVideo: Boolean = TweetUtil.containsVideoTweet(tweet)
|
||||
|
||||
// TweetyPie does not populate url fields in a quote tweet create event, even though we
|
||||
// consider quote tweets as url tweets. This boolean helps make up for it.
|
||||
// Details: https://groups.google.com/a/twitter.com/d/msg/eng/BhK1XAcSSWE/F8Gc4_5uDwAJ
|
||||
val hasQuoteTweetUrl: Boolean = tweet.quotedTweet.exists(_.permalink.isDefined)
|
||||
|
||||
val hasUrl: Boolean = this.urls.exists(_.nonEmpty) || hasQuoteTweetUrl
|
||||
|
||||
val hasHashtag: Boolean = this.hashtags.exists(_.nonEmpty)
|
||||
|
||||
val isCard: Boolean = hasUrl | hasPhoto | hasVideo
|
||||
|
||||
implicit def bool2Long(b: Boolean): Long = if (b) 1L else 0L
|
||||
|
||||
// Return a hashed long that contains card type information of the tweet
|
||||
val cardInfo: Long = isCard | (hasUrl << 1) | (hasPhoto << 2) | (hasVideo << 3)
|
||||
|
||||
// nullcast tweet is one that is purposefully not broadcast to followers, ex. an ad tweet.
|
||||
val isNullCastTweet: Boolean = tweet.coreData.exists(_.nullcast)
|
||||
}
|
|
@ -1,24 +0,0 @@
|
|||
scala_library(
|
||||
platform = "java11",
|
||||
strict_deps = False,
|
||||
tags = ["bazel-compatible"],
|
||||
dependencies = [
|
||||
"eventbus/client",
|
||||
"kafka/finagle-kafka/finatra-kafka/src/main/scala",
|
||||
"kafka/libs/src/main/scala/com/twitter/kafka/client/headers",
|
||||
"kafka/libs/src/main/scala/com/twitter/kafka/client/processor",
|
||||
"recos-injector/server/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/clients",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/config",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/decider",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/edges",
|
||||
"recos-injector/server/src/main/scala/com/twitter/recosinjector/publishers",
|
||||
"src/thrift/com/twitter/clientapp/gen:clientapp-scala",
|
||||
"src/thrift/com/twitter/gizmoduck:user-thrift-scala",
|
||||
"src/thrift/com/twitter/socialgraph:thrift-scala",
|
||||
"src/thrift/com/twitter/timelineservice/server/internal:thrift-scala",
|
||||
"src/thrift/com/twitter/tweetypie:events-scala",
|
||||
"src/thrift/com/twitter/tweetypie:tweet-scala",
|
||||
"unified_user_actions/thrift/src/main/thrift/com/twitter/unified_user_actions:unified_user_actions-scala",
|
||||
],
|
||||
)
|
|
@ -1,181 +0,0 @@
|
|||
package com.twitter.recosinjector.uua_processors
|
||||
|
||||
import org.apache.kafka.clients.consumer.ConsumerRecord
|
||||
import com.twitter.finatra.kafka.serde.UnKeyed
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.recos.util.Action
|
||||
import com.twitter.recos.util.Action.Action
|
||||
import com.twitter.recosinjector.clients.Gizmoduck
|
||||
import com.twitter.recosinjector.clients.Tweetypie
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserVideoGraphBuilder
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserAdGraphBuilder
|
||||
import com.twitter.recosinjector.edges.UnifiedUserActionToUserTweetGraphPlusBuilder
|
||||
import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
|
||||
import com.twitter.unified_user_actions.thriftscala.ActionType
|
||||
import com.twitter.unified_user_actions.thriftscala.Item
|
||||
import com.twitter.recosinjector.filters.UserFilter
|
||||
import com.twitter.recosinjector.publishers.KafkaEventPublisher
|
||||
import com.twitter.recosinjector.util.TweetDetails
|
||||
import com.twitter.recosinjector.util.UserTweetEngagement
|
||||
import com.twitter.recosinjector.util.UuaEngagementEventDetails
|
||||
import com.twitter.unified_user_actions.thriftscala.NotificationContent
|
||||
import com.twitter.unified_user_actions.thriftscala.NotificationInfo
|
||||
import com.twitter.util.Future
|
||||
|
||||
class UnifiedUserActionProcessor(
|
||||
gizmoduck: Gizmoduck,
|
||||
tweetypie: Tweetypie,
|
||||
kafkaEventPublisher: KafkaEventPublisher,
|
||||
userVideoGraphTopic: String,
|
||||
userVideoGraphBuilder: UnifiedUserActionToUserVideoGraphBuilder,
|
||||
userAdGraphTopic: String,
|
||||
userAdGraphBuilder: UnifiedUserActionToUserAdGraphBuilder,
|
||||
userTweetGraphPlusTopic: String,
|
||||
userTweetGraphPlusBuilder: UnifiedUserActionToUserTweetGraphPlusBuilder
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
|
||||
val messagesProcessedCount = statsReceiver.counter("messages_processed")
|
||||
|
||||
val eventsByTypeCounts = statsReceiver.scope("events_by_type")
|
||||
private val numSelfEngageCounter = statsReceiver.counter("num_self_engage_event")
|
||||
private val numTweetFailSafetyLevelCounter = statsReceiver.counter("num_fail_tweetypie_safety")
|
||||
private val numNullCastTweetCounter = statsReceiver.counter("num_null_cast_tweet")
|
||||
private val numEngageUserUnsafeCounter = statsReceiver.counter("num_engage_user_unsafe")
|
||||
private val engageUserFilter = new UserFilter(gizmoduck)(statsReceiver.scope("engage_user"))
|
||||
private val numNoProcessTweetCounter = statsReceiver.counter("num_no_process_tweet")
|
||||
private val numProcessTweetCounter = statsReceiver.counter("num_process_tweet")
|
||||
|
||||
private def getUuaEngagementEventDetails(
|
||||
unifiedUserAction: UnifiedUserAction
|
||||
): Option[Future[UuaEngagementEventDetails]] = {
|
||||
val userIdOpt = unifiedUserAction.userIdentifier.userId
|
||||
val tweetIdOpt = unifiedUserAction.item match {
|
||||
case Item.TweetInfo(tweetInfo) => Some(tweetInfo.actionTweetId)
|
||||
case Item.NotificationInfo(
|
||||
NotificationInfo(_, NotificationContent.TweetNotification(notification))) =>
|
||||
Some(notification.tweetId)
|
||||
case _ => None
|
||||
}
|
||||
val timestamp = unifiedUserAction.eventMetadata.sourceTimestampMs
|
||||
val action = getTweetAction(unifiedUserAction.actionType)
|
||||
|
||||
tweetIdOpt
|
||||
.flatMap { tweetId =>
|
||||
userIdOpt.map { engageUserId =>
|
||||
val tweetFut = tweetypie.getTweet(tweetId)
|
||||
tweetFut.map { tweetOpt =>
|
||||
val tweetDetailsOpt = tweetOpt.map(TweetDetails)
|
||||
val engagement = UserTweetEngagement(
|
||||
engageUserId = engageUserId,
|
||||
action = action,
|
||||
engagementTimeMillis = Some(timestamp),
|
||||
tweetId = tweetId,
|
||||
engageUser = None,
|
||||
tweetDetails = tweetDetailsOpt
|
||||
)
|
||||
UuaEngagementEventDetails(engagement)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private def getTweetAction(action: ActionType): Action = {
|
||||
action match {
|
||||
case ActionType.ClientTweetVideoPlayback50 => Action.VideoPlayback50
|
||||
case ActionType.ClientTweetClick => Action.Click
|
||||
case ActionType.ClientTweetVideoPlayback75 => Action.VideoPlayback75
|
||||
case ActionType.ClientTweetVideoQualityView => Action.VideoQualityView
|
||||
case ActionType.ServerTweetFav => Action.Favorite
|
||||
case ActionType.ServerTweetReply => Action.Reply
|
||||
case ActionType.ServerTweetRetweet => Action.Retweet
|
||||
case ActionType.ClientTweetQuote => Action.Quote
|
||||
case ActionType.ClientNotificationOpen => Action.NotificationOpen
|
||||
case ActionType.ClientTweetEmailClick => Action.EmailClick
|
||||
case ActionType.ClientTweetShareViaBookmark => Action.Share
|
||||
case ActionType.ClientTweetShareViaCopyLink => Action.Share
|
||||
case ActionType.ClientTweetSeeFewer => Action.TweetSeeFewer
|
||||
case ActionType.ClientTweetNotRelevant => Action.TweetNotRelevant
|
||||
case ActionType.ClientTweetNotInterestedIn => Action.TweetNotInterestedIn
|
||||
case ActionType.ServerTweetReport => Action.TweetReport
|
||||
case ActionType.ClientTweetMuteAuthor => Action.TweetMuteAuthor
|
||||
case ActionType.ClientTweetBlockAuthor => Action.TweetBlockAuthor
|
||||
case _ => Action.UnDefined
|
||||
}
|
||||
}
|
||||
private def shouldProcessTweetEngagement(
|
||||
event: UuaEngagementEventDetails,
|
||||
isAdsUseCase: Boolean = false
|
||||
): Future[Boolean] = {
|
||||
val engagement = event.userTweetEngagement
|
||||
val engageUserId = engagement.engageUserId
|
||||
val authorIdOpt = engagement.tweetDetails.flatMap(_.authorId)
|
||||
|
||||
val isSelfEngage = authorIdOpt.contains(engageUserId)
|
||||
val isNullCastTweet = engagement.tweetDetails.forall(_.isNullCastTweet)
|
||||
val isEngageUserSafeFut = engageUserFilter.filterByUserId(engageUserId)
|
||||
val isTweetPassSafety =
|
||||
engagement.tweetDetails.isDefined // Tweetypie can fetch a tweet object successfully
|
||||
|
||||
isEngageUserSafeFut.map { isEngageUserSafe =>
|
||||
if (isSelfEngage) numSelfEngageCounter.incr()
|
||||
if (isNullCastTweet) numNullCastTweetCounter.incr()
|
||||
if (!isEngageUserSafe) numEngageUserUnsafeCounter.incr()
|
||||
if (!isTweetPassSafety) numTweetFailSafetyLevelCounter.incr()
|
||||
|
||||
!isSelfEngage && (!isNullCastTweet && !isAdsUseCase || isNullCastTweet && isAdsUseCase) && isEngageUserSafe && isTweetPassSafety
|
||||
}
|
||||
}
|
||||
|
||||
def apply(record: ConsumerRecord[UnKeyed, UnifiedUserAction]): Future[Unit] = {
|
||||
|
||||
messagesProcessedCount.incr()
|
||||
val unifiedUserAction = record.value
|
||||
eventsByTypeCounts.counter(unifiedUserAction.actionType.toString).incr()
|
||||
|
||||
getTweetAction(unifiedUserAction.actionType) match {
|
||||
case Action.UnDefined =>
|
||||
numNoProcessTweetCounter.incr()
|
||||
Future.Unit
|
||||
case action =>
|
||||
getUuaEngagementEventDetails(unifiedUserAction)
|
||||
.map {
|
||||
_.flatMap { detail =>
|
||||
// The following cases are set up specifically for an ads relevance demo.
|
||||
val actionForAds = Set(Action.Click, Action.Favorite, Action.VideoPlayback75)
|
||||
if (actionForAds.contains(action))
|
||||
shouldProcessTweetEngagement(detail, isAdsUseCase = true).map {
|
||||
case true =>
|
||||
userAdGraphBuilder.processEvent(detail).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher
|
||||
.publish(edge.convertToRecosHoseMessage, userAdGraphTopic)
|
||||
}
|
||||
}
|
||||
numProcessTweetCounter.incr()
|
||||
case _ =>
|
||||
}
|
||||
|
||||
shouldProcessTweetEngagement(detail).map {
|
||||
case true =>
|
||||
userVideoGraphBuilder.processEvent(detail).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher
|
||||
.publish(edge.convertToRecosHoseMessage, userVideoGraphTopic)
|
||||
}
|
||||
}
|
||||
|
||||
userTweetGraphPlusBuilder.processEvent(detail).map { edges =>
|
||||
edges.foreach { edge =>
|
||||
kafkaEventPublisher
|
||||
.publish(edge.convertToRecosHoseMessage, userTweetGraphPlusTopic)
|
||||
}
|
||||
}
|
||||
numProcessTweetCounter.incr()
|
||||
case _ =>
|
||||
}
|
||||
}
|
||||
}.getOrElse(Future.Unit)
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,71 +0,0 @@
|
|||
package com.twitter.recosinjector.uua_processors
|
||||
|
||||
import com.twitter.conversions.DurationOps._
|
||||
import com.twitter.finagle.stats.StatsReceiver
|
||||
import com.twitter.finatra.kafka.consumers.FinagleKafkaConsumerBuilder
|
||||
import com.twitter.finatra.kafka.domain.KafkaGroupId
|
||||
import com.twitter.finatra.kafka.domain.SeekStrategy
|
||||
import com.twitter.finatra.kafka.serde.ScalaSerdes
|
||||
import com.twitter.finatra.kafka.serde.UnKeyed
|
||||
import com.twitter.finatra.kafka.serde.UnKeyedSerde
|
||||
import org.apache.kafka.clients.CommonClientConfigs
|
||||
import org.apache.kafka.common.config.SaslConfigs
|
||||
import org.apache.kafka.common.config.SslConfigs
|
||||
import org.apache.kafka.common.security.auth.SecurityProtocol
|
||||
import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
|
||||
import com.twitter.kafka.client.processor.AtLeastOnceProcessor
|
||||
import com.twitter.kafka.client.processor.ThreadSafeKafkaConsumerClient
|
||||
import com.twitter.conversions.StorageUnitOps._
|
||||
|
||||
class UnifiedUserActionsConsumer(
|
||||
processor: UnifiedUserActionProcessor,
|
||||
truststoreLocation: String
|
||||
)(
|
||||
implicit statsReceiver: StatsReceiver) {
|
||||
import UnifiedUserActionsConsumer._
|
||||
|
||||
private val kafkaClient = new ThreadSafeKafkaConsumerClient[UnKeyed, UnifiedUserAction](
|
||||
FinagleKafkaConsumerBuilder[UnKeyed, UnifiedUserAction]()
|
||||
.groupId(KafkaGroupId(uuaRecosInjectorGroupId))
|
||||
.keyDeserializer(UnKeyedSerde.deserializer)
|
||||
.valueDeserializer(ScalaSerdes.Thrift[UnifiedUserAction].deserializer)
|
||||
.dest(uuaDest)
|
||||
.maxPollRecords(maxPollRecords)
|
||||
.maxPollInterval(maxPollInterval)
|
||||
.fetchMax(fetchMax)
|
||||
.seekStrategy(SeekStrategy.END)
|
||||
.enableAutoCommit(false) // AtLeastOnceProcessor performs commits manually
|
||||
.withConfig(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, SecurityProtocol.SASL_SSL.toString)
|
||||
.withConfig(SslConfigs.SSL_TRUSTSTORE_LOCATION_CONFIG, truststoreLocation)
|
||||
.withConfig(SaslConfigs.SASL_MECHANISM, SaslConfigs.GSSAPI_MECHANISM)
|
||||
.withConfig(SaslConfigs.SASL_KERBEROS_SERVICE_NAME, "kafka")
|
||||
.withConfig(SaslConfigs.SASL_KERBEROS_SERVER_NAME, "kafka")
|
||||
.config)
|
||||
|
||||
val atLeastOnceProcessor: AtLeastOnceProcessor[UnKeyed, UnifiedUserAction] = {
|
||||
AtLeastOnceProcessor[UnKeyed, UnifiedUserAction](
|
||||
name = processorName,
|
||||
topic = uuaTopic,
|
||||
consumer = kafkaClient,
|
||||
processor = processor.apply,
|
||||
maxPendingRequests = maxPendingRequests,
|
||||
workerThreads = workerThreads,
|
||||
commitIntervalMs = commitIntervalMs,
|
||||
statsReceiver = statsReceiver.scope(processorName)
|
||||
)
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
object UnifiedUserActionsConsumer {
|
||||
val maxPollRecords = 1000
|
||||
val maxPollInterval = 5.minutes
|
||||
val fetchMax = 1.megabytes
|
||||
val maxPendingRequests = 1000
|
||||
val workerThreads = 16
|
||||
val commitIntervalMs = 10.seconds.inMilliseconds
|
||||
val processorName = "unified_user_actions_processor"
|
||||
val uuaTopic = "unified_user_actions_engagements"
|
||||
val uuaDest = "/s/kafka/bluebird-1:kafka-tls"
|
||||
val uuaRecosInjectorGroupId = "recos-injector"
|
||||
}
|
Loading…
Reference in New Issue