the-algorithm/unified_user_actions/client/src/main/scala/com/twitter/unified_user_actions/client/summingbird/UnifiedUserActionsSourceScrooge.scala
twitter-team 617c8c787d Open-sourcing Unified User Actions
Unified User Action (UUA) is a centralized, real-time stream of user actions on Twitter, consumed by various product, ML, and marketing teams. UUA ensures that all internal teams consume uniform user-action data accurately and quickly.
2023-04-14 16:45:37 -05:00

44 lines
1.6 KiB
Scala

package com.twitter.unified_user_actions.client.summingbird
import com.twitter.summingbird.TimeExtractor
import com.twitter.summingbird.storm.Storm
import com.twitter.summingbird_internal.sources.AppId
import com.twitter.summingbird_internal.sources.SourceFactory
import com.twitter.tormenta_internal.spout.Kafka2ScroogeSpoutWrapper
import com.twitter.unified_user_actions.client.config.ClientConfig
import com.twitter.unified_user_actions.thriftscala.UnifiedUserAction
import com.twitter.unified_user_actions.client.config.KafkaConfigs
/**
 * Summingbird [[SourceFactory]] that exposes the Unified User Actions (UUA) Kafka stream as a
 * Storm source of [[UnifiedUserAction]] events.
 *
 * @param appId        application id; `appId.get` is handed to the Kafka spout as its `appId`
 * @param parallelism  passed to `Storm.source` as `Some(parallelism)` (presumably the spout
 *                     parallelism hint -- confirm against the Summingbird Storm platform)
 * @param kafkaConfig  supplies the Kafka cluster name and topic to read; defaults to
 *                     `KafkaConfigs.ProdUnifiedUserActions`
 * @param skipToLatest forwarded unchanged to the spout; defaults to `false` (presumably controls
 *                     whether consumption starts at the latest offset -- confirm in the wrapper)
 * @param enableTls    forwarded unchanged to the spout; defaults to `true`
 */
case class UnifiedUserActionsSourceScrooge(
  appId: AppId,
  parallelism: Int,
  kafkaConfig: ClientConfig = KafkaConfigs.ProdUnifiedUserActions,
  skipToLatest: Boolean = false,
  enableTls: Boolean = true)
    extends SourceFactory[Storm, UnifiedUserAction] {

  override def name: String = "UnifiedUserActionsSource"

  override def description: String = "Unified User Actions (UUA) events"

  // Summingbird (the client side) uses the `receivedTimestampMs` carried in each event's
  // metadata as the event time. That is the timestamp emitted by the UUA producer, so the
  // client and the producer stay on one continuous timeline.
  val timeExtractor: TimeExtractor[UnifiedUserAction] =
    TimeExtractor(_.eventMetadata.receivedTimestampMs)

  override def source = {
    // Kafka spout that decodes records with the UnifiedUserAction Scrooge codec.
    val uuaSpout = Kafka2ScroogeSpoutWrapper(
      codec = UnifiedUserAction,
      cluster = kafkaConfig.cluster.name,
      topic = kafkaConfig.topic,
      appId = appId.get,
      skipToLatest = skipToLatest,
      enableTls = enableTls
    )

    // The time extractor is supplied explicitly in the second parameter list.
    Storm.source(uuaSpout, Some(parallelism))(timeExtractor)
  }
}