Delete science/search/ingester/config directory

dogemanttv 2024-01-10 17:08:30 -06:00 committed by GitHub
parent 921e154bef
commit d52379152a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 0 additions and 673 deletions

View File

@@ -1,2 +0,0 @@
## Ingester Configs
This directory contains pipeline configurations for the tweet ingesters (realtime, protected, and realtime_cg) and the user-updates ingester. Each configuration defines an ordered sequence of stages that a tweet or user update passes through before reaching Earlybird. Source code for the stages referenced in these configs lives under src/java/com/twitter/search/ingester/pipeline/twitter.
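
The configs removed in this commit all follow the same Apache Commons Pipeline shape: a validator property, an event-counting listener, one or more driver factories backed by bounded blocking queues, and an ordered list of stages bound to a driver factory by id. A minimal sketch of that shape (the ExampleStage class and the "example" id are placeholders, not taken from any of the real configs below):

```xml
<?xml version="1.0" encoding="UTF-8"?>
<pipeline>
  <!-- Validates that consecutive stages have compatible inputs and outputs -->
  <property
    propName="validator"
    className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
  <!-- Counts objects as they are processed, for monitoring -->
  <listener className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
  <!-- Runs each bound stage on its own thread, fed by a bounded blocking queue -->
  <driverFactory
    className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
    id="example">
    <property
      propName="queueFactory"
      className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
      capacity="1000"
      fair="false"/>
  </driverFactory>
  <!-- Stages execute in document order; ExampleStage is a hypothetical placeholder -->
  <stage
    className="com.twitter.search.ingester.pipeline.twitter.ExampleStage"
    driverFactoryId="example"/>
</pipeline>
```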

View File

@@ -1,30 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
This indexer reads UserModification events from the user_modification Kafka topic, converts
them into AntisocialUserUpdate by querying Gizmoduck, and then writes the result to the
search_user_updates Kafka topic.
-->
<pipeline>
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="pipeline">
<!-- This queue is a factor of batchSize larger than inner queues because it is unbatched -->
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="500"
fair="false"/>
</driverFactory>
<stage
className="com.twitter.search.ingester.pipeline.twitter.userupdates.UserUpdatesPipelineStage"
environment="prod"
driverFactoryId="pipeline"/>
</pipeline>

View File

@@ -1,202 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Ingesters process tweet create events from TweetyPie and write them to a queue for Earlybird
to index. -->
<pipeline>
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
<!-- Read tweets from the thrift kafka queue. The reader loops forever. -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.KafkaRawRecordConsumerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
kafkaConsumerGroupId=""
maxPollRecords="1"
pollTimeoutMs="1000"
partitioned="false"
deciderKey=""
driverFactoryId="kafka"/>
<!-- Deserialize the bytes into TweetData -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TweetEventDeserializerStage"
driverFactoryId="kafka"/>
<!-- Keep only events with the safety type for this cluster (PROTECTED) -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterEventsBySafetyTypeStage"
tweetCreateLatencyLogThresholdMillis="5000"
safetyType="PROTECTED"
driverFactoryId="kafka"/>
<!-- Parse to TwitterMessage -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ThriftTweetParserStage"
tweetDeleteEventBranchNames="kafka_update_events_delete"
driverFactoryId="kafka"/>
<branch>
<pipeline key="kafka_update_events_delete">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_update_events_delete">
<!-- we are willing to queue more deletes than other stages,
to make sure we don't slow down the incoming tweets -->
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
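<!-- Write tweet delete events to Kafka -->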
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.DeleteUpdateEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka_update_events_delete"/>
</pipeline>
</branch>
<!-- filters out messages that are not formatted correctly -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterTwitterMessageStage"
driverFactoryId="kafka"/>
<!-- retrieves space ids from space urls if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceIdsStage"
driverFactoryId="kafka"/>
<!-- looks up user reputation scores for each message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.LookupUserPropertiesBatchedStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextFeatureExtractionWorkersStage"
driverFactoryId="kafka"/>
<!-- compute text quality score of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextQualityEvaluationWorkerStage"
driverFactoryId="kafka"/>
<!-- Extract lat/lon pairs from the text, and geocode them -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.SingleTweetExtractAndGeocodeLatLonStage"
driverFactoryId="kafka"/>
<!-- adds coded locations -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.PopulateCodedLocationsBatchedStage"
driverFactoryId="kafka"/>
<!-- Parse the TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertMessageToThriftStage"
thriftVersionedEventsBranchName="kafka_base_tweets"
driverFactoryId="kafka"/>
<!-- Branch for tweets -->
<branch>
<pipeline key="kafka_base_tweets">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_base_tweets">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
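<!-- Write the tweet indexing events (ThriftVersionedEvents) to Kafka -->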
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId="search_ingester_indexing_events"
kafkaTopicName="search_ingester_indexing_events_protected_prod"
driverFactoryId="kafka_base_tweets"/>
</pipeline>
</branch>
<!-- Resolve compressed URL via Pink -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ResolveCompressedUrlsBatchedStage"
pinkClientId="INGESTER"
batchedStageBatchSize="10"
tweetMaxAgeToResolve="10000"
driverFactoryId="kafka"/>
<!-- Retrieve card information -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveCardBatchedStage"
tweetypieClientId="ingester.prod"
filterProtected="false"
internalBatchSize="50"
driverFactoryId="kafka"/>
<!-- Retrieve named entities -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveNamedEntitiesSingleTweetStage"
driverFactoryId="kafka"/>
<!-- retrieves space admins and title for a tweet if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceAdminsAndTitleStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextUrlsFeatureExtractionStage"
driverFactoryId="kafka"/>
<!-- Compute the tweet signature -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ComputeTweetSignatureStage"
driverFactoryId="kafka"/>
<!-- Convert the delayed TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertDelayedMessageToThriftStage"
driverFactoryId="kafka"/>
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
stageName="UpdateEvents"
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka"/>
</pipeline>

View File

@@ -1,240 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Ingesters process tweet create events from TweetyPie and write them to a queue for Earlybird
to index. -->
<pipeline>
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
<!-- Read tweets from the thrift kafka queue. The reader loops forever. -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.KafkaRawRecordConsumerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
kafkaConsumerGroupId=""
maxPollRecords="1"
pollTimeoutMs="1000"
partitioned="false"
deciderKey=""
driverFactoryId="kafka"/>
<!-- Deserialize the bytes into TweetData -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TweetEventDeserializerStage"
driverFactoryId="kafka"/>
<!-- Keep only events with the safety type for this cluster (PUBLIC) -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterEventsBySafetyTypeStage"
tweetCreateLatencyLogThresholdMillis="5000"
safetyType="PUBLIC"
driverFactoryId="kafka"/>
<!-- Parse to TwitterMessage -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ThriftTweetParserStage"
tweetCreateEventBranchNames="kafka_retweet_and_reply"
tweetDeleteEventBranchNames="kafka_update_events_delete"
driverFactoryId="kafka"/>
<branch>
<pipeline key="kafka_update_events_delete">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_update_events_delete">
<!-- we are willing to queue more deletes than other stages,
to make sure we don't slow down the incoming tweets -->
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
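<!-- Write tweet delete events to Kafka -->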
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.DeleteUpdateEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka_update_events_delete"/>
</pipeline>
</branch>
<!-- Processes retweets and replies to tweets -->
<branch>
<pipeline key="kafka_retweet_and_reply">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_retweet_and_reply">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
<!-- An incoming reply to this stage can either be a tweet directed at someone using an @mention, or
a tweet that is a direct reply to another tweet. This stage filters retweets and tweets that are
direct replies to other tweets into the retweet_and_reply pipeline. -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterRetweetsAndRepliesStage"
driverFactoryId="kafka_retweet_and_reply"/>
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertToThriftVersionedEventsStage"
driverFactoryId="kafka_retweet_and_reply"/>
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.RetweetAndReplyUpdateEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka_retweet_and_reply"/>
</pipeline>
</branch>
<!-- filters out messages that are not formatted correctly -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterTwitterMessageStage"
driverFactoryId="kafka"/>
<!-- retrieves space ids from space urls if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceIdsStage"
driverFactoryId="kafka"/>
<!-- looks up user reputation scores for each message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.LookupUserPropertiesBatchedStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextFeatureExtractionWorkersStage"
driverFactoryId="kafka"/>
<!-- compute text quality score of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextQualityEvaluationWorkerStage"
driverFactoryId="kafka"/>
<!-- Extract lat/lon pairs from the text, and geocode them -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.SingleTweetExtractAndGeocodeLatLonStage"
driverFactoryId="kafka"/>
<!-- adds coded locations -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.PopulateCodedLocationsBatchedStage"
driverFactoryId="kafka"/>
<!-- Parse the TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertMessageToThriftStage"
thriftVersionedEventsBranchName="kafka_base_tweets"
driverFactoryId="kafka"/>
<!-- Branch for tweets -->
<branch>
<pipeline key="kafka_base_tweets">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_base_tweets">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
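<!-- Write the tweet indexing events (ThriftVersionedEvents) to Kafka -->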
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName="search_ingester_indexing_events_realtime_prod"
driverFactoryId="kafka_base_tweets"/>
</pipeline>
</branch>
<!-- Resolve compressed URL via Pink -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ResolveCompressedUrlsBatchedStage"
pinkClientId="INGESTER"
batchedStageBatchSize="10"
tweetMaxAgeToResolve="10000"
driverFactoryId="kafka"/>
<!-- Retrieve card information -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveCardBatchedStage"
tweetypieClientId="ingester.prod"
internalBatchSize="50"
driverFactoryId="kafka"/>
<!-- Retrieve named entities -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveNamedEntitiesSingleTweetStage"
driverFactoryId="kafka"/>
<!-- retrieves space admins and title for a tweet if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceAdminsAndTitleStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextUrlsFeatureExtractionStage"
driverFactoryId="kafka"/>
<!-- Compute the tweet signature -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ComputeTweetSignatureStage"
driverFactoryId="kafka"/>
<!-- Convert the delayed TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertDelayedMessageToThriftStage"
driverFactoryId="kafka"/>
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
stageName="UpdateEvents"
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka"/>
</pipeline>

View File

@@ -1,199 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Ingesters process tweet create events from TweetyPie and write them to a queue for Earlybird
to index. -->
<pipeline>
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
<!-- Read tweets from the thrift kafka queue. The reader loops forever. -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.KafkaRawRecordConsumerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
kafkaConsumerGroupId=""
maxPollRecords="1"
pollTimeoutMs="1000"
partitioned="false"
deciderKey=""
driverFactoryId="kafka"/>
<!-- Deserialize the bytes into TweetData -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TweetEventDeserializerStage"
driverFactoryId="kafka"/>
<!-- Keep only events with safety type PUBLIC or PROTECTED -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterEventsBySafetyTypeStage"
tweetCreateLatencyLogThresholdMillis="5000"
safetyType="PUBLIC_OR_PROTECTED"
driverFactoryId="kafka"/>
<!-- Parse to TwitterMessage -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ThriftTweetParserStage"
tweetDeleteEventBranchNames="kafka_update_events_delete"
driverFactoryId="kafka"/>
<branch>
<pipeline key="kafka_update_events_delete">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_update_events_delete">
<!-- we are willing to queue more deletes than other stages,
to make sure we don't slow down the incoming tweets -->
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
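<!-- Write tweet delete events to Kafka -->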
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.DeleteUpdateEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka_update_events_delete"/>
</pipeline>
</branch>
<!-- filters out messages that are not formatted correctly -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.FilterTwitterMessageStage"
driverFactoryId="kafka"/>
<!-- retrieves space ids from space urls if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceIdsStage"
driverFactoryId="kafka"/>
<!-- looks up user reputation scores for each message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.LookupUserPropertiesBatchedStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextFeatureExtractionWorkersStage"
driverFactoryId="kafka"/>
<!-- compute text quality score of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextQualityEvaluationWorkerStage"
driverFactoryId="kafka"/>
<!-- Extract lat/lon pairs from the text, and geocode them -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.SingleTweetExtractAndGeocodeLatLonStage"
driverFactoryId="kafka"/>
<!-- adds coded locations -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.PopulateCodedLocationsBatchedStage"
driverFactoryId="kafka"/>
<!-- Parse the TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertMessageToThriftStage"
thriftVersionedEventsBranchName="kafka_base_tweets"
driverFactoryId="kafka"/>
<!-- Branch for tweets -->
<branch>
<pipeline key="kafka_base_tweets">
<property
propName="validator"
className="org.apache.commons.pipeline.validation.SimplePipelineValidator"/>
<listener
className="org.apache.commons.pipeline.listener.ObjectProcessedEventCounter"/>
<driverFactory
className="org.apache.commons.pipeline.driver.DedicatedThreadStageDriverFactory"
id="kafka_base_tweets">
<property
propName="queueFactory"
className="org.apache.commons.pipeline.util.BlockingQueueFactory$ArrayBlockingQueueFactory"
capacity="1000"
fair="false"/>
</driverFactory>
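<!-- Write the tweet indexing events (ThriftVersionedEvents) to Kafka -->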
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka_base_tweets"/>
</pipeline>
</branch>
<!-- Resolve compressed URL via Pink -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ResolveCompressedUrlsBatchedStage"
pinkClientId="INGESTER"
batchedStageBatchSize="10"
tweetMaxAgeToResolve="10000"
driverFactoryId="kafka"/>
<!-- Retrieve card information -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveCardBatchedStage"
tweetypieClientId=""
internalBatchSize="50"
driverFactoryId="kafka"/>
<!-- Retrieve named entities -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveNamedEntitiesSingleTweetStage"
driverFactoryId="kafka"/>
<!-- retrieves space admins and title for a tweet if the tweet has space urls -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.RetrieveSpaceAdminsAndTitleStage"
driverFactoryId="kafka"/>
<!-- extract text features of the message -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.TextUrlsFeatureExtractionStage"
driverFactoryId="kafka"/>
<!-- Compute the tweet signature -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ComputeTweetSignatureStage"
driverFactoryId="kafka"/>
<!-- Convert the delayed TwitterMessages into ThriftStatuses -->
<stage
className="com.twitter.search.ingester.pipeline.twitter.ConvertDelayedMessageToThriftStage"
driverFactoryId="kafka"/>
<stage
className="com.twitter.search.ingester.pipeline.twitter.kafka.TweetThriftVersionedEventsKafkaProducerStage"
kafkaClusterPath=""
stageName="UpdateEvents"
kafkaClientId=""
kafkaTopicName=""
driverFactoryId="kafka"/>
</pipeline>