mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-27 13:36:03 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
51 lines
2.4 KiB
Scala
51 lines
2.4 KiB
Scala
package com.twitter.home_mixer.functional_component.filter
|
|
|
|
import com.twitter.home_mixer.model.HomeFeatures.ConversationModuleFocalTweetIdFeature
|
|
import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature
|
|
import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate
|
|
import com.twitter.product_mixer.core.functional_component.filter.Filter
|
|
import com.twitter.product_mixer.core.functional_component.filter.FilterResult
|
|
import com.twitter.product_mixer.core.model.common.CandidateWithFeatures
|
|
import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier
|
|
import com.twitter.product_mixer.core.pipeline.PipelineQuery
|
|
import com.twitter.stitch.Stitch
|
|
|
|
/**
|
|
* Exclude conversation modules where Tweets have been dropped by other filters
|
|
*
|
|
* Largest conversation modules have 3 Tweets, so if all 3 are present, module is valid.
|
|
* For 2 Tweet modules, check if the head is the root (not a reply) and the last item
|
|
* is actually replying to the root directly with no missing intermediate tweets
|
|
*/
|
|
object InvalidConversationModuleFilter extends Filter[PipelineQuery, TweetCandidate] {
|
|
|
|
override val identifier: FilterIdentifier = FilterIdentifier("InvalidConversationModule")
|
|
|
|
val ValidThreeTweetModuleSize = 3
|
|
val ValidTwoTweetModuleSize = 2
|
|
|
|
override def apply(
|
|
query: PipelineQuery,
|
|
candidates: Seq[CandidateWithFeatures[TweetCandidate]]
|
|
): Stitch[FilterResult[TweetCandidate]] = {
|
|
val allowedTweetIds = candidates
|
|
.groupBy(_.features.getOrElse(ConversationModuleFocalTweetIdFeature, None))
|
|
.map { case (id, candidates) => (id, candidates.sortBy(_.candidate.id)) }
|
|
.filter {
|
|
case (Some(_), conversation) if conversation.size == ValidThreeTweetModuleSize => true
|
|
case (Some(focalId), conversation) if conversation.size == ValidTwoTweetModuleSize =>
|
|
conversation.head.features.getOrElse(InReplyToTweetIdFeature, None).isEmpty &&
|
|
conversation.last.candidate.id == focalId &&
|
|
conversation.last.features
|
|
.getOrElse(InReplyToTweetIdFeature, None)
|
|
.contains(conversation.head.candidate.id)
|
|
case (None, _) => true
|
|
case _ => false
|
|
}.values.flatten.toSeq.map(_.candidate.id).toSet
|
|
|
|
val (kept, removed) =
|
|
candidates.map(_.candidate).partition(candidate => allowedTweetIds.contains(candidate.id))
|
|
Stitch.value(FilterResult(kept = kept, removed = removed))
|
|
}
|
|
}
|