the-algorithm/home-mixer/server/src/main/scala/com/twitter/home_mixer/functional_component/filter/InvalidConversationModuleFilter.scala
twitter-team ef4c5eb65e Twitter Recommendation Algorithm
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
2023-03-31 17:36:31 -05:00

51 lines
2.4 KiB
Scala

package com.twitter.home_mixer.functional_component.filter
import com.twitter.home_mixer.model.HomeFeatures.ConversationModuleFocalTweetIdFeature
import com.twitter.home_mixer.model.HomeFeatures.InReplyToTweetIdFeature
import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate
import com.twitter.product_mixer.core.functional_component.filter.Filter
import com.twitter.product_mixer.core.functional_component.filter.FilterResult
import com.twitter.product_mixer.core.model.common.CandidateWithFeatures
import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier
import com.twitter.product_mixer.core.pipeline.PipelineQuery
import com.twitter.stitch.Stitch
/**
 * Removes conversation modules that are no longer intact, e.g. because Tweets
 * belonging to them were dropped by other filters.
 *
 * Candidates are grouped by their [[ConversationModuleFocalTweetIdFeature]] and each
 * group is ordered by ascending candidate id before being validated:
 *
 *  - candidates without a focal Tweet id are not part of a module and are always kept;
 *  - a module with exactly [[ValidThreeTweetModuleSize]] Tweets is the largest possible
 *    module, so it is considered complete and kept;
 *  - a module with exactly [[ValidTwoTweetModuleSize]] Tweets is kept only when the
 *    first Tweet is a root (it has no in-reply-to id), the last Tweet is the focal
 *    Tweet, and the last Tweet replies directly to the first one, i.e. no
 *    intermediate Tweet is missing;
 *  - a module of any other size is removed.
 */
object InvalidConversationModuleFilter extends Filter[PipelineQuery, TweetCandidate] {

  override val identifier: FilterIdentifier = FilterIdentifier("InvalidConversationModule")

  val ValidThreeTweetModuleSize = 3
  val ValidTwoTweetModuleSize = 2

  /**
   * Splits the candidates into those belonging to valid (or no) conversation modules
   * and those belonging to invalid modules.
   *
   * @param query      the pipeline query; not consulted by this filter
   * @param candidates the candidates, with features, to validate
   * @return a [[FilterResult]] whose `kept` and `removed` preserve the input order
   */
  override def apply(
    query: PipelineQuery,
    candidates: Seq[CandidateWithFeatures[TweetCandidate]]
  ): Stitch[FilterResult[TweetCandidate]] = {
    val modulesByFocalId =
      candidates.groupBy(_.features.getOrElse(ConversationModuleFocalTweetIdFeature, None))

    // Ids of every Tweet that sits in a valid module or in no module at all.
    val allowedTweetIds = modulesByFocalId.iterator.flatMap {
      case (focalIdOpt, members) =>
        val ordered = members.sortBy(_.candidate.id)

        val isValid = focalIdOpt match {
          // Not part of a conversation module: nothing to validate.
          case None => true
          case Some(focalId) =>
            if (ordered.size == ValidThreeTweetModuleSize) {
              true
            } else if (ordered.size == ValidTwoTweetModuleSize) {
              val first = ordered.head
              val second = ordered.last
              // The first Tweet must be a root, and the second must be the focal
              // Tweet replying straight to it.
              first.features.getOrElse(InReplyToTweetIdFeature, None).isEmpty &&
              second.candidate.id == focalId &&
              second.features
                .getOrElse(InReplyToTweetIdFeature, None)
                .contains(first.candidate.id)
            } else {
              false
            }
        }

        if (isValid) ordered.map(_.candidate.id) else Seq.empty
    }.toSet

    val (kept, removed) =
      candidates.map(_.candidate).partition(tweet => allowedTweetIds.contains(tweet.id))

    Stitch.value(FilterResult(kept = kept, removed = removed))
  }
}