mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-27 13:36:03 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
38 lines
1.5 KiB
Scala
38 lines
1.5 KiB
Scala
package com.twitter.home_mixer.functional_component.filter
|
|
|
|
import com.twitter.home_mixer.util.CandidatesUtil
|
|
import com.twitter.home_mixer.util.TweetImpressionsHelper
|
|
import com.twitter.product_mixer.component_library.model.candidate.TweetCandidate
|
|
import com.twitter.product_mixer.core.functional_component.filter.Filter
|
|
import com.twitter.product_mixer.core.functional_component.filter.FilterResult
|
|
import com.twitter.product_mixer.core.model.common.CandidateWithFeatures
|
|
import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier
|
|
import com.twitter.product_mixer.core.pipeline.PipelineQuery
|
|
import com.twitter.stitch.Stitch
|
|
|
|
/**
 * Removes candidates for tweets the user has previously seen.
 *
 * The set of seen tweet ids is derived from the query's features through
 * [[TweetImpressionsHelper.tweetImpressions]]. Seen tweets come from 2 sources:
 *  1. Heron Topology Impression Store in Memcache;
 *  2. Manhattan Impression Store;
 *
 * A candidate is removed when any id returned for it by
 * [[CandidatesUtil.getTweetIdAndSourceId]] is contained in that set.
 */
object PreviouslySeenTweetsFilter extends Filter[PipelineQuery, TweetCandidate] {

  override val identifier: FilterIdentifier = FilterIdentifier("PreviouslySeenTweets")

  /**
   * Splits the candidates into those to keep (not seen) and those to remove (seen).
   *
   * @param query      pipeline query whose optional features supply the impressed tweet ids
   * @param candidates candidates to check against the impressed tweet ids
   * @return an already-resolved [[Stitch]] holding the kept and removed candidates,
   *         each in its original relative order
   */
  override def apply(
    query: PipelineQuery,
    candidates: Seq[CandidateWithFeatures[TweetCandidate]]
  ): Stitch[FilterResult[TweetCandidate]] = {

    // When the query carries no features, the impressed set is empty and nothing is removed.
    val impressedIds = query.features
      .map(featureMap => TweetImpressionsHelper.tweetImpressions(featureMap))
      .getOrElse(Set.empty)

    // First half: candidates with no impressed id; second half: candidates with at least one.
    val (unseen, alreadySeen) = candidates.partition { candidateWithFeatures =>
      !CandidatesUtil.getTweetIdAndSourceId(candidateWithFeatures).exists(impressedIds.contains)
    }

    val result = FilterResult(
      kept = unseen.map(_.candidate),
      removed = alreadySeen.map(_.candidate)
    )

    Stitch.value(result)
  }
}
|