mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-02 01:08:47 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
41 lines
1.5 KiB
Scala
41 lines
1.5 KiB
Scala
package com.twitter.product_mixer.component_library.filter
|
|
|
|
import com.twitter.product_mixer.core.functional_component.filter.Filter
|
|
import com.twitter.product_mixer.core.functional_component.filter.FilterResult
|
|
import com.twitter.product_mixer.core.model.common.CandidateWithFeatures
|
|
import com.twitter.product_mixer.core.model.common.UniversalNoun
|
|
import com.twitter.product_mixer.core.model.common.identifier.FilterIdentifier
|
|
import com.twitter.product_mixer.core.pipeline.PipelineQuery
|
|
import com.twitter.stitch.Stitch
|
|
import com.twitter.search.common.util.bloomfilter.AdaptiveLongIntBloomFilter
|
|
|
|
/**
 * Looks up the [[AdaptiveLongIntBloomFilter]] to use for a given query.
 *
 * @tparam Query the pipeline query type the lookup operates on
 */
trait GetAdaptiveLongIntBloomFilter[Query <: PipelineQuery] {

  /**
   * @param query the query to look up a bloom filter for
   * @return the bloom filter for `query`, or `None` when there is none
   */
  def apply(query: Query): Option[AdaptiveLongIntBloomFilter]
}
|
|
|
|
/**
 * A [[Filter]] that splits candidates by whether their id is reported as contained in an
 * [[AdaptiveLongIntBloomFilter]] obtained from the query.
 *
 * Candidates whose id the bloom filter reports as contained are removed; all others are kept.
 * When `getBloomFilter` returns `None` for the query, every candidate is kept.
 *
 * @param getBloomFilter looks up the optional bloom filter for a query
 * @tparam Query the pipeline query type
 * @tparam Candidate the candidate type, identified by a `Long` id
 */
case class AdaptiveLongIntBloomFilterDedupFilter[
  Query <: PipelineQuery,
  Candidate <: UniversalNoun[Long]
](
  getBloomFilter: GetAdaptiveLongIntBloomFilter[Query])
    extends Filter[Query, Candidate] {

  override val identifier: FilterIdentifier =
    FilterIdentifier("AdaptiveLongIntBloomFilterDedupFilter")

  /**
   * @param query the query used to look up the bloom filter
   * @param candidates the candidates to split into kept and removed
   * @return a [[Stitch]] holding the already-computed [[FilterResult]]
   */
  override def apply(
    query: Query,
    candidates: Seq[CandidateWithFeatures[Candidate]]
  ): Stitch[FilterResult[Candidate]] = {
    val maybeBloomFilter = getBloomFilter(query)
    // Features are not needed here; only the underlying candidates are filtered.
    val allCandidates = candidates.map(_.candidate)

    val result = maybeBloomFilter match {
      case Some(seenIds) =>
        // partition puts the elements matching the predicate first, i.e. the ones to remove.
        val (removed, kept) = allCandidates.partition(c => seenIds.contains(c.id))
        FilterResult(kept, removed)
      case None =>
        // No bloom filter for this query: nothing can be deduplicated, keep everything.
        FilterResult(allCandidates, Seq.empty)
    }

    Stitch.value(result)
  }
}
|