the-algorithm/timelines/data_processing/ml_util/aggregation_framework/conversion/PickFirstRecordPolicy.scala
twitter-team 197bf2c563 Open-sourcing Timelines Aggregation Framework
Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models.
2023-04-28 14:17:02 -05:00

27 lines
1015 B
Scala

package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion
import com.twitter.ml.api._
import com.twitter.ml.api.FeatureContext
import scala.collection.JavaConverters._
/*
* A really bad default merge policy that picks all the aggregate
* features corresponding to the first sparse key value in the list.
* Does not rename any of the aggregate features for simplicity.
* Avoid using this merge policy if at all possible.
*/
object PickFirstRecordPolicy extends SparseBinaryMergePolicy {

  /** Merger used to fold the selected aggregate record into the input record. */
  val dataRecordMerger: DataRecordMerger = new DataRecordMerger

  /**
   * Merges only the first element of `aggregateRecords` into `mutableInputRecord`;
   * every other element is ignored. When the list is empty nothing is merged.
   *
   * @param mutableInputRecord record passed to the merger as the merge target
   * @param aggregateRecords   candidate aggregate records; only the head is used
   * @param aggregateContext   not consulted by this policy
   */
  override def mergeRecord(
    mutableInputRecord: DataRecord,
    aggregateRecords: List[DataRecord],
    aggregateContext: FeatureContext
  ): Unit =
    for (firstRecord <- aggregateRecords.headOption)
      dataRecordMerger.merge(mutableInputRecord, firstRecord)

  /**
   * Reports the features present after the merge: every feature of the given
   * context, collected into a Scala `Set` without any renaming.
   *
   * @param aggregateContext feature context of the aggregate records
   * @return all features of `aggregateContext` as an immutable set
   */
  override def aggregateFeaturesPostMerge(aggregateContext: FeatureContext): Set[Feature[_]] = {
    val allFeatures = aggregateContext.getAllFeatures.asScala
    allFeatures.toSet
  }
}