the-algorithm/timelines/data_processing/ml_util/aggregation_framework/job/DataRecordFeatureCounter.scala
twitter-team 197bf2c563 Open-sourcing Timelines Aggregation Framework
Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models.
2023-04-28 14:17:02 -05:00

40 lines
1.4 KiB
Scala

package com.twitter.timelines.data_processing.ml_util.aggregation_framework.job
import com.twitter.ml.api.DataRecord
import com.twitter.summingbird.Counter
/**
 * Pairs a summingbird [[com.twitter.summingbird.Counter]] with a predicate over
 * [[com.twitter.ml.api.DataRecord]] instances.
 *
 * The predicate decides which records the counter applies to. As an illustration,
 * if each data record describes a Tweet, the predicate could test for a binary
 * feature marking the presence of an image; the counter would then stand for the
 * count of processed Tweets that have images.
 *
 * @param predicate the condition a data record must satisfy to be counted
 * @param counter   the summingbird Counter gated by `predicate`
 */
case class DataRecordFeatureCounter(
  predicate: DataRecord => Boolean,
  counter: Counter
)
object DataRecordFeatureCounter {

  /**
   * Evaluates the counter's predicate against a record and, when it holds,
   * calls `incr()` on the underlying counter. Does nothing otherwise.
   *
   * @param recordCounter the predicate/counter pair to evaluate
   * @param record        the data record to test
   */
  def apply(recordCounter: DataRecordFeatureCounter, record: DataRecord): Unit = {
    val satisfied = recordCounter.predicate(record)
    if (satisfied) {
      recordCounter.counter.incr()
    }
  }

  /**
   * Builds a feature counter whose predicate accepts every data record.
   *
   * @param counter a summingbird Counter instance
   * @return a data record counter gated by an always-true predicate
   */
  def any(counter: Counter): DataRecordFeatureCounter = {
    // Explicitly typed so the call below resolves to the case-class constructor
    // overload of `apply` rather than the (counter, record) overload above.
    val acceptEverything: DataRecord => Boolean = (_: DataRecord) => true
    DataRecordFeatureCounter(acceptEverything, counter)
  }
}