the-algorithm/timelines/data_processing/ml_util/aggregation_framework/job/DataRecordFeatureCounter.scala

package com.twitter.timelines.data_processing.ml_util.aggregation_framework.job

import com.twitter.ml.api.DataRecord
import com.twitter.summingbird.Counter

/**
 * Pairs a summingbird Counter with a predicate over
 * [[com.twitter.ml.api.DataRecord]] instances, so the counter can be tied to
 * only those records the predicate accepts.
 *
 * As an illustration, when a data record stands for a Tweet, the predicate could
 * test for a binary feature marking the presence of an image; the counter would
 * then represent the count of Tweets with images processed.
 *
 * @param predicate decides, per data record, whether the counter applies
 * @param counter the summingbird Counter instance gated by `predicate`
 */
case class DataRecordFeatureCounter(
  predicate: DataRecord => Boolean,
  counter: Counter
)

object DataRecordFeatureCounter {

  /**
   * Evaluates the counter's predicate against the given record and, when it
   * holds, increments the underlying counter. Otherwise nothing happens.
   *
   * @param recordCounter the predicate/counter pair to apply
   * @param record the data record to test
   */
  def apply(recordCounter: DataRecordFeatureCounter, record: DataRecord): Unit = {
    val shouldCount = recordCounter.predicate(record)
    if (shouldCount) {
      recordCounter.counter.incr()
    }
  }

  /**
   * Builds a feature counter whose predicate accepts every data record.
   *
   * @param counter a summingbird Counter instance
   * @return a data record counter that is never gated off
   */
  def any(counter: Counter): DataRecordFeatureCounter = {
    // The parameter type is spelled out so the function type is fixed here rather than inferred.
    val acceptAll: DataRecord => Boolean = (_: DataRecord) => true
    new DataRecordFeatureCounter(acceptAll, counter)
  }
}
Open-sourcing Timelines Aggregation Framework Open sourcing Aggregation Framework, a config-driven Summingbird based framework for generating real-time and batch aggregate features to be consumed by ML models. 2023-04-27 22:58:07 +02:00			`package com.twitter.timelines.data_processing.ml_util.aggregation_framework.job`

			`import com.twitter.ml.api.DataRecord`
			`import com.twitter.summingbird.Counter`

			`/**`
			`* A summingbird Counter which is associated with a predicate which operates on`
			`* [[com.twitter.ml.api.DataRecord]] instances.`
			`*`
			`* For example, for a data record which represents a Tweet, one could define a predicate`
			`* which checks whether the Tweet contains a binary feature representing the presence of`
			`* an image. The counter can then be used to represent the count of Tweets with`
			`* images processed.`
			`*`
			`* @param predicate a predicate which gates the counter`
			`* @param counter a summingbird Counter instance`
			`*/`
			`case class DataRecordFeatureCounter(predicate: DataRecord => Boolean, counter: Counter)`

			`object DataRecordFeatureCounter {`

			`/**`
			`* Increments the counter if the record satisfies the predicate`
			`*`
			`* @param recordCounter a data record counter`
			`* @param record a data record`
			`*/`
			`def apply(recordCounter: DataRecordFeatureCounter, record: DataRecord): Unit =`
			`if (recordCounter.predicate(record)) recordCounter.counter.incr()`

			`/**`
			`* Defines a feature counter with a predicate that is always true`
			`*`
			`* @param counter a summingbird Counter instance`
			`* @return a data record counter`
			`*/`
			`def any(counter: Counter): DataRecordFeatureCounter =`
			`DataRecordFeatureCounter({ _: DataRecord => true }, counter)`
			`}`