mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-07-07 02:11:45 +02:00
40 lines
1.4 KiB
Scala
40 lines
1.4 KiB
Scala
![]() |
package com.twitter.timelines.data_processing.ml_util.aggregation_framework.job
|
||
|
|
||
|
import com.twitter.ml.api.DataRecord
|
||
|
import com.twitter.summingbird.Counter
|
||
|
|
||
|
/**
 * Pairs a summingbird Counter with a predicate over
 * [[com.twitter.ml.api.DataRecord]] instances; the predicate decides which
 * records the counter should be incremented for.
 *
 * As an illustration, take a data record that represents a Tweet. A predicate
 * could check whether the record carries a binary feature indicating that the
 * Tweet has an image, in which case the counter tracks the count of processed
 * Tweets with images.
 *
 * @param predicate a predicate which gates the counter
 * @param counter a summingbird Counter instance
 */
case class DataRecordFeatureCounter(
  predicate: DataRecord => Boolean,
  counter: Counter
)
|
||
|
|
||
|
object DataRecordFeatureCounter {

  /**
   * Tests the record against the counter's predicate and increments the
   * underlying counter when the predicate holds; otherwise does nothing.
   *
   * @param recordCounter the predicate/counter pair to apply
   * @param record the data record to test
   */
  def apply(recordCounter: DataRecordFeatureCounter, record: DataRecord): Unit = {
    val satisfied = recordCounter.predicate(record)
    if (satisfied) {
      recordCounter.counter.incr()
    }
  }

  /**
   * Builds a feature counter whose predicate accepts every data record.
   *
   * @param counter a summingbird Counter instance
   * @return a data record counter gated by an always-true predicate
   */
  def any(counter: Counter): DataRecordFeatureCounter = {
    val acceptAll: DataRecord => Boolean = _ => true
    // Direct construction is equivalent to the synthetic case-class apply.
    new DataRecordFeatureCounter(acceptAll, counter)
  }
}
|