mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-01 08:48:46 +02:00
20 lines
943 B
Scala
20 lines
943 B
Scala
|
package com.twitter.timelines.prediction.features.escherbird
|
||
|
|
||
|
import com.twitter.tweetypie.thriftscala.Tweet
|
||
|
import scala.collection.JavaConverters._
|
||
|
|
||
|
/**
 * Builds [[EscherbirdFeatures]] from the Escherbird entity annotations carried by a tweet.
 */
object EscherbirdFeaturesConverter {

  /** Domain ids whose annotations are discarded before any feature is derived. */
  val DeprecatedOrTestDomains: Set[Long] = Set(1L, 5L, 7L, 9L, 14L, 19L, 20L, 31L)

  /**
   * Derives the distinct group, domain and entity id sets for a tweet.
   *
   * @param tweet the tweet whose `escherbirdEntityAnnotations` are read
   * @return `None` when the tweet has no `escherbirdEntityAnnotations`; otherwise the features
   *         computed from every annotation whose domain id is not in
   *         [[DeprecatedOrTestDomains]]
   */
  def fromTweet(tweet: Tweet): Option[EscherbirdFeatures] =
    for (container <- tweet.escherbirdEntityAnnotations) yield {
      val kept =
        container.entityAnnotations.filter(a => !DeprecatedOrTestDomains(a.domainId))

      val groupIds = distinctAsJava(kept)(_.groupId.toString)
      val domainIds = distinctAsJava(kept)(_.domainId.toString)
      // An entity id is only unique within its domain, so the key is "<domainId>.<entityId>".
      val entityIds = distinctAsJava(kept)(a => s"${a.domainId}.${a.entityId}")

      EscherbirdFeatures(tweet.id, groupIds, domainIds, entityIds)
    }

  /** Renders every item as a string and returns the distinct results as a Java set. */
  private def distinctAsJava[A](items: Iterable[A])(render: A => String): java.util.Set[String] =
    items.map(render).toSet.asJava
}
|