the-algorithm/src/scala/com/twitter/timelines/prediction/features/simcluster/SimclusterFeatures.scala

62 lines
2.3 KiB
Scala

package com.twitter.timelines.prediction.features.simcluster
import com.twitter.dal.personal_data.thriftjava.PersonalDataType._
import com.twitter.finagle.stats.StatsReceiver
import com.twitter.ml.api.Feature._
import com.twitter.simclusters_v2.thriftscala.ClustersUserIsInterestedIn
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.TypedAggregateGroup
import scala.collection.JavaConverters._
/**
 * Converts a user's interested-in cluster data into [[SimclusterFeatures]].
 *
 * @param statsReceiver parent stats receiver; this helper reports its counter under a
 *                      scope named after this class's simple name.
 */
class SimclusterFeaturesHelper(statsReceiver: StatsReceiver) {
  import SimclusterFeatures._

  private[this] val scopedStatsReceiver = statsReceiver.scope(getClass.getSimpleName)

  // Counts inputs whose model version differs from SIMCLUSTER_MODEL_VERSION.
  private[this] val invalidSimclusterModelVersion =
    scopedStatsReceiver.counter("invalidSimclusterModelVersion")

  /**
   * Builds the features for a single user.
   *
   * @param userInterestClustersPair the user id paired with that user's interested-in clusters.
   * @return `Some` features when the input's `knownForModelVersion` equals
   *         `SIMCLUSTER_MODEL_VERSION`; clusters that carry no `favScore` are left out of
   *         the score map. Returns `None` (and bumps the invalid-version counter) otherwise.
   */
  def fromUserClusterInterestsPair(
    userInterestClustersPair: (Long, ClustersUserIsInterestedIn)
  ): Option[SimclusterFeatures] = {
    val (userId, interestedIn) = userInterestClustersPair
    val modelVersion = interestedIn.knownForModelVersion

    if (modelVersion != SIMCLUSTER_MODEL_VERSION) {
      // This counter lets us verify that the hardcoded model version is still the right one.
      invalidSimclusterModelVersion.incr()
      None
    } else {
      // Keep only clusters that have a fav score, keyed by the stringified cluster id.
      val favScoreByClusterId = interestedIn.clusterIdToScores.flatMap {
        case (clusterId, scores) =>
          scores.favScore.map(fav => (clusterId.toString, fav))
      }
      Some(SimclusterFeatures(userId, modelVersion, favScoreByClusterId.toMap))
    }
  }
}
/**
 * Constants and feature definitions shared by the simcluster feature code.
 */
object SimclusterFeatures {

  /**
   * The one model version our models are trained for.
   * Check http://go/simclustersv2runbook for production versions.
   */
  val SIMCLUSTER_MODEL_VERSION: String = "20M_145K_dec11"

  /** Name prefix shared by the version-specific features below. */
  val prefix: String = s"simcluster.v2.$SIMCLUSTER_MODEL_VERSION"

  /** Sparse continuous feature named `<prefix>.user_interest_cluster_scores`. */
  val SIMCLUSTER_USER_INTEREST_CLUSTER_SCORES: SparseContinuous =
    new SparseContinuous(
      s"$prefix.user_interest_cluster_scores",
      Set(EngagementScore, InferredInterests).asJava
    )

  /** Sparse binary feature named `<prefix>.user_interest_cluster_ids`. */
  val SIMCLUSTER_USER_INTEREST_CLUSTER_IDS: SparseBinary =
    new SparseBinary(
      s"$prefix.user_interest_cluster_ids",
      Set(InferredInterests).asJava
    )

  /** Text feature named `meta.simcluster_version`. */
  val SIMCLUSTER_MODEL_VERSION_METADATA: Text =
    new Text("meta.simcluster_version")
}
/**
 * Simcluster interest features for one user.
 *
 * @param userId                   id of the user the features belong to.
 * @param modelVersion             model version string of the source cluster data.
 * @param interestClusterScoresMap score per cluster; when built by
 *                                 `SimclusterFeaturesHelper.fromUserClusterInterestsPair`
 *                                 the keys are stringified cluster ids and the values
 *                                 are fav scores.
 */
case class SimclusterFeatures(
  userId: Long,
  modelVersion: String,
  interestClusterScoresMap: Map[String, Double]
)