package com.twitter.timelines.data_processing.ml_util.aggregation_framework.conversion

import com.google.common.annotations.VisibleForTesting
import com.twitter.ml.api.util.SRichDataRecord
import com.twitter.ml.api.FeatureContext
import com.twitter.ml.api._
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.AggregationMetricCommon
import com.twitter.timelines.data_processing.ml_util.aggregation_framework.metrics.TypedCountMetric
import java.lang.{Double => JDouble}
import scala.collection.JavaConverters._

case class CombinedFeatures(
  sum: Feature[JDouble],
  nonzero: Feature[JDouble],
  mean: Feature[JDouble],
  topK: Seq[Feature[JDouble]])

trait CombineCountsBase {
  val SparseSum = "sparse_sum"
  val SparseNonzero = "sparse_nonzero"
  val SparseMean = "sparse_mean"
  val SparseTop = "sparse_top"

  def topK: Int
  def hardLimit: Option[Int]
  def precomputedCountFeatures: Seq[Feature[_]]

  // For each input count feature, precompute the derived sum/nonzero/mean/topK
  // output features, carrying over the personal-data-type annotations.
  lazy val precomputedFeaturesMap: Map[Feature[_], CombinedFeatures] =
    precomputedCountFeatures.map { countFeature =>
      val derivedPersonalDataTypes =
        AggregationMetricCommon.derivePersonalDataTypes(Some(countFeature))
      val sum = new Feature.Continuous(
        countFeature.getDenseFeatureName + "." + SparseSum,
        derivedPersonalDataTypes)
      val nonzero = new Feature.Continuous(
        countFeature.getDenseFeatureName + "." + SparseNonzero,
        derivedPersonalDataTypes)
      val mean = new Feature.Continuous(
        countFeature.getDenseFeatureName + "." + SparseMean,
        derivedPersonalDataTypes)
      val topKFeatures = (1 to topK).map { k =>
        new Feature.Continuous(
          countFeature.getDenseFeatureName + "." + SparseTop + k,
          derivedPersonalDataTypes)
      }
      (countFeature, CombinedFeatures(sum, nonzero, mean, topKFeatures))
    }.toMap

  lazy val outputFeaturesPostMerge: Set[Feature[JDouble]] =
    precomputedFeaturesMap.values.flatMap { combinedFeatures: CombinedFeatures =>
      Seq(
        combinedFeatures.sum,
        combinedFeatures.nonzero,
        combinedFeatures.mean
      ) ++ combinedFeatures.topK
    }.toSet

  private case class ComputedStats(sum: Double, nonzero: Double, mean: Double)

  // Computes the sum, the number of non-zero values, and the mean
  // (sum divided by the non-zero count) in a single pass.
  private def preComputeStats(featureValues: Seq[Double]): ComputedStats = {
    val (sum, nonzero) = featureValues.foldLeft((0.0, 0.0)) {
      case ((accSum, accNonzero), value) =>
        (accSum + value, if (value > 0.0) accNonzero + 1.0 else accNonzero)
    }
    ComputedStats(sum, nonzero, if (nonzero > 0.0) sum / nonzero else 0.0)
  }

  private def computeSortedFeatureValues(featureValues: List[Double]): List[Double] =
    featureValues.sortBy(-_)

  // Returns the k-th largest value, or 0.0 if fewer than k values exist.
  private def extractKth(sortedFeatureValues: Seq[Double], k: Int): Double =
    sortedFeatureValues
      .lift(k - 1)
      .getOrElse(0.0)

  private def setContinuousFeatureIfNonZero(
    record: SRichDataRecord,
    feature: Feature[JDouble],
    value: Double
  ): Unit =
    if (value != 0.0) {
      record.setFeatureValue(feature, value)
    }

  def hydrateCountFeatures(
    richRecord: SRichDataRecord,
    features: Seq[Feature[_]],
    featureValuesMap: Map[Feature[_], List[Double]]
  ): Unit =
    for {
      feature <- features
      featureValues <- featureValuesMap.get(feature)
    } {
      mergeRecordFromCountFeature(
        countFeature = feature,
        featureValues = featureValues,
        richInputRecord = richRecord
      )
    }

  def mergeRecordFromCountFeature(
    richInputRecord: SRichDataRecord,
    countFeature: Feature[_],
    featureValues: List[Double]
  ): Unit = {
    // In the majority of calls to this method from the timeline scorer, the
    // featureValues list is empty. While no single operation on an empty list
    // is expensive, the small costs add up. The early stop here avoids sorting
    // an empty list, allocating several Options, making multiple function
    // calls, and iterating over [1, topK].
    if (featureValues.nonEmpty) {
      val sortedFeatureValues = hardLimit
        .map { limit =>
          computeSortedFeatureValues(featureValues).take(limit)
        }.getOrElse(computeSortedFeatureValues(featureValues)).toIndexedSeq
      val computed = preComputeStats(sortedFeatureValues)

      val combinedFeatures = precomputedFeaturesMap(countFeature)
      setContinuousFeatureIfNonZero(
        richInputRecord,
        combinedFeatures.sum,
        computed.sum
      )
      setContinuousFeatureIfNonZero(
        richInputRecord,
        combinedFeatures.nonzero,
        computed.nonzero
      )
      setContinuousFeatureIfNonZero(
        richInputRecord,
        combinedFeatures.mean,
        computed.mean
      )
      (1 to topK).foreach { k =>
        setContinuousFeatureIfNonZero(
          richInputRecord,
          combinedFeatures.topK(k - 1),
          extractKth(sortedFeatureValues, k)
        )
      }
    }
  }
}

object CombineCountsPolicy {
  // A count feature is a continuous feature whose dense name ends with the
  // CountMetric operator suffix.
  def getCountFeatures(aggregateContext: FeatureContext): Seq[Feature[_]] =
    aggregateContext.getAllFeatures.asScala.toSeq
      .filter { feature =>
        feature.getFeatureType == FeatureType.CONTINUOUS &&
        feature.getDenseFeatureName.endsWith(TypedCountMetric[JDouble]().operatorName)
      }

  @VisibleForTesting
  private[conversion] def getFeatureValues(
    dataRecordsWithCounts: List[DataRecord],
    countFeature: Feature[_]
  ): List[Double] =
    dataRecordsWithCounts.map(new SRichDataRecord(_)).flatMap { record =>
      Option(record.getFeatureValue(countFeature)).map(_.asInstanceOf[JDouble].toDouble)
    }
}

/**
 * A merge policy that works whenever all aggregate features are
 * counts (computed using CountMetric), and typically represent
 * either impressions or engagements. For each such input count
 * feature, the policy outputs the following (3 + k) derived features
 * into the output data record:
 *
 *  - Sum of the feature's value across all aggregate records
 *  - Number of aggregate records that have the feature set to non-zero
 *  - Mean of the feature's value across all aggregate records
 *  - Top k values of the feature across all aggregate records
 *
 * @param topK number of top values to compute
 * @param hardLimit when set, records are sorted and only the top values are used for
 *                  aggregation if the number of records is higher than this hard limit
 */
case class CombineCountsPolicy(
  override val topK: Int,
  aggregateContextToPrecompute: FeatureContext,
  override val hardLimit: Option[Int] = None)
    extends SparseBinaryMergePolicy
    with CombineCountsBase {
  import CombineCountsPolicy._

  override val precomputedCountFeatures: Seq[Feature[_]] = getCountFeatures(
    aggregateContextToPrecompute)

  override def mergeRecord(
    mutableInputRecord: DataRecord,
    aggregateRecords: List[DataRecord],
    aggregateContext: FeatureContext
  ): Unit = {
    // Assumes aggregateContext === aggregateContextToPrecompute
    mergeRecordFromCountFeatures(mutableInputRecord, aggregateRecords, precomputedCountFeatures)
  }

  def defaultMergeRecord(
    mutableInputRecord: DataRecord,
    aggregateRecords: List[DataRecord]
  ): Unit = {
    mergeRecordFromCountFeatures(mutableInputRecord, aggregateRecords, precomputedCountFeatures)
  }

  def mergeRecordFromCountFeatures(
    mutableInputRecord: DataRecord,
    aggregateRecords: List[DataRecord],
    countFeatures: Seq[Feature[_]]
  ): Unit = {
    val richInputRecord = new SRichDataRecord(mutableInputRecord)
    countFeatures.foreach { countFeature =>
      mergeRecordFromCountFeature(
        richInputRecord = richInputRecord,
        countFeature = countFeature,
        featureValues = getFeatureValues(aggregateRecords, countFeature)
      )
    }
  }

  override def aggregateFeaturesPostMerge(aggregateContext: FeatureContext): Set[Feature[_]] =
    outputFeaturesPostMerge.map(_.asInstanceOf[Feature[_]])
}
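
// ---------------------------------------------------------------------------
// Usage sketch (illustrative, not part of the original API): constructs a
// policy and applies it to one input record. The helper object, the parameter
// values, and the example feature name "fav.count" are assumptions; only
// CombineCountsPolicy's constructor and mergeRecord's signature come from the
// code above.
// ---------------------------------------------------------------------------
object CombineCountsPolicyExample {
  def mergeTopCounts(
    mutableInputRecord: DataRecord,
    aggregateRecords: List[DataRecord],
    aggregateContext: FeatureContext
  ): Unit = {
    // Keep the top 3 values per count feature; if more than 100 values are
    // present, sort them and aggregate only the top 100.
    val policy = CombineCountsPolicy(
      topK = 3,
      aggregateContextToPrecompute = aggregateContext,
      hardLimit = Some(100)
    )
    // For a count feature named e.g. "fav.count", this writes
    // "fav.count.sparse_sum", "fav.count.sparse_nonzero",
    // "fav.count.sparse_mean", and "fav.count.sparse_top1" through
    // "fav.count.sparse_top3" into mutableInputRecord (zero values are
    // skipped).
    policy.mergeRecord(mutableInputRecord, aggregateRecords, aggregateContext)
  }
}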