mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-01 08:48:46 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
40 lines
1.3 KiB
Scala
40 lines
1.3 KiB
Scala
package com.twitter.simclusters_v2.hdfs_sources
|
|
|
|
import com.twitter.scalding.DateOps
|
|
import com.twitter.scalding.DateRange
|
|
import com.twitter.scalding.Days
|
|
import com.twitter.scalding.TypedPipe
|
|
import com.twitter.scalding_internal.dalv2.DAL
|
|
import com.twitter.scalding_internal.dalv2.remote_access.ExplicitLocation
|
|
import com.twitter.scalding_internal.dalv2.remote_access.ProcAtla
|
|
import com.twitter.simclusters_v2.thriftscala.NormsAndCounts
|
|
import com.twitter.simclusters_v2.thriftscala.UserAndNeighbors
|
|
import java.util.TimeZone
|
|
|
|
object DataSources {

  /**
   * Loads the normalized user-user graph, read from the `ProcAtla` location (atla-proc).
   *
   * The most recent snapshot of `UserUserNormalizedGraphScalaDataset` is selected, provided
   * it is no older than 14 days; the age limit is constructed with `DateOps.UTC`.
   *
   * @param dateRange implicit date range of the job. It is not referenced by name in this
   *                  method. NOTE(review): presumably picked up implicitly by the DAL call,
   *                  if at all -- confirm against the DAL API.
   * @return the snapshot contents as a `TypedPipe` of [[UserAndNeighbors]]
   */
  def userUserNormalizedGraphSource(implicit dateRange: DateRange): TypedPipe[UserAndNeighbors] = {
    // Maximum accepted snapshot age, expressed explicitly in UTC.
    val maxSnapshotAge = Days(14)(DateOps.UTC)

    DAL
      .readMostRecentSnapshotNoOlderThan(UserUserNormalizedGraphScalaDataset, maxSnapshotAge)
      .withRemoteReadPolicy(ExplicitLocation(ProcAtla))
      .toTypedPipe
  }

  /**
   * Loads the norms-and-counts records, read from the `ProcAtla` location (atla-proc).
   *
   * The most recent snapshot of `ProducerNormsAndCountsScalaDataset` is selected from the
   * window obtained by calling `prepend(Days(14))` on the supplied date range.
   *
   * @param dateRange implicit date range that the 14-day duration is prepended to
   * @param timeZone  implicit time zone; not referenced by name here.
   *                  NOTE(review): presumably resolved implicitly by `Days(14)` -- confirm.
   * @return the snapshot contents as a `TypedPipe` of [[NormsAndCounts]]
   */
  def userNormsAndCounts(
    implicit dateRange: DateRange,
    timeZone: TimeZone
  ): TypedPipe[NormsAndCounts] = {
    // Search window for the snapshot: the job's date range with 14 days prepended.
    val snapshotWindow = dateRange.prepend(Days(14))

    DAL
      .readMostRecentSnapshot(ProducerNormsAndCountsScalaDataset, snapshotWindow)
      .withRemoteReadPolicy(ExplicitLocation(ProcAtla))
      .toTypedPipe
  }

}
|