mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-02 09:18:49 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
36 lines
1.1 KiB
Scala
36 lines
1.1 KiB
Scala
package com.twitter.recos.user_tweet_graph.util
|
|
|
|
import com.twitter.graphjet.bipartite.MultiSegmentIterator
|
|
import com.twitter.graphjet.bipartite.api.BipartiteGraph
|
|
import com.twitter.graphjet.bipartite.segment.BipartiteGraphSegment
|
|
import java.util.Random
|
|
import scala.collection.mutable.ListBuffer
|
|
|
|
object SampleLHSUsersUtil {

  /**
   * Exclusive upper bound on a left node's degree. Left nodes whose degree is at or above
   * this value are dropped from the sample: if a user likes too many things, we risk
   * including spammy behavior.
   */
  private val MaxLeftNodeDegreeExclusive: Int = 100

  /**
   * Samples left-hand-side (user) nodes attached to a right-hand-side node of the graph.
   *
   * Asks `bipartiteGraph` for random edges of `maskedTweetId`, walks the returned iterator,
   * and keeps every left node whose degree is below [[MaxLeftNodeDegreeExclusive]].
   * Sampled nodes are not de-duplicated here, so the result may contain repeats if the
   * underlying iterator yields them.
   *
   * @param maskedTweetId            right-hand-side node id whose edges are sampled
   * @param maxNumSamplesPerNeighbor sample count forwarded to `getRandomRightNodeEdges`
   * @param bipartiteGraph           graph to sample from and to query left-node degrees on
   * @return an immutable sequence of the retained left node ids, in iteration order;
   *         empty when the graph returns a null iterator
   */
  def sampleLHSUsers(
    maskedTweetId: Long,
    maxNumSamplesPerNeighbor: Int,
    bipartiteGraph: BipartiteGraph
  ): Seq[Long] = {
    // Use the default Random constructor rather than seeding with the wall clock: requests
    // handled within the same millisecond would otherwise share a seed and draw identical
    // samples.
    val random = new Random()

    // NOTE(review): the downcast is kept from the original code; it will throw a
    // ClassCastException if the graph ever hands back a different iterator implementation.
    // Confirm whether the declared return type already exposes hasNext/nextLong.
    val sampledUserIdsIterator = bipartiteGraph
      .getRandomRightNodeEdges(maskedTweetId, maxNumSamplesPerNeighbor, random)
      .asInstanceOf[MultiSegmentIterator[BipartiteGraphSegment]]

    val userIds = new ListBuffer[Long]()
    // The graph API is Java and may return null (presumably when the node has no edges --
    // TODO confirm); treat that as "nothing sampled".
    if (sampledUserIdsIterator != null) {
      while (sampledUserIdsIterator.hasNext) {
        val leftNode = sampledUserIdsIterator.nextLong()
        if (bipartiteGraph.getLeftNodeDegree(leftNode) < MaxLeftNodeDegreeExclusive) {
          userIds += leftNode
        }
      }
    }

    // Hand back an immutable snapshot instead of leaking the mutable buffer to callers.
    userIds.toList
  }
}
|