mirror of
https://github.com/twitter/the-algorithm.git
synced 2025-01-07 01:48:16 +01:00
Compare commits
4 Commits
b572e6b1ba
...
6977e97325
Author | SHA1 | Date | |
---|---|---|---|
|
6977e97325 | ||
|
645342ba2f | ||
|
376f35f08f | ||
|
573fab9526 |
@ -102,23 +102,23 @@ object KnownForSources {
|
|||||||
TypedPipe
|
TypedPipe
|
||||||
.from(TextLine(textFile))
|
.from(TextLine(textFile))
|
||||||
.flatMap { str =>
|
.flatMap { str =>
|
||||||
if (!str.startsWith("#")) {
|
str match {
|
||||||
try {
|
case s"#$_" => None
|
||||||
|
case _ => try {
|
||||||
val tokens = str.trim.split("\\s+")
|
val tokens = str.trim.split("\\s+")
|
||||||
val res = Array.newBuilder[(Int, Float)]
|
|
||||||
val userId = tokens(0).toLong
|
val userId = tokens(0).toLong
|
||||||
for (i <- 1 until tokens.length) {
|
(1 until tokens.length).foldRight(Array.newBuilder[(Int, Float)])((i, r) => {
|
||||||
val Array(cIdStr, scoreStr) = tokens(i).split(":")
|
val Array(cIdStr, scoreStr) = tokens(i).split(":")
|
||||||
val clusterId = cIdStr.toInt
|
val clusterId = cIdStr.toInt
|
||||||
val score = scoreStr.toFloat
|
val score = scoreStr.toFloat
|
||||||
val newEntry = (clusterId, score)
|
val newEntry = (clusterId, score)
|
||||||
res += newEntry
|
r += newEntry
|
||||||
|
}).result() match {
|
||||||
|
case (res) if res.nonEmpty => Some((userId, res.result()))
|
||||||
|
_ => None
|
||||||
}
|
}
|
||||||
val result = res.result
|
}
|
||||||
if (result.nonEmpty) {
|
catch {
|
||||||
Some((userId, res.result()))
|
|
||||||
} else None
|
|
||||||
} catch {
|
|
||||||
case ex: Throwable =>
|
case ex: Throwable =>
|
||||||
log.warning(
|
log.warning(
|
||||||
s"Error while loading knownFor from $textFile for line <$str>: " +
|
s"Error while loading knownFor from $textFile for line <$str>: " +
|
||||||
@ -126,7 +126,7 @@ object KnownForSources {
|
|||||||
)
|
)
|
||||||
None
|
None
|
||||||
}
|
}
|
||||||
} else None
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user