mirror of
https://github.com/twitter/the-algorithm.git
synced 2025-01-05 00:51:55 +01:00
Compare commits
4 Commits
b572e6b1ba
...
6977e97325
Author | SHA1 | Date | |
---|---|---|---|
|
6977e97325 | ||
|
645342ba2f | ||
|
376f35f08f | ||
|
573fab9526 |
@ -102,23 +102,23 @@ object KnownForSources {
|
||||
TypedPipe
|
||||
.from(TextLine(textFile))
|
||||
.flatMap { str =>
|
||||
if (!str.startsWith("#")) {
|
||||
try {
|
||||
str match {
|
||||
case s"#$_" => None
|
||||
case _ => try {
|
||||
val tokens = str.trim.split("\\s+")
|
||||
val res = Array.newBuilder[(Int, Float)]
|
||||
val userId = tokens(0).toLong
|
||||
for (i <- 1 until tokens.length) {
|
||||
(1 until tokens.length).foldRight(Array.newBuilder[(Int, Float)])((i, r) => {
|
||||
val Array(cIdStr, scoreStr) = tokens(i).split(":")
|
||||
val clusterId = cIdStr.toInt
|
||||
val score = scoreStr.toFloat
|
||||
val newEntry = (clusterId, score)
|
||||
res += newEntry
|
||||
r += newEntry
|
||||
}).result() match {
|
||||
case (res) if res.nonEmpty => Some((userId, res.result()))
|
||||
_ => None
|
||||
}
|
||||
val result = res.result
|
||||
if (result.nonEmpty) {
|
||||
Some((userId, res.result()))
|
||||
} else None
|
||||
} catch {
|
||||
}
|
||||
catch {
|
||||
case ex: Throwable =>
|
||||
log.warning(
|
||||
s"Error while loading knownFor from $textFile for line <$str>: " +
|
||||
@ -126,7 +126,7 @@ object KnownForSources {
|
||||
)
|
||||
None
|
||||
}
|
||||
} else None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user