Patch: rewrite the knownFor text-line parser in KnownForSources as a pattern match.

Lines starting with "#" are skipped. Every other line is trimmed and split on
whitespace: the first token is parsed as the user id and each remaining token
as a clusterId:score pair. Pairs are appended in the order they appear on the
line, and a line with no pairs yields None. Any failure while parsing a line is
logged as a warning and that line is dropped.

diff --git a/src/scala/com/twitter/simclusters_v2/scalding/KnownForSources.scala b/src/scala/com/twitter/simclusters_v2/scalding/KnownForSources.scala
index 217f521ac..5f96df6a5 100644
--- a/src/scala/com/twitter/simclusters_v2/scalding/KnownForSources.scala
+++ b/src/scala/com/twitter/simclusters_v2/scalding/KnownForSources.scala
@@ -102,23 +102,24 @@ object KnownForSources {
     TypedPipe
       .from(TextLine(textFile))
       .flatMap { str =>
-        if (!str.startsWith("#")) {
-          try {
+        str match {
+          case comment if comment.startsWith("#") => None
+          case _ => try {
             val tokens = str.trim.split("\\s+")
-            val res = Array.newBuilder[(Int, Float)]
             val userId = tokens(0).toLong
-            for (i <- 1 until tokens.length) {
+            // foldLeft (not foldRight) so entries keep the order they have on the line
+            (1 until tokens.length).foldLeft(Array.newBuilder[(Int, Float)])((r, i) => {
               val Array(cIdStr, scoreStr) = tokens(i).split(":")
               val clusterId = cIdStr.toInt
               val score = scoreStr.toFloat
               val newEntry = (clusterId, score)
-              res += newEntry
+              r += newEntry
+            }).result() match {
+              case res if res.nonEmpty => Some((userId, res))
+              case _ => None
             }
-            val result = res.result
-            if (result.nonEmpty) {
-              Some((userId, res.result()))
-            } else None
-          } catch {
+          }
+          catch {
             case ex: Throwable =>
               log.warning(
                 s"Error while loading knownFor from $textFile for line <$str>: " +
@@ -126,6 +127,6 @@ object KnownForSources {
               )
               None
           }
-        } else None
+        }
       }
   }