mirror of
https://github.com/twitter/the-algorithm.git
synced 2024-06-01 08:48:46 +02:00
ef4c5eb65e
Please note we have force-pushed a new initial commit in order to remove some publicly-available Twitter user information. Note that this process may be required in the future.
33 lines
882 B
Scala
33 lines
882 B
Scala
package com.twitter.simclusters_v2.common.clustering
|
|
|
|
import com.twitter.simclusters_v2.common.SimClustersEmbedding
|
|
|
|
/**
 * Similarity measures over pairs of [[SimClustersEmbedding]]s used by this clustering library.
 *
 * Every member is exposed as a `(SimClustersEmbedding, SimClustersEmbedding) => Double`
 * function value, so it can be handed directly to code that expects a pluggable
 * similarity function.
 */
object SimilarityFunctions {

  /**
   * Delegates to `cosineSimilarity` on the first embedding, passing the second as argument.
   *
   * @return a function yielding whatever `e1.cosineSimilarity(e2)` returns
   */
  def simClustersCosineSimilarity: (SimClustersEmbedding, SimClustersEmbedding) => Double =
    _.cosineSimilarity(_)

  /**
   * Binary similarity based on `topClusterIds(1)` of both embeddings.
   *
   * @return a function yielding 1.0 when some id in the first embedding's `topClusterIds(1)`
   *         is also contained in the second embedding's `topClusterIds(1)`, and 0.0 otherwise
   *         (including when the first embedding yields no ids at all)
   */
  def simClustersMatchingLargestDimension
    : (SimClustersEmbedding, SimClustersEmbedding) => Double = { (left, right) =>
    val leftTopIds = left.topClusterIds(1)
    // The right-hand lookup stays inside the predicate so it only runs when the left side
    // actually produced an id to compare against.
    val sharesTopCluster = leftTopIds.exists(topId => right.topClusterIds(1).contains(topId))

    sharesTopCluster match {
      case true => 1.0
      case false => 0.0
    }
  }

  /**
   * Delegates to `fuzzyJaccardSimilarity` on the first embedding, passing the second as argument.
   *
   * @return a function yielding whatever `e1.fuzzyJaccardSimilarity(e2)` returns
   */
  def simClustersFuzzyJaccardSimilarity
    : (SimClustersEmbedding, SimClustersEmbedding) => Double =
    _.fuzzyJaccardSimilarity(_)
}
|