347 lines
12 KiB
Scala
347 lines
12 KiB
Scala
package com.twitter.tweetypie.storage
|
|
|
|
import com.twitter.mediaservices.commons.tweetmedia.thriftscala._
|
|
import com.twitter.scrooge.TFieldBlob
|
|
import com.twitter.tweetypie.additionalfields.AdditionalFields
|
|
import com.twitter.tweetypie.storage_internal.thriftscala._
|
|
import com.twitter.tweetypie.thriftscala._
|
|
import com.twitter.tweetypie.util.TweetLenses
|
|
|
|
object StorageConversions {
|
|
// Ids of the fields listed in StoredTweet.metaData for which
// AdditionalFields.isAdditionalFieldId holds.
private val tbTweetCompiledAdditionalFieldIds =
  StoredTweet.metaData.fields.collect {
    case field if AdditionalFields.isAdditionalFieldId(field.id) => field.id
  }
|
|
|
|
/**
 * Converts a Reply into a StoredReply.
 *
 * A missing inReplyToStatusId is written as 0; the given conversationId is stored unchanged.
 */
def toStoredReply(reply: Reply, conversationId: Option[TweetId]): StoredReply = {
  val statusId = reply.inReplyToStatusId.getOrElse(0L)
  StoredReply(
    inReplyToStatusId = statusId,
    inReplyToUserId = reply.inReplyToUserId,
    conversationId = conversationId
  )
}
|
|
|
|
/** Copies the source status id, source user id and parent status id of a Share into a StoredShare. */
def toStoredShare(share: Share): StoredShare = {
  val sourceStatusId = share.sourceStatusId
  val sourceUserId = share.sourceUserId
  val parentStatusId = share.parentStatusId
  StoredShare(sourceStatusId, sourceUserId, parentStatusId)
}
|
|
|
|
/**
 * Builds the stored reference to a quoted tweet.
 *
 * @param qt   the quoted tweet reference
 * @param text the text of the quoting tweet
 * @return None when `qt` has no permalink or when the permalink's short url already occurs in
 *         `text`; otherwise the quoted tweet id, user id and short url
 */
def toStoredQuotedTweet(qt: QuotedTweet, text: String): Option[StoredQuotedTweet] =
  qt.permalink.collect {
    // omit StoredQuotedTweet when url already in text
    case permalink if !text.contains(permalink.shortUrl) =>
      StoredQuotedTweet(qt.tweetId, qt.userId, permalink.shortUrl)
  }
|
|
|
|
/**
 * Derives the stored geo record of a tweet from its coordinates and place id.
 *
 *  - Coordinates present: they are copied, `entityId` is 2 when `display` is set and 0
 *    otherwise, and the place id (if any) is stored as `name`.
 *  - Only a place id: latitude, longitude, geoPrecision and entityId are all zero and the
 *    place id is stored as `name`.
 *  - Neither: None.
 */
def toStoredGeo(tweet: Tweet): Option[StoredGeo] = {
  val coordinates = TweetLenses.geoCoordinates.get(tweet)
  val placeId = TweetLenses.placeId.get(tweet)

  val fromCoordinates = coordinates.map { coords =>
    StoredGeo(
      latitude = coords.latitude,
      longitude = coords.longitude,
      geoPrecision = coords.geoPrecision,
      entityId = if (coords.display) 2 else 0,
      name = placeId
    )
  }

  fromCoordinates.orElse {
    placeId.map { id =>
      StoredGeo(
        latitude = 0.0,
        longitude = 0.0,
        geoPrecision = 0,
        entityId = 0,
        name = Some(id)
      )
    }
  }
}
|
|
|
|
/**
 * Converts media entities to their stored form.
 *
 * Entities that have a sourceStatusId are skipped, as are entities for which
 * toStoredMediaEntity yields nothing.
 */
def toStoredMedia(mediaList: Seq[MediaEntity]): Seq[StoredMediaEntity] = {
  val withoutSourceStatus = mediaList.filterNot(_.sourceStatusId.nonEmpty)
  withoutSourceStatus.flatMap(toStoredMediaEntity)
}
|
|
|
|
/**
 * Converts one media entity to its stored form, based on its `Orig` size entry.
 *
 * @return None when `media.sizes` has no entry of type MediaSizeType.Orig
 */
def toStoredMediaEntity(media: MediaEntity): Option[StoredMediaEntity] =
  media.sizes.collectFirst {
    case size if size.sizeType == MediaSizeType.Orig =>
      // NOTE(review): toByte / toShort narrow the values; presumably content type and
      // dimensions always fit -- confirm against the thrift definitions.
      StoredMediaEntity(
        id = media.mediaId,
        mediaType = size.deprecatedContentType.value.toByte,
        width = size.width.toShort,
        height = size.height.toShort
      )
  }
|
|
|
|
/**
 * Converts a Narrowcast to its stored form; only `location` is taken from the input.
 *
 * The language and ids fields are for compatibility with existing tweets stored in manhattan,
 * and are always written as empty sequences.
 */
def toStoredNarrowcast(narrowcast: Narrowcast): StoredNarrowcast = {
  val location = narrowcast.location
  StoredNarrowcast(
    language = Some(Seq.empty),
    location = Some(location),
    ids = Some(Seq.empty)
  )
}
|
|
|
|
/** Sets each blob of `from`, in order, on `to` and returns the resulting StoredTweet. */
def toStoredAdditionalFields(from: Seq[TFieldBlob], to: StoredTweet): StoredTweet =
  from.foldLeft(to)((stored, blob) => stored.setField(blob))
|
|
|
|
/**
 * Sets the blobs returned by AdditionalFields.additionalFields(from) on `to` and returns
 * the resulting StoredTweet.
 */
def toStoredAdditionalFields(from: Tweet, to: StoredTweet): StoredTweet = {
  val blobs = AdditionalFields.additionalFields(from)
  toStoredAdditionalFields(blobs, to)
}
|
|
|
|
/**
 * Converts a Tweet into the StoredTweet that is written to storage.
 *
 * The compiled fields are read through TweetLenses (plus `tweet.id`, `tweet.contributor` and
 * `tweet.media`); the additional fields of the tweet are then set on the result.
 */
def toStoredTweet(tweet: Tweet): StoredTweet = {
  // Read userId before text so the lenses are consulted in the same order as the fields below.
  val userId = TweetLenses.userId(tweet)
  val text = TweetLenses.text(tweet)

  val compiledFields =
    StoredTweet(
      id = tweet.id,
      userId = Some(userId),
      text = Some(text),
      createdVia = Some(TweetLenses.createdVia(tweet)),
      createdAtSec = Some(TweetLenses.createdAt(tweet)),
      // The conversation id is only looked up when the tweet is a reply.
      reply = TweetLenses.reply(tweet).map(toStoredReply(_, TweetLenses.conversationId(tweet))),
      share = TweetLenses.share(tweet).map(toStoredShare),
      contributorId = tweet.contributor.map(_.userId),
      geo = toStoredGeo(tweet),
      hasTakedown = Some(TweetLenses.hasTakedown(tweet)),
      nsfwUser = Some(TweetLenses.nsfwUser(tweet)),
      nsfwAdmin = Some(TweetLenses.nsfwAdmin(tweet)),
      media = tweet.media.map(toStoredMedia),
      narrowcast = TweetLenses.narrowcast(tweet).map(toStoredNarrowcast),
      nullcast = Some(TweetLenses.nullcast(tweet)),
      trackingId = TweetLenses.trackingId(tweet),
      quotedTweet = TweetLenses.quotedTweet(tweet).flatMap(toStoredQuotedTweet(_, text))
    )

  toStoredAdditionalFields(tweet, compiledFields)
}
|
|
|
|
/**
 * Does not need core data to be set. Constructs on disk tweet by avoiding the TweetLenses object
 * and only extracting the specified fields.
 *
 * NOTE: Assumes that specified fields are set in the tweet. Core data is only dereferenced
 * when Geo, HasTakedown, NsfwUser or NsfwAdmin is requested.
 *
 * @param tpTweet Tweetypie Tweet to be converted
 * @param fields the fields to be populated in the on disk Tweet
 *
 * @return an on disk Tweet which has only the specified fields set
 *
 * @throws IllegalArgumentException if `fields` contains a field that is neither in
 *                                  Field.AllUpdatableCompiledFields nor an additional field
 * @throws NoSuchElementException   if a core-data backed field is requested but the tweet has
 *                                  no core data
 */
def toStoredTweetForFields(tpTweet: Tweet, fields: Set[Field]): StoredTweet = {

  // Make sure all the passed in fields are known or additional fields
  val unsupportedFields =
    (fields -- Field.AllUpdatableCompiledFields)
      .filterNot(field => AdditionalFields.isAdditionalFieldId(field.id))
  require(
    unsupportedFields.isEmpty,
    "fields must be updatable compiled fields or additional fields; unsupported field ids: " +
      unsupportedFields.map(_.id).mkString(", ")
  )

  // Deliberately a def: core data must only be touched when a field that needs it is requested.
  // Same exception type as Option.get, but the message says which tweet was incomplete.
  def coreData: TweetCoreData =
    tpTweet.coreData.getOrElse(
      throw new NoSuchElementException(
        s"Tweet ${tpTweet.id} has no core data but core data fields were requested"
      )
    )

  // Evaluates `value` only when `field` was requested.
  def ifRequested[A](field: Field)(value: => A): Option[A] =
    if (fields.contains(field)) Some(value) else None

  // Coordinates win; a place id alone is stored with zeroed coordinates; neither yields None.
  def storedGeo(core: TweetCoreData): Option[StoredGeo] =
    core.coordinates
      .map { coords =>
        StoredGeo(
          latitude = coords.latitude,
          longitude = coords.longitude,
          geoPrecision = coords.geoPrecision,
          entityId = if (coords.display) 2 else 0,
          name = core.placeId
        )
      }
      .orElse {
        core.placeId.map { placeId =>
          StoredGeo(
            latitude = 0.0,
            longitude = 0.0,
            geoPrecision = 0,
            entityId = 0,
            name = Some(placeId)
          )
        }
      }

  val storedTweet =
    StoredTweet(
      id = tpTweet.id,
      geo = if (fields.contains(Field.Geo)) storedGeo(coreData) else None,
      hasTakedown = ifRequested(Field.HasTakedown)(coreData.hasTakedown),
      nsfwUser = ifRequested(Field.NsfwUser)(coreData.nsfwUser),
      nsfwAdmin = ifRequested(Field.NsfwAdmin)(coreData.nsfwAdmin)
    )

  // Additional fields are copied from the tweet as soon as any additional field id is requested.
  if (fields.exists(field => AdditionalFields.isAdditionalFieldId(field.id)))
    toStoredAdditionalFields(tpTweet, storedTweet)
  else
    storedTweet
}
|
|
|
|
/**
 * Converts a StoredReply back into a Reply.
 *
 * An inReplyToStatusId that is not greater than 0 is read back as None.
 */
def fromStoredReply(reply: StoredReply): Reply = {
  val statusId = reply.inReplyToStatusId
  Reply(
    if (statusId > 0) Some(statusId) else None,
    reply.inReplyToUserId
  )
}
|
|
|
|
/** Copies the source status id, source user id and parent status id of a StoredShare into a Share. */
def fromStoredShare(share: StoredShare): Share = {
  val sourceStatusId = share.sourceStatusId
  val sourceUserId = share.sourceUserId
  val parentStatusId = share.parentStatusId
  Share(sourceStatusId, sourceUserId, parentStatusId)
}
|
|
|
|
/**
 * Converts a StoredQuotedTweet back into a QuotedTweet.
 *
 * Only the short url of the permalink is known here; longUrl and displayText are left empty.
 */
def fromStoredQuotedTweet(qt: StoredQuotedTweet): QuotedTweet = {
  // longUrl and displayText will be hydrated later via tweetypie's QuotedTweetRefUrlsHydrator
  val permalink = ShortenedUrl(shortUrl = qt.shortUrl, longUrl = "", displayText = "")
  QuotedTweet(qt.tweetId, qt.userId, Some(permalink))
}
|
|
|
|
/**
 * Converts a StoredGeo into GeoCoordinates.
 *
 * `display` is true exactly when the stored entityId equals 2; the stored `name` is not used.
 */
def fromStoredGeo(geo: StoredGeo): GeoCoordinates = {
  val display = geo.entityId == 2
  GeoCoordinates(
    latitude = geo.latitude,
    longitude = geo.longitude,
    geoPrecision = geo.geoPrecision,
    display = display
  )
}
|
|
|
|
/**
 * Converts a StoredMediaEntity into a partially populated MediaEntity.
 *
 * Only the media id and a single `Orig` size (content type, width, height) come from storage.
 * Indices are set to -1 and the url fields to null as placeholders that get filled in later.
 */
def fromStoredMediaEntity(media: StoredMediaEntity): MediaEntity = {
  // The one size that storage keeps: the original, with resize method Fit.
  val origSize =
    MediaSize(
      sizeType = MediaSizeType.Orig,
      resizeMethod = MediaResizeMethod.Fit,
      deprecatedContentType = MediaContentType(media.mediaType),
      width = media.width,
      height = media.height
    )

  MediaEntity(
    fromIndex = -1, // will get filled in later
    toIndex = -1, // will get filled in later
    url = null, // will get filled in later
    mediaPath = "", // field is obsolete
    mediaUrl = null, // will get filled in later
    mediaUrlHttps = null, // will get filled in later
    displayUrl = null, // will get filled in later
    expandedUrl = null, // will get filled in later
    mediaId = media.id,
    nsfw = false,
    sizes = Set(origSize)
  )
}
|
|
|
|
/**
 * Converts a StoredNarrowcast into a Narrowcast.
 *
 * A missing stored location becomes an empty sequence; the stored language and ids are not used.
 */
def fromStoredNarrowcast(narrowcast: StoredNarrowcast): Narrowcast = {
  val location = narrowcast.location.getOrElse(Seq())
  Narrowcast(location = location)
}
|
|
|
|
/**
 * Converts a StoredTweet read from storage back into a Tweet.
 *
 * userId, text, createdVia and createdAtSec are required; use fromStoredTweetAllowInvalid for
 * stored tweets that may lack them. Mentions, urls, cashtags and hashtags are returned as
 * empty sequences. Stored additional fields are copied onto the result.
 *
 * @param storedTweet the stored tweet to convert
 * @return the Tweet, with core data always set
 * @throws NoSuchElementException if one of the required fields is missing; the message names
 *                                the tweet id and the missing field
 */
def fromStoredTweet(storedTweet: StoredTweet): Tweet = {
  // Same exception type as Option.get, but says which tweet and which field was missing.
  def required[A](value: Option[A], fieldName: String): A =
    value.getOrElse(
      throw new NoSuchElementException(
        s"StoredTweet ${storedTweet.id} is missing required field: $fieldName"
      )
    )

  val coreData =
    TweetCoreData(
      userId = required(storedTweet.userId, "userId"),
      text = required(storedTweet.text, "text"),
      createdVia = required(storedTweet.createdVia, "createdVia"),
      createdAtSecs = required(storedTweet.createdAtSec, "createdAtSec"),
      reply = storedTweet.reply.map(fromStoredReply),
      share = storedTweet.share.map(fromStoredShare),
      hasTakedown = storedTweet.hasTakedown.getOrElse(false),
      nsfwUser = storedTweet.nsfwUser.getOrElse(false),
      nsfwAdmin = storedTweet.nsfwAdmin.getOrElse(false),
      narrowcast = storedTweet.narrowcast.map(fromStoredNarrowcast),
      nullcast = storedTweet.nullcast.getOrElse(false),
      trackingId = storedTweet.trackingId,
      // The conversation id is stored inside the reply record.
      conversationId = storedTweet.reply.flatMap(_.conversationId),
      // The place id is stored as the name of the geo record.
      placeId = storedTweet.geo.flatMap(_.name),
      coordinates = storedTweet.geo.map(fromStoredGeo),
      // NOTE(review): computed from the stored media even for retweets, whose media is
      // dropped below -- confirm this is intended.
      hasMedia = if (storedTweet.media.exists(_.nonEmpty)) Some(true) else None
    )

  // retweets should never have their media, but some tweets incorrectly do.
  val storedMedia = if (coreData.share.isDefined) Nil else storedTweet.media.toSeq

  val tpTweet =
    Tweet(
      id = storedTweet.id,
      coreData = Some(coreData),
      contributor = storedTweet.contributorId.map(Contributor(_)),
      media = Some(storedMedia.flatten.map(fromStoredMediaEntity)),
      mentions = Some(Seq.empty),
      urls = Some(Seq.empty),
      cashtags = Some(Seq.empty),
      hashtags = Some(Seq.empty),
      quotedTweet = storedTweet.quotedTweet.map(fromStoredQuotedTweet)
    )

  fromStoredAdditionalFields(storedTweet, tpTweet)
}
|
|
|
|
/**
 * Like fromStoredTweet, but tolerates stored tweets that lack required fields.
 *
 * Missing values are replaced before conversion: userId and createdAtSec by -1, text and
 * createdVia by the empty string.
 */
def fromStoredTweetAllowInvalid(storedTweet: StoredTweet): Tweet = {
  val withDefaults =
    storedTweet.copy(
      userId = Some(storedTweet.userId.getOrElse(-1L)),
      text = Some(storedTweet.text.getOrElse("")),
      createdVia = Some(storedTweet.createdVia.getOrElse("")),
      createdAtSec = Some(storedTweet.createdAtSec.getOrElse(-1L))
    )
  fromStoredTweet(withDefaults)
}
|
|
|
|
/**
 * Copies the additional fields of a StoredTweet onto a Tweet.
 *
 * The blobs come from two places: the compiled StoredTweet fields whose ids are additional
 * field ids, and the passthrough fields whose ids are additional field ids. On an id present
 * in both, the passthrough blob is the one kept.
 */
def fromStoredAdditionalFields(from: StoredTweet, to: Tweet): Tweet = {
  val compiledBlobs = from.getFieldBlobs(tbTweetCompiledAdditionalFieldIds)
  val passThroughBlobs =
    from._passthroughFields.filter {
      case (fieldId, _) => AdditionalFields.isAdditionalFieldId(fieldId)
    }

  (compiledBlobs ++ passThroughBlobs).values
    .foldLeft(to)((tweet, blob) => tweet.setField(blob))
}
|
|
|
|
/**
 * Builds the DeletedTweet view of a StoredTweet.
 *
 * Copies id, userId, text, createdAtSec, share and media. If the stored tweet has a blob for
 * Tweet.NoteTweetField it is decoded as a NoteTweet to supply noteTweetId and isExpandable.
 */
def toDeletedTweet(storedTweet: StoredTweet): DeletedTweet = {
  val noteTweet =
    storedTweet
      .getFieldBlob(Tweet.NoteTweetField.id)
      .map(blob => NoteTweet.decode(blob.read))

  DeletedTweet(
    id = storedTweet.id,
    userId = storedTweet.userId,
    text = storedTweet.text,
    createdAtSecs = storedTweet.createdAtSec,
    share = storedTweet.share.map(toDeletedShare),
    media = storedTweet.media.map(entities => entities.map(toDeletedMediaEntity)),
    noteTweetId = noteTweet.map(_.id),
    isExpandable = noteTweet.flatMap(_.isExpandable)
  )
}
|
|
|
|
/** Copies the source status id, source user id and parent status id into a DeletedTweetShare. */
def toDeletedShare(storedShare: StoredShare): DeletedTweetShare = {
  val sourceStatusId = storedShare.sourceStatusId
  val sourceUserId = storedShare.sourceUserId
  val parentStatusId = storedShare.parentStatusId
  DeletedTweetShare(
    sourceStatusId = sourceStatusId,
    sourceUserId = sourceUserId,
    parentStatusId = parentStatusId
  )
}
|
|
|
|
/** Copies id, media type, width and height of a stored media entity into a DeletedTweetMediaEntity. */
def toDeletedMediaEntity(storedMediaEntity: StoredMediaEntity): DeletedTweetMediaEntity = {
  val stored = storedMediaEntity
  DeletedTweetMediaEntity(
    id = stored.id,
    mediaType = stored.mediaType,
    width = stored.width,
    height = stored.height
  )
}
|
|
}
|