twitter-team 01dbfee4c0 Open-sourcing Tweetypie
Tweetypie is the core Tweet service that handles the reading and writing of Tweet data.
2023-05-19 16:20:06 -05:00

56 lines
1.5 KiB

package com.twitter.tweetypie.tweettext
/**
 * Code used to convert raw user-provided text into an allowable form.
 */
object PartialHtmlEncoding {
/**
 * Replaces all `<`, `>`, and '&' chars with "&lt;", "&gt;", and "&amp;", respectively.
 *
 * Tweet text is HTML-encoded at tweet creation time, and is stored and processed in encoded form.
 *
 * Every '&' is escaped, including one that already starts an entity, so encoding
 * already-encoded text encodes it a second time (e.g. "&lt;" becomes "&amp;lt;").
 *
 * @param text the raw text to encode
 * @return a new string with the three special characters replaced by their entities;
 *         all other characters are copied through unchanged
 */
def encode(text: String): String = {
  // Start at the input length: the output is never shorter than the input.
  val buf = new StringBuilder(text.length)
  text.foreach {
    case '<' => buf.append("&lt;")
    case '>' => buf.append("&gt;")
    case '&' => buf.append("&amp;")
    case c => buf.append(c)
  }
  // The accumulated buffer is the method's result.
  buf.toString
}
// Literal patterns for the three entities produced by `encode`.
private val AmpLtRegex = "&lt;".r
private val AmpGtRegex = "&gt;".r
private val AmpAmpRegex = "&amp;".r

// Decodes the three entities in a fixed order: "&lt;", then "&gt;", then "&amp;".
// "&amp;" goes last so that text such as "&amp;lt;" decodes to "&lt;" rather than to "<".
private val partialHtmlDecoder: (String => String) = { encoded =>
  val ltDecoded = AmpLtRegex.replaceAllIn(encoded, "<")
  val gtDecoded = AmpGtRegex.replaceAllIn(ltDecoded, ">")
  AmpAmpRegex.replaceAllIn(gtDecoded, "&")
}
/**
 * The opposite of encode, it replaces all "&lt;", "&gt;", and "&amp;" with
 * `<`, `>`, and '&', respectively.
 *
 * Delegates to `decodeWithModification`: when that yields a modification, its `updated`
 * text is returned; when it yields `None`, the input is returned as-is.
 *
 * @param text the encoded text
 * @return the decoded text, or `text` itself if no modification was produced
 */
def decode(text: String): String =
  decodeWithModification(text).fold(text)(_.updated)
/**
 * Decodes encoded entities, and returns a `TextModification` if the text was modified.
 */
// NOTE(review): the expression that consumes `text` and the regex -> replacement pairs below
// is not visible in this view of the file (the surrounding call and its closing delimiter
// appear to have been lost) — restore it from the original source; do not guess the callee.
def decodeWithModification(text: String): Option[TextModification] =
// Each entry pairs an entity regex with its literal replacement; "&amp;" is listed last.
AmpLtRegex -> "<",
AmpGtRegex -> ">",
AmpAmpRegex -> "&"