mirror of
https://github.com/Sude-/lgogdownloader.git
synced 2024-11-20 11:49:17 +01:00
Remove htmlcxx dependency and use libtidy with tinyxml2 to parse html
This commit is contained in:
parent
1866f4c65c
commit
1c0ab298e4
2
.github/workflows/linux.yml
vendored
2
.github/workflows/linux.yml
vendored
@ -32,7 +32,7 @@ jobs:
|
||||
run: |
|
||||
sudo apt -y update
|
||||
sudo apt -y install ninja-build build-essential libcurl4-openssl-dev libboost-regex-dev \
|
||||
libjsoncpp-dev librhash-dev libtinyxml2-dev libhtmlcxx-dev \
|
||||
libjsoncpp-dev librhash-dev libtinyxml2-dev libtidy-dev \
|
||||
libboost-system-dev libboost-filesystem-dev libboost-program-options-dev \
|
||||
libboost-date-time-dev libboost-iostreams-dev help2man cmake \
|
||||
pkg-config zlib1g-dev qtwebengine5-dev
|
||||
|
@ -25,11 +25,11 @@ find_package(Boost
|
||||
)
|
||||
find_package(CURL 7.55.0 REQUIRED)
|
||||
find_package(Jsoncpp REQUIRED)
|
||||
find_package(Htmlcxx REQUIRED)
|
||||
find_package(Tinyxml2 REQUIRED)
|
||||
find_package(Rhash REQUIRED)
|
||||
find_package(Threads REQUIRED)
|
||||
find_package(ZLIB REQUIRED)
|
||||
find_package(Tidy REQUIRED)
|
||||
|
||||
file(GLOB SRC_FILES
|
||||
main.cpp
|
||||
@ -111,10 +111,10 @@ target_include_directories(${PROJECT_NAME}
|
||||
PRIVATE ${CURL_INCLUDE_DIRS}
|
||||
PRIVATE ${OAuth_INCLUDE_DIRS}
|
||||
PRIVATE ${Jsoncpp_INCLUDE_DIRS}
|
||||
PRIVATE ${Htmlcxx_INCLUDE_DIRS}
|
||||
PRIVATE ${Tinyxml2_INCLUDE_DIRS}
|
||||
PRIVATE ${Rhash_INCLUDE_DIRS}
|
||||
PRIVATE ${ZLIB_INCLUDE_DIRS}
|
||||
PRIVATE ${Tidy_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
target_link_libraries(${PROJECT_NAME}
|
||||
@ -122,11 +122,11 @@ target_link_libraries(${PROJECT_NAME}
|
||||
PRIVATE ${CURL_LIBRARIES}
|
||||
PRIVATE ${OAuth_LIBRARIES}
|
||||
PRIVATE ${Jsoncpp_LIBRARIES}
|
||||
PRIVATE ${Htmlcxx_LIBRARIES}
|
||||
PRIVATE ${Tinyxml2_LIBRARIES}
|
||||
PRIVATE ${Rhash_LIBRARIES}
|
||||
PRIVATE ${CMAKE_THREAD_LIBS_INIT}
|
||||
PRIVATE ${ZLIB_LIBRARIES}
|
||||
PRIVATE ${Tidy_LIBRARIES}
|
||||
)
|
||||
|
||||
# Check if libatomic is needed in order to use std::atomic, and add
|
||||
|
@ -8,7 +8,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
|
||||
* [libcurl](https://curl.haxx.se/libcurl/) >= 7.55.0
|
||||
* [librhash](https://github.com/rhash/RHash)
|
||||
* [jsoncpp](https://github.com/open-source-parsers/jsoncpp)
|
||||
* [htmlcxx](http://htmlcxx.sourceforge.net/)
|
||||
* [libtidy](https://www.html-tidy.org/)
|
||||
* [tinyxml2](https://github.com/leethomason/tinyxml2)
|
||||
* [boost](http://www.boost.org/) (regex, date-time, system, filesystem, program-options, iostreams)
|
||||
* [zlib](https://www.zlib.net/)
|
||||
@ -22,7 +22,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
|
||||
## Debian/Ubuntu
|
||||
|
||||
# apt install build-essential libcurl4-openssl-dev libboost-regex-dev \
|
||||
libjsoncpp-dev librhash-dev libtinyxml2-dev libhtmlcxx-dev \
|
||||
libjsoncpp-dev librhash-dev libtinyxml2-dev libtidy-dev \
|
||||
libboost-system-dev libboost-filesystem-dev libboost-program-options-dev \
|
||||
libboost-date-time-dev libboost-iostreams-dev help2man cmake \
|
||||
pkg-config zlib1g-dev qtwebengine5-dev ninja-build
|
||||
@ -35,7 +35,7 @@ It uses the same API as GOG Galaxy which doesn't have Linux support at the momen
|
||||
## Fedora
|
||||
```
|
||||
sudo dnf install cmake make gcc gcc-c++ glibc tinyxml2-devel rhash-devel \
|
||||
htmlcxx-devel tinyxml-devel jsoncpp-devel liboauth-devel libcurl-devel \
|
||||
libtidy-devel tinyxml-devel jsoncpp-devel liboauth-devel libcurl-devel \
|
||||
boost-devel help2man
|
||||
```
|
||||
### Build and Install
|
||||
|
@ -1,54 +0,0 @@
|
||||
# - Try to find htmlcxx
|
||||
#
|
||||
# Once done this will define
|
||||
# Htmlcxx_FOUND - System has htmlcxx
|
||||
# Htmlcxx_INCLUDE_DIRS - The htmlcxx include directories
|
||||
# Htmlcxx_LIBRARIES - The libraries needed to use htmlcxx
|
||||
|
||||
find_package(PkgConfig)
|
||||
pkg_check_modules(PC_HTMLCXX REQUIRED htmlcxx)
|
||||
|
||||
find_path(HTMLCXX_INCLUDE_DIR
|
||||
NAMES
|
||||
css/parser.h
|
||||
html/tree.h
|
||||
HINTS
|
||||
${PC_HTMLCXX_INCLUDEDIR}
|
||||
${PC_HTMLCXX_INCLUDE_DIRS}
|
||||
PATH_SUFFIXES
|
||||
htmlcxx
|
||||
PATHS
|
||||
${PC_HTMLCXX_INCLUDE_DIRS}
|
||||
)
|
||||
|
||||
find_library(HTMLCXX_LIBRARY_HTMLCXX htmlcxx
|
||||
HINTS
|
||||
${PC_HTMLCXX_LIBDIR}
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
PATHS
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
find_library(HTMLCXX_LIBRARY_CSS_PARSER css_parser
|
||||
HINTS
|
||||
${PC_HTMLCXX_LIBDIR}
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
PATHS
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
find_library(HTMLCXX_LIBRARY_CSS_PARSER_PP css_parser_pp
|
||||
HINTS
|
||||
${PC_HTMLCXX_LIBDIR}
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
PATHS
|
||||
${PC_HTMLCXX_LIBRARY_DIRS}
|
||||
)
|
||||
|
||||
mark_as_advanced(HTMLCXX_INCLUDE_DIR HTMLCXX_LIBRARY_HTMLCXX HTMLCXX_LIBRARY_CSS_PARSER HTMLCXX_LIBRARY_CSS_PARSER_PP)
|
||||
|
||||
if(PC_HTMLCXX_FOUND)
|
||||
set(Htmlcxx_FOUND ON)
|
||||
set(Htmlcxx_INCLUDE_DIRS ${HTMLCXX_INCLUDE_DIR})
|
||||
set(Htmlcxx_LIBRARIES ${HTMLCXX_LIBRARY_HTMLCXX} ${HTMLCXX_LIBRARY_CSS_PARSER} ${HTMLCXX_LIBRARY_CSS_PARSER_PP})
|
||||
endif(PC_HTMLCXX_FOUND)
|
33
cmake/FindTidy.cmake
Normal file
33
cmake/FindTidy.cmake
Normal file
@ -0,0 +1,33 @@
|
||||
# - Try to find tidy
#
# Once done this will define
#  Tidy_FOUND - System has tidy
#  Tidy_INCLUDE_DIRS - The tidy include directories
#  Tidy_LIBRARIES - The libraries needed to use tidy

# pkg-config is only used to provide search hints, so it is optional.
# pkg_check_modules is defined by the PkgConfig module and must not be
# called when that module could not be loaded.
find_package(PkgConfig QUIET)
if(PKG_CONFIG_FOUND)
  pkg_check_modules(PC_TIDY QUIET tidy)
endif()

find_path(TIDY_INCLUDE_DIR tidy.h
  HINTS
    ${PC_TIDY_INCLUDEDIR}
    ${PC_TIDY_INCLUDE_DIRS}
  PATH_SUFFIXES
    tidy
  PATHS
    ${PC_TIDY_INCLUDE_DIRS}
  )

find_library(TIDY_LIBRARY tidy
  HINTS
    ${PC_TIDY_LIBDIR}
    ${PC_TIDY_LIBRARY_DIRS}
  PATHS
    ${PC_TIDY_LIBRARY_DIRS}
  )

mark_as_advanced(TIDY_INCLUDE_DIR TIDY_LIBRARY)

# Require both the header and the library, and let CMake handle the
# REQUIRED / QUIET arguments of find_package(Tidy ...) so that a missing
# libtidy is reported at configure time instead of at link time.
include(FindPackageHandleStandardArgs)
find_package_handle_standard_args(Tidy
  FOUND_VAR Tidy_FOUND
  REQUIRED_VARS TIDY_LIBRARY TIDY_INCLUDE_DIR
  )

if(Tidy_FOUND)
  set(Tidy_INCLUDE_DIRS ${TIDY_INCLUDE_DIR})
  set(Tidy_LIBRARIES ${TIDY_LIBRARY})
endif(Tidy_FOUND)
|
@ -24,6 +24,7 @@
|
||||
#include <json/json.h>
|
||||
#include <boost/date_time/posix_time/posix_time.hpp>
|
||||
#include <curl/curl.h>
|
||||
#include <tinyxml2.h>
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -104,6 +105,8 @@ namespace Util
|
||||
}
|
||||
Json::Value readJsonFile(const std::string& path);
|
||||
std::string transformGamename(const std::string& gamename);
|
||||
std::string htmlToXhtml(const std::string& html);
|
||||
tinyxml2::XMLNode* nextXMLNode(tinyxml2::XMLNode* node);
|
||||
}
|
||||
|
||||
#endif // UTIL_H
|
||||
|
@ -32,7 +32,6 @@ class Website
|
||||
private:
|
||||
CURL* curlhandle;
|
||||
bool IsloggedInSimple();
|
||||
bool IsLoggedInComplex(const std::string& email);
|
||||
std::map<std::string, std::string> getTagsFromJson(const Json::Value& json);
|
||||
int retries;
|
||||
std::string LoginGetAuthCode(const std::string& email, const std::string& password);
|
||||
|
@ -24,8 +24,6 @@
|
||||
#include <boost/date_time/posix_time/posix_time.hpp>
|
||||
#include <tinyxml2.h>
|
||||
#include <json/json.h>
|
||||
#include <htmlcxx/html/ParserDom.h>
|
||||
#include <htmlcxx/html/Uri.h>
|
||||
#include <termios.h>
|
||||
#include <algorithm>
|
||||
#include <thread>
|
||||
@ -1630,30 +1628,18 @@ std::string Downloader::getSerialsFromJSON(const Json::Value& json)
|
||||
}
|
||||
else
|
||||
{
|
||||
htmlcxx::HTML::ParserDom parser;
|
||||
tree<htmlcxx::HTML::Node> dom = parser.parseTree(cdkey);
|
||||
tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
|
||||
tree<htmlcxx::HTML::Node>::iterator end = dom.end();
|
||||
for (; it != end; ++it)
|
||||
std::string xhtml = Util::htmlToXhtml(cdkey);
|
||||
tinyxml2::XMLDocument doc;
|
||||
doc.Parse(xhtml.c_str());
|
||||
tinyxml2::XMLNode* node = doc.FirstChildElement("html");
|
||||
while(node)
|
||||
{
|
||||
std::string tag_text;
|
||||
if (it->tagName() == "span")
|
||||
{
|
||||
for (unsigned int j = 0; j < dom.number_of_children(it); ++j)
|
||||
{
|
||||
tree<htmlcxx::HTML::Node>::iterator span_it = dom.child(it, j);
|
||||
if (!span_it->isTag() && !span_it->isComment())
|
||||
tag_text = span_it->text();
|
||||
}
|
||||
}
|
||||
|
||||
if (!tag_text.empty())
|
||||
{
|
||||
boost::regex expression("^\\h+|\\h+$");
|
||||
std::string text = boost::regex_replace(tag_text, expression, "");
|
||||
if (!text.empty())
|
||||
tinyxml2::XMLElement *element = node->ToElement();
|
||||
const char* text = element->GetText();
|
||||
if (text)
|
||||
serials << text << std::endl;
|
||||
}
|
||||
|
||||
node = Util::nextXMLNode(node);
|
||||
}
|
||||
}
|
||||
|
||||
|
49
src/util.cpp
49
src/util.cpp
@ -11,10 +11,11 @@
|
||||
#include <boost/iostreams/filter/gzip.hpp>
|
||||
#include <boost/iostreams/filtering_streambuf.hpp>
|
||||
#include <boost/iostreams/copy.hpp>
|
||||
#include <tinyxml2.h>
|
||||
#include <json/json.h>
|
||||
#include <fstream>
|
||||
#include <sys/ioctl.h>
|
||||
#include <tidy.h>
|
||||
#include <tidybuffio.h>
|
||||
|
||||
/*
|
||||
Create filepath from specified directory and path
|
||||
@ -1007,3 +1008,49 @@ std::string Util::transformGamename(const std::string& gamename)
|
||||
|
||||
return gamename_transformed;
|
||||
}
|
||||
|
||||
std::string Util::htmlToXhtml(const std::string& html)
|
||||
{
|
||||
std::string xhtml;
|
||||
TidyBuffer buffer = {0, 0, 0, 0, 0};
|
||||
int rc = -1;
|
||||
TidyDoc doc = tidyCreate();
|
||||
|
||||
tidyOptSetBool(doc, TidyXhtmlOut, yes);
|
||||
tidyOptSetBool(doc, TidyForceOutput, yes);
|
||||
tidyOptSetInt(doc, TidyWrapLen, 0);
|
||||
tidyOptSetInt(doc, TidyShowInfo, 0);
|
||||
tidyOptSetInt(doc, TidyShowWarnings, 0);
|
||||
rc = tidyParseString(doc, html.c_str());
|
||||
if ( rc >= 0 )
|
||||
rc = tidyCleanAndRepair(doc);
|
||||
if ( rc >= 0 )
|
||||
rc = tidySaveBuffer(doc, &buffer);
|
||||
|
||||
xhtml = std::string((char*)buffer.bp, buffer.size);
|
||||
|
||||
tidyBufFree(&buffer);
|
||||
tidyRelease(doc);
|
||||
|
||||
return xhtml;
|
||||
}
|
||||
|
||||
/*
    Get the element that follows the given node when walking the document depth-first
    Order of preference: first child element, next sibling element,
    next sibling element of the closest ancestor that has one
    returns nullptr if node is nullptr or there are no more elements
*/
tinyxml2::XMLNode* Util::nextXMLNode(tinyxml2::XMLNode* node)
{
    if (!node)
        return nullptr;

    // Has child element, go to first child
    if (tinyxml2::XMLElement* child = node->FirstChildElement())
        return child;

    // Has sibling element, go to first sibling
    if (tinyxml2::XMLElement* sibling = node->NextSiblingElement())
        return sibling;

    // Go up through the ancestors until one of them has a sibling
    for (tinyxml2::XMLNode* ancestor = node->Parent(); ancestor; ancestor = ancestor->Parent())
    {
        if (tinyxml2::XMLElement* sibling = ancestor->NextSiblingElement())
            return sibling;
    }

    // Reached the end
    return nullptr;
}
|
||||
|
@ -8,8 +8,8 @@
|
||||
#include "globalconstants.h"
|
||||
#include "message.h"
|
||||
|
||||
#include <htmlcxx/html/ParserDom.h>
|
||||
#include <boost/algorithm/string/case_conv.hpp>
|
||||
#include <tinyxml2.h>
|
||||
|
||||
#ifdef USE_QT_GUI_LOGIN
|
||||
#include "gui_login.h"
|
||||
@ -358,23 +358,27 @@ std::string Website::LoginGetAuthCodeCurl(const std::string& login_form_html, co
|
||||
std::string tagname_username = "login[username]";
|
||||
std::string tagname_password = "login[password]";
|
||||
std::string tagname_login = "login[login]";
|
||||
std::string tagname_token;
|
||||
std::string tagname_token = "login[_token]";
|
||||
|
||||
htmlcxx::HTML::ParserDom parser;
|
||||
tree<htmlcxx::HTML::Node> login_dom = parser.parseTree(login_form_html);
|
||||
tree<htmlcxx::HTML::Node>::iterator login_it = login_dom.begin();
|
||||
tree<htmlcxx::HTML::Node>::iterator login_it_end = login_dom.end();
|
||||
for (; login_it != login_it_end; ++login_it)
|
||||
std::string login_form_xhtml = Util::htmlToXhtml(login_form_html);
|
||||
|
||||
tinyxml2::XMLDocument doc;
|
||||
doc.Parse(login_form_xhtml.c_str());
|
||||
tinyxml2::XMLNode* node = doc.FirstChildElement("html");
|
||||
while(node)
|
||||
{
|
||||
if (login_it->tagName()=="input")
|
||||
tinyxml2::XMLElement *element = node->ToElement();
|
||||
if (element->Name() && !std::string(element->Name()).compare("input"))
|
||||
{
|
||||
login_it->parseAttributes();
|
||||
if (login_it->attribute("id").second == "login__token")
|
||||
std::string name = element->Attribute("name");
|
||||
if (name == tagname_token)
|
||||
{
|
||||
token = login_it->attribute("value").second; // login token
|
||||
tagname_token = login_it->attribute("name").second;
|
||||
token = element->Attribute("value");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
node = Util::nextXMLNode(node);
|
||||
}
|
||||
|
||||
if (token.empty())
|
||||
@ -419,25 +423,28 @@ std::string Website::LoginGetAuthCodeCurl(const std::string& login_form_html, co
|
||||
std::string tagname_two_step_auth_letter_2 = "second_step_authentication[token][letter_2]";
|
||||
std::string tagname_two_step_auth_letter_3 = "second_step_authentication[token][letter_3]";
|
||||
std::string tagname_two_step_auth_letter_4 = "second_step_authentication[token][letter_4]";
|
||||
std::string tagname_two_step_token;
|
||||
std::string tagname_two_step_token = "second_step_authentication[_token]";
|
||||
std::string token_two_step;
|
||||
std::string two_step_html = this->getResponse(redirect_url);
|
||||
redirect_url = "";
|
||||
|
||||
tree<htmlcxx::HTML::Node> two_step_dom = parser.parseTree(two_step_html);
|
||||
tree<htmlcxx::HTML::Node>::iterator two_step_it = two_step_dom.begin();
|
||||
tree<htmlcxx::HTML::Node>::iterator two_step_it_end = two_step_dom.end();
|
||||
for (; two_step_it != two_step_it_end; ++two_step_it)
|
||||
std::string two_step_xhtml = Util::htmlToXhtml(two_step_html);
|
||||
doc.Parse(two_step_xhtml.c_str());
|
||||
node = doc.FirstChildElement("html");
|
||||
while(node)
|
||||
{
|
||||
if (two_step_it->tagName()=="input")
|
||||
tinyxml2::XMLElement *element = node->ToElement();
|
||||
if (element->Name() && !std::string(element->Name()).compare("input"))
|
||||
{
|
||||
two_step_it->parseAttributes();
|
||||
if (two_step_it->attribute("id").second == "second_step_authentication__token")
|
||||
std::string name = element->Attribute("name");
|
||||
if (name == tagname_two_step_token)
|
||||
{
|
||||
token_two_step = two_step_it->attribute("value").second; // two step token
|
||||
tagname_two_step_token = two_step_it->attribute("name").second;
|
||||
token_two_step = element->Attribute("value");
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
node = Util::nextXMLNode(node);
|
||||
}
|
||||
|
||||
std::cerr << "Security code: ";
|
||||
@ -568,52 +575,6 @@ bool Website::IsLoggedIn()
|
||||
return this->IsloggedInSimple();
|
||||
}
|
||||
|
||||
/* Complex login check. Check login by checking email address on the account settings page.
|
||||
returns true if we are logged in
|
||||
returns false if we are not logged in
|
||||
*/
|
||||
bool Website::IsLoggedInComplex(const std::string& email)
|
||||
{
|
||||
bool bIsLoggedIn = false;
|
||||
std::string html = this->getResponse("https://www.gog.com/account/settings/security");
|
||||
std::string email_lowercase = boost::algorithm::to_lower_copy(email); // boost::algorithm::to_lower does in-place modification but "email" is read-only so we need to make a copy of it
|
||||
|
||||
htmlcxx::HTML::ParserDom parser;
|
||||
tree<htmlcxx::HTML::Node> dom = parser.parseTree(html);
|
||||
tree<htmlcxx::HTML::Node>::iterator it = dom.begin();
|
||||
tree<htmlcxx::HTML::Node>::iterator end = dom.end();
|
||||
dom = parser.parseTree(html);
|
||||
it = dom.begin();
|
||||
end = dom.end();
|
||||
for (; it != end; ++it)
|
||||
{
|
||||
if (it->tagName()=="strong")
|
||||
{
|
||||
it->parseAttributes();
|
||||
if (it->attribute("class").second == "settings-item__value settings-item__section")
|
||||
{
|
||||
for (unsigned int i = 0; i < dom.number_of_children(it); ++i)
|
||||
{
|
||||
tree<htmlcxx::HTML::Node>::iterator tag_it = dom.child(it, i);
|
||||
if (!tag_it->isTag() && !tag_it->isComment())
|
||||
{
|
||||
std::string tag_text = boost::algorithm::to_lower_copy(tag_it->text());
|
||||
if (tag_text == email_lowercase)
|
||||
{
|
||||
bIsLoggedIn = true; // We are logged in
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
if (bIsLoggedIn) // We are logged in so no need to go through the remaining tags
|
||||
break;
|
||||
}
|
||||
|
||||
return bIsLoggedIn;
|
||||
}
|
||||
|
||||
/* Simple login check. Check login by trying to get account page. If response code isn't 200 then login failed.
|
||||
returns true if we are logged in
|
||||
returns false if we are not logged in
|
||||
|
Loading…
Reference in New Issue
Block a user