aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--c/data.cpp67
-rw-r--r--c/main.cpp2
2 files changed, 39 insertions, 30 deletions
diff --git a/c/data.cpp b/c/data.cpp
index 149d9b6..7c0fcb7 100644
--- a/c/data.cpp
+++ b/c/data.cpp
@@ -20,13 +20,42 @@ struct XmlError : public std::exception
}
};
-using HtmlParserCtxtPtr = Managed<htmlParserCtxtPtr, xmlFreeParserCtxt, XmlError>;
-using HtmlDocPtr = Managed<htmlDocPtr, xmlFreeDoc, XmlError>;
using XmlXPathContextPtr = Managed<xmlXPathContextPtr, xmlXPathFreeContext, XmlError>;
using XmlXPathObjectPtr = Managed<xmlXPathObjectPtr, xmlXPathFreeObject, XmlError>;
static inline void XmlFree(void* p) { xmlFree(p); }
using XmlCharPtr = Managed<xmlChar*, XmlFree, XmlError>;
+/* Downloads the HTML document at a URL using WinINet and parses it
+ * piece by piece with libxml2's HTML "push parser". The object owns
+ * the internet handles, the parser context and the parsed document;
+ * the document is only valid for as long as the ParsedDoc lives.
+ *
+ * wszUrl is the URL handed to InternetOpenUrl; szUrl is the same
+ * location as a narrow string, passed to libxml2 as the document's
+ * file name.
+ *
+ * The constructor throws InternetError if reading fails and XmlError
+ * if no document could be built. The Managed members presumably
+ * throw their respective error types when given a null handle --
+ * NOTE(review): confirm against the definition of Managed. */
+struct ParsedDoc
+{
+	using InternetHandle = Managed<HINTERNET, InternetCloseHandle, InternetError>;
+	using HtmlParserCtxtPtr = Managed<htmlParserCtxtPtr, xmlFreeParserCtxt, XmlError>;
+	using HtmlDocPtr = Managed<htmlDocPtr, xmlFreeDoc, XmlError>;
+
+	InternetHandle hi;
+	InternetHandle hiUrl;
+	HtmlParserCtxtPtr ctxt;
+	char buf[1024];
+
+	ParsedDoc(const wchar_t* wszUrl, const char* szUrl)
+		: hi(InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0)),
+		  hiUrl(InternetOpenUrl(hi, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0)),
+		  /* No initial chunk: buf holds no data yet, and passing
+		   * it here would feed uninitialized bytes to the parser. */
+		  ctxt(htmlCreatePushParserCtxt(nullptr, nullptr, nullptr, 0, szUrl, XML_CHAR_ENCODING_UTF8))
+	{
+		htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
+
+		try {
+			/* The transfer is complete once InternetReadFile
+			 * succeeds and reports zero bytes read. */
+			for (;;) {
+				DWORD cbRead = 0;
+				if (!InternetReadFile(hiUrl, buf, sizeof(buf), &cbRead))
+					throw InternetError();
+				if (cbRead == 0)
+					break;
+				/* htmlParseChunk returns 0 on success and an
+				 * error code otherwise. In recover mode, a
+				 * non-zero code is expected for real-world
+				 * HTML, so it is not treated as fatal. */
+				htmlParseChunk(ctxt, buf, static_cast<int>(cbRead), 0);
+			}
+			htmlParseChunk(ctxt, buf, 0, 1); /* Stop parsing. */
+
+			/* The only unrecoverable outcome is that no
+			 * document was produced at all. */
+			if (!ctxt->myDoc)
+				throw XmlError();
+		} catch (...) {
+			/* The destructor does not run when the constructor
+			 * throws, so release the partial document here. */
+			FreeDoc();
+			throw;
+		}
+	}
+
+	~ParsedDoc() { FreeDoc(); }
+
+	/* The object frees ctxt->myDoc itself, so copying it would lead
+	 * to a double free. */
+	ParsedDoc(const ParsedDoc&) = delete;
+	ParsedDoc& operator=(const ParsedDoc&) = delete;
+
+	operator htmlDocPtr() { return ctxt->myDoc; }
+
+	/* Frees the parsed document, if any. xmlFreeParserCtxt leaves
+	 * ctxt->myDoc alone, so it has to be released separately. */
+	void FreeDoc() noexcept
+	{
+		htmlParserCtxtPtr p = ctxt;
+		if (p && p->myDoc) {
+			xmlFreeDoc(p->myDoc);
+			p->myDoc = nullptr;
+		}
+	}
+};
template <size_t N>
bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept
{
@@ -54,37 +83,15 @@ bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept
void FetchData()
{
- LIBXML_TEST_VERSION;
- using InternetHandle = Managed<HINTERNET, InternetCloseHandle, InternetError>;
-
/* The remote data is retrieved using WinINet from the
* Detective Conan World wiki. Using libxml2's "push parser",
- * the HTML is parsed piece by piece as it is retrieved. */
-
- InternetHandle hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0);
- InternetHandle hiUrl = InternetOpenUrl(hi, L"https://www.detectiveconanworld.com/wiki/Anime",
- nullptr, 0, INTERNET_FLAG_NO_UI, 0); //L"file://C:/Users/John/Desktop/dcw.html"
-
- char buf[1024];
- HtmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr,
- buf, sizeof(buf), "https://www.detectiveconanworld.com/wiki/Anime",
- XML_CHAR_ENCODING_UTF8);
- htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
-
- BOOL r;
- DWORD cbRead;
- while (r = InternetReadFile(hiUrl, buf, sizeof(buf), &cbRead), cbRead) {
- if (!r)
- throw InternetError();
- if (!htmlParseChunk(ctxt, buf, cbRead, 0))
- throw std::runtime_error(xmlGetLastError()->message);
- }
- htmlParseChunk(ctxt, buf, 0, 1); /* Stop parsing. */
+ * the HTML is parsed piece by piece as it is retrieved. The
+ * episode data are contained in table rows matching a (very!)
+ * specific XPath query. This is fragile theoretically, but
+ * unlikely to break practically. */
- /* The episode data are contained in table rows matching a
- * (very!) specific XPath query. This is fragile
- * theoretically, but unlikely to break practically. */
- HtmlDocPtr doc = ctxt->myDoc;
+ ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime",
+ "https://www.detectiveconanworld.com/wiki/Anime");
XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(
reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"),
diff --git a/c/main.cpp b/c/main.cpp
index 8dc5eab..771b323 100644
--- a/c/main.cpp
+++ b/c/main.cpp
@@ -4,6 +4,7 @@
#include <windows.h>
#include <commctrl.h>
#include <SWI-Prolog.h>
+#include <libxml/xmlversion.h>
#include "debug.h"
#include "resource.h"
@@ -86,6 +87,7 @@ static void UpdateTheme();
int WINAPI WinMain(const HINSTANCE hInstance, const HINSTANCE, char* const, const int nCmdShow)
{
setbuf(stdout, nullptr);
+ LIBXML_TEST_VERSION;
/* Initialize Prolog. */
const char* argv[] = {"EpisodeBrowser", nullptr};