about summary refs log tree commit diff
path: root/c/data.cpp
diff options
context:
space:
mode:
author John Ankarström <john@ankarstrom.se> 2022-08-30 02:19:56 +0200
committer John Ankarström <john@ankarstrom.se> 2022-08-30 02:20:23 +0200
commit 6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3 (patch)
tree 4f08e1f31429e2e716fa618f550342287a091a0e /c/data.cpp
parent 16ba8f3ae15363e921fca0e97f5c89cf12987b21 (diff)
download EpisodeBrowser-6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3.tar.gz
Fix Unique Good, Bad. Simplify HTML parser.
Good and Bad should obviously do the exact opposite thing to what they did.
Diffstat (limited to 'c/data.cpp')
-rw-r--r-- c/data.cpp 76
1 files changed, 36 insertions, 40 deletions
diff --git a/c/data.cpp b/c/data.cpp
index df5627a..7ae6ad1 100644
--- a/c/data.cpp
+++ b/c/data.cpp
@@ -14,50 +14,44 @@
static Unique<HINTERNET, InternetCloseHandle> s_hi =
InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0);
-/* ParsedDoc downloads and parses an HTML document. */
-struct ParsedDoc
+Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> RemoteParserCtxt(const wchar_t* wszUrl, const char* szUrl)
{
- Unique<HINTERNET, InternetCloseHandle> hiUrl;
- Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx;
- char bufI[1024];
+ if (s_hi.Bad(0))
+ throw Win32Error();
+
+ Unique<HINTERNET, InternetCloseHandle> hiUrl = InternetOpenUrlW(
+ s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0);
+ if (hiUrl.Bad(0))
+ throw InternetError();
+
char bufX[1024];
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = htmlCreatePushParserCtxt(
+ nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8);
+ if (ctx.Bad(0))
+ throw XmlError();
- ParsedDoc(const wchar_t* wszUrl, const char* szUrl)
- {
- if (!s_hi.Bad(0))
- throw Win32Error();
+ htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
- hiUrl = InternetOpenUrlW(s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0);
- if (!hiUrl.Bad(0))
+ BOOL r;
+ DWORD cbRead;
+ char bufI[1024];
+ while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) {
+ if (!r)
throw InternetError();
-
- ctx = htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8);
- if (!ctx.Bad(0))
+ if (!htmlParseChunk(ctx.v, bufI, cbRead, 0))
throw XmlError();
-
- htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
-
- BOOL r;
- DWORD cbRead;
- while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) {
- if (!r)
- throw InternetError();
- if (!htmlParseChunk(ctx.v, bufI, cbRead, 0))
- throw XmlError();
- }
- htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */
}
+ htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */
- operator htmlDocPtr() { return ctx.v->myDoc; }
-};
+ return ctx;
+}
static inline void XmlFree(void* p) { xmlFree(p); }
template <size_t N>
-bool WcharsFromXmlchars(wchar_t (&dst)[N], xmlChar* utf8_)
+bool WcharsFromXmlchars(wchar_t (&dst)[N], Unique<xmlChar*, XmlFree> utf8)
{
- Unique<xmlChar*, XmlFree> utf8 = utf8_;
- if (!utf8.Bad(0))
+ if (utf8.Bad(0))
throw XmlError();
/* Truncate if source is larger than destination. */
@@ -167,17 +161,18 @@ void FetchData(unsigned char* sig)
* specific XPath query. This is fragile theoretically, but
* unlikely to break practically. */
- ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime",
- "https://www.detectiveconanworld.com/wiki/Anime");
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx =
+ RemoteParserCtxt(L"https://www.detectiveconanworld.com/wiki/Anime",
+ "https://www.detectiveconanworld.com/wiki/Anime");
- Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc);
- if (!xpathCtx.Bad(0))
+ Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc);
+ if (xpathCtx.Bad(0))
throw XmlError();
Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = xmlXPathEvalExpression(
reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"),
xpathCtx.v);
- if (!xpathObj.Bad(0))
+ if (xpathObj.Bad(0))
throw XmlError();
xmlNodeSetPtr nodes = xpathObj.v->nodesetval;
@@ -269,16 +264,17 @@ void FetchScreenwriters(unsigned char* sig)
Wcscpy(Buf(url)+Len(prefix), d.wiki);
/* Retrieve screenwriter from HTML. */
- ParsedDoc doc(url, nullptr);
- Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc);
- if (!xpathCtx.Bad(0))
+
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = RemoteParserCtxt(url, nullptr);
+ Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc);
+ if (xpathCtx.Bad(0))
throw XmlError();
Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj =
xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>(
"//th[contains(text(), 'Screenplay:')]/following-sibling::td"),
xpathCtx.v);
- if (!xpathObj.Bad(0))
+ if (xpathObj.Bad(0))
throw XmlError();
xmlNodeSetPtr nodes = xpathObj.v->nodesetval;