aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Ankarström <john@ankarstrom.se> 2022-08-30 02:19:56 +0200
committer John Ankarström <john@ankarstrom.se> 2022-08-30 02:20:23 +0200
commit 6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3 (patch)
tree 4f08e1f31429e2e716fa618f550342287a091a0e
parent 16ba8f3ae15363e921fca0e97f5c89cf12987b21 (diff)
download EpisodeBrowser-6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3.tar.gz
Fix Unique Good, Bad. Simplify HTML parser.
Good and Bad should obviously do the exact opposite of what they did.
-rw-r--r-- c/data.cpp 76
-rw-r--r-- c/data.h 8
-rw-r--r-- c/ext.cpp 2
-rw-r--r-- c/util.h 4
4 files changed, 43 insertions, 47 deletions
diff --git a/c/data.cpp b/c/data.cpp
index df5627a..7ae6ad1 100644
--- a/c/data.cpp
+++ b/c/data.cpp
@@ -14,50 +14,44 @@
static Unique<HINTERNET, InternetCloseHandle> s_hi =
InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0);
-/* ParsedDoc downloads and parses an HTML document. */
-struct ParsedDoc
+Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> RemoteParserCtxt(const wchar_t* wszUrl, const char* szUrl)
{
- Unique<HINTERNET, InternetCloseHandle> hiUrl;
- Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx;
- char bufI[1024];
+ if (s_hi.Bad(0))
+ throw Win32Error();
+
+ Unique<HINTERNET, InternetCloseHandle> hiUrl = InternetOpenUrlW(
+ s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0);
+ if (hiUrl.Bad(0))
+ throw InternetError();
+
char bufX[1024];
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = htmlCreatePushParserCtxt(
+ nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8);
+ if (ctx.Bad(0))
+ throw XmlError();
- ParsedDoc(const wchar_t* wszUrl, const char* szUrl)
- {
- if (!s_hi.Bad(0))
- throw Win32Error();
+ htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
- hiUrl = InternetOpenUrlW(s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0);
- if (!hiUrl.Bad(0))
+ BOOL r;
+ DWORD cbRead;
+ char bufI[1024];
+ while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) {
+ if (!r)
throw InternetError();
-
- ctx = htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8);
- if (!ctx.Bad(0))
+ if (!htmlParseChunk(ctx.v, bufI, cbRead, 0))
throw XmlError();
-
- htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
-
- BOOL r;
- DWORD cbRead;
- while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) {
- if (!r)
- throw InternetError();
- if (!htmlParseChunk(ctx.v, bufI, cbRead, 0))
- throw XmlError();
- }
- htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */
}
+ htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */
- operator htmlDocPtr() { return ctx.v->myDoc; }
-};
+ return ctx;
+}
static inline void XmlFree(void* p) { xmlFree(p); }
template <size_t N>
-bool WcharsFromXmlchars(wchar_t (&dst)[N], xmlChar* utf8_)
+bool WcharsFromXmlchars(wchar_t (&dst)[N], Unique<xmlChar*, XmlFree> utf8)
{
- Unique<xmlChar*, XmlFree> utf8 = utf8_;
- if (!utf8.Bad(0))
+ if (utf8.Bad(0))
throw XmlError();
/* Truncate if source is larger than destination. */
@@ -167,17 +161,18 @@ void FetchData(unsigned char* sig)
* specific XPath query. This is fragile theoretically, but
* unlikely to break practically. */
- ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime",
- "https://www.detectiveconanworld.com/wiki/Anime");
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx =
+ RemoteParserCtxt(L"https://www.detectiveconanworld.com/wiki/Anime",
+ "https://www.detectiveconanworld.com/wiki/Anime");
- Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc);
- if (!xpathCtx.Bad(0))
+ Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc);
+ if (xpathCtx.Bad(0))
throw XmlError();
Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = xmlXPathEvalExpression(
reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"),
xpathCtx.v);
- if (!xpathObj.Bad(0))
+ if (xpathObj.Bad(0))
throw XmlError();
xmlNodeSetPtr nodes = xpathObj.v->nodesetval;
@@ -269,16 +264,17 @@ void FetchScreenwriters(unsigned char* sig)
Wcscpy(Buf(url)+Len(prefix), d.wiki);
/* Retrieve screenwriter from HTML. */
- ParsedDoc doc(url, nullptr);
- Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc);
- if (!xpathCtx.Bad(0))
+
+ Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = RemoteParserCtxt(url, nullptr);
+ Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc);
+ if (xpathCtx.Bad(0))
throw XmlError();
Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj =
xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>(
"//th[contains(text(), 'Screenplay:')]/following-sibling::td"),
xpathCtx.v);
- if (!xpathObj.Bad(0))
+ if (xpathObj.Bad(0))
throw XmlError();
xmlNodeSetPtr nodes = xpathObj.v->nodesetval;
diff --git a/c/data.h b/c/data.h
index 171edaa..0f00ecb 100644
--- a/c/data.h
+++ b/c/data.h
@@ -120,11 +120,11 @@ struct FileView
{
hf = CreateFile(filename, GENERIC_READ|GENERIC_WRITE,
0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr);
- if (!hf.Bad(INVALID_HANDLE_VALUE)) {
+ if (hf.Bad(INVALID_HANDLE_VALUE)) {
if (GetLastError() == ERROR_FILE_NOT_FOUND) {
hf = CreateFile(filename, GENERIC_READ|GENERIC_WRITE,
0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, nullptr);
- if (!hf.Bad(INVALID_HANDLE_VALUE))
+ if (hf.Bad(INVALID_HANDLE_VALUE))
throw Win32Error();
} else
throw Win32Error();
@@ -134,11 +134,11 @@ struct FileView
cbMap.QuadPart = c*sizeof(T);
hm = CreateFileMapping(hf.v, nullptr, PAGE_READWRITE,
cbMap.HighPart, cbMap.LowPart, nullptr);
- if (!hm.Bad(0))
+ if (hm.Bad(0))
throw Win32Error();
view = reinterpret_cast<T*>(MapViewOfFile(hm.v, FILE_MAP_ALL_ACCESS, 0, 0, 0));
- if (!view.Bad(0))
+ if (view.Bad(0))
throw Win32Error();
}
diff --git a/c/ext.cpp b/c/ext.cpp
index 8e58ade..694581d 100644
--- a/c/ext.cpp
+++ b/c/ext.cpp
@@ -87,7 +87,7 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo
WIN32_FIND_DATA fdata;
Unique<HANDLE, FindClose> h = FindFirstFileW(pat, &fdata);
- if (!h.Bad(INVALID_HANDLE_VALUE))
+ if (h.Bad(INVALID_HANDLE_VALUE))
throw Win32Error();
do
diff --git a/c/util.h b/c/util.h
index f9e3057..7ed7636 100644
--- a/c/util.h
+++ b/c/util.h
@@ -72,11 +72,11 @@ struct Unique
}
bool Good(T u) noexcept
{
- return ok = v == u;
+ return !(ok = v == u);
}
bool Bad(T u) noexcept
{
- return ok = v != u;
+ return !(ok = v != u);
}
~Unique()
{