diff options
author | John Ankarström <john@ankarstrom.se> | 2022-08-30 02:19:56 +0200 |
---|---|---|
committer | John Ankarström <john@ankarstrom.se> | 2022-08-30 02:20:23 +0200 |
commit | 6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3 (patch) | |
tree | 4f08e1f31429e2e716fa618f550342287a091a0e | |
parent | 16ba8f3ae15363e921fca0e97f5c89cf12987b21 (diff) | |
download | EpisodeBrowser-6832f91958e6c2cc44a8c4a4e126ea20b8c8d5a3.tar.gz |
Fix Unique Good, Bad. Simplify HTML parser.
Good and Bad should obviously do the exact opposite of
what they did.
-rw-r--r-- | c/data.cpp | 76 | ||||
-rw-r--r-- | c/data.h | 8 | ||||
-rw-r--r-- | c/ext.cpp | 2 | ||||
-rw-r--r-- | c/util.h | 4 |
4 files changed, 43 insertions, 47 deletions
@@ -14,50 +14,44 @@ static Unique<HINTERNET, InternetCloseHandle> s_hi = InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); -/* ParsedDoc downloads and parses an HTML document. */ -struct ParsedDoc +Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> RemoteParserCtxt(const wchar_t* wszUrl, const char* szUrl) { - Unique<HINTERNET, InternetCloseHandle> hiUrl; - Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx; - char bufI[1024]; + if (s_hi.Bad(0)) + throw Win32Error(); + + Unique<HINTERNET, InternetCloseHandle> hiUrl = InternetOpenUrlW( + s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0); + if (hiUrl.Bad(0)) + throw InternetError(); + char bufX[1024]; + Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = htmlCreatePushParserCtxt( + nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8); + if (ctx.Bad(0)) + throw XmlError(); - ParsedDoc(const wchar_t* wszUrl, const char* szUrl) - { - if (!s_hi.Bad(0)) - throw Win32Error(); + htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); - hiUrl = InternetOpenUrlW(s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0); - if (!hiUrl.Bad(0)) + BOOL r; + DWORD cbRead; + char bufI[1024]; + while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) { + if (!r) throw InternetError(); - - ctx = htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8); - if (!ctx.Bad(0)) + if (!htmlParseChunk(ctx.v, bufI, cbRead, 0)) throw XmlError(); - - htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); - - BOOL r; - DWORD cbRead; - while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) { - if (!r) - throw InternetError(); - if (!htmlParseChunk(ctx.v, bufI, cbRead, 0)) - throw XmlError(); - } - htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */ } + htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. 
*/ - operator htmlDocPtr() { return ctx.v->myDoc; } -}; + return ctx; +} static inline void XmlFree(void* p) { xmlFree(p); } template <size_t N> -bool WcharsFromXmlchars(wchar_t (&dst)[N], xmlChar* utf8_) +bool WcharsFromXmlchars(wchar_t (&dst)[N], Unique<xmlChar*, XmlFree> utf8) { - Unique<xmlChar*, XmlFree> utf8 = utf8_; - if (!utf8.Bad(0)) + if (utf8.Bad(0)) throw XmlError(); /* Truncate if source is larger than destination. */ @@ -167,17 +161,18 @@ void FetchData(unsigned char* sig) * specific XPath query. This is fragile theoretically, but * unlikely to break practically. */ - ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime", - "https://www.detectiveconanworld.com/wiki/Anime"); + Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = + RemoteParserCtxt(L"https://www.detectiveconanworld.com/wiki/Anime", + "https://www.detectiveconanworld.com/wiki/Anime"); - Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc); - if (!xpathCtx.Bad(0)) + Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc); + if (xpathCtx.Bad(0)) throw XmlError(); Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = xmlXPathEvalExpression( reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"), xpathCtx.v); - if (!xpathObj.Bad(0)) + if (xpathObj.Bad(0)) throw XmlError(); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; @@ -269,16 +264,17 @@ void FetchScreenwriters(unsigned char* sig) Wcscpy(Buf(url)+Len(prefix), d.wiki); /* Retrieve screenwriter from HTML. 
*/ - ParsedDoc doc(url, nullptr); - Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc); - if (!xpathCtx.Bad(0)) + + Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctx = RemoteParserCtxt(url, nullptr); + Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(ctx.v->myDoc); + if (xpathCtx.Bad(0)) throw XmlError(); Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>( "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), xpathCtx.v); - if (!xpathObj.Bad(0)) + if (xpathObj.Bad(0)) throw XmlError(); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; @@ -120,11 +120,11 @@ struct FileView { hf = CreateFile(filename, GENERIC_READ|GENERIC_WRITE, 0, nullptr, OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, nullptr); - if (!hf.Bad(INVALID_HANDLE_VALUE)) { + if (hf.Bad(INVALID_HANDLE_VALUE)) { if (GetLastError() == ERROR_FILE_NOT_FOUND) { hf = CreateFile(filename, GENERIC_READ|GENERIC_WRITE, 0, nullptr, CREATE_NEW, FILE_ATTRIBUTE_NORMAL, nullptr); - if (!hf.Bad(INVALID_HANDLE_VALUE)) + if (hf.Bad(INVALID_HANDLE_VALUE)) throw Win32Error(); } else throw Win32Error(); @@ -134,11 +134,11 @@ struct FileView cbMap.QuadPart = c*sizeof(T); hm = CreateFileMapping(hf.v, nullptr, PAGE_READWRITE, cbMap.HighPart, cbMap.LowPart, nullptr); - if (!hm.Bad(0)) + if (hm.Bad(0)) throw Win32Error(); view = reinterpret_cast<T*>(MapViewOfFile(hm.v, FILE_MAP_ALL_ACCESS, 0, 0, 0)); - if (!view.Bad(0)) + if (view.Bad(0)) throw Win32Error(); } @@ -87,7 +87,7 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo WIN32_FIND_DATA fdata; Unique<HANDLE, FindClose> h = FindFirstFileW(pat, &fdata); - if (!h.Bad(INVALID_HANDLE_VALUE)) + if (h.Bad(INVALID_HANDLE_VALUE)) throw Win32Error(); do @@ -72,11 +72,11 @@ struct Unique } bool Good(T u) noexcept { - return ok = v == u; + return !(ok = v == u); } bool Bad(T u) noexcept { - return ok = v != u; + return !(ok = v != u); } 
~Unique() { |