diff options
Diffstat (limited to 'c/data.cpp')
-rw-r--r-- | c/data.cpp | 107 |
1 files changed, 56 insertions, 51 deletions
@@ -11,72 +11,65 @@ #include "util.h" #include "win.h" -struct XmlError : public std::exception -{ - const char* msg; - XmlError() - { - msg = xmlGetLastError()->message; - } - virtual const char* what() const noexcept - { - return msg; - } -}; - -static inline void XmlFree(void* p) { xmlFree(p); } - -/* RAII types for WinINet and libxml2. */ -using InternetHandle = Managed<HINTERNET, InternetCloseHandle, InternetError>; -using XmlXPathContextPtr = Managed<xmlXPathContextPtr, xmlXPathFreeContext, XmlError>; -using XmlXPathObjectPtr = Managed<xmlXPathObjectPtr, xmlXPathFreeObject, XmlError>; -using XmlCharPtr = Managed<xmlChar*, XmlFree, XmlError>; - -static InternetHandle s_hi = InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); +static Unique<HINTERNET, InternetCloseHandle> s_hi = + InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); /* ParsedDoc downloads and parses an HTML document. */ struct ParsedDoc { - using HtmlParserCtxtPtr = Managed<htmlParserCtxtPtr, xmlFreeParserCtxt, XmlError>; - - InternetHandle hiUrl; - HtmlParserCtxtPtr ctxt; + Unique<HINTERNET, InternetCloseHandle> hiUrl; + Unique<htmlParserCtxtPtr, xmlFreeParserCtxt> ctxt; char bufI[1024]; char bufX[1024]; ParsedDoc(const wchar_t* wszUrl, const char* szUrl) - : hiUrl(InternetOpenUrlW(s_hi, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0)), - ctxt(htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8)) { - htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); + if (!s_hi.Not(0)) + throw Win32Error(); + + hiUrl = InternetOpenUrlW(s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0); + if (!hiUrl.Not(0)) + throw InternetError(); + + ctxt = htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8); + if (!ctxt.Not(0)) + throw XmlError(); + + htmlCtxtUseOptions(ctxt.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); BOOL r; DWORD cbRead; - while (r = InternetReadFile(hiUrl, bufI, sizeof(bufI), &cbRead), cbRead) { + while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) { if (!r) throw InternetError(); - if (!htmlParseChunk(ctxt, bufI, cbRead, 0)) + if (!htmlParseChunk(ctxt.v, bufI, cbRead, 0)) throw XmlError(); } - htmlParseChunk(ctxt, bufI, 0, 1); /* Stop parsing. */ + htmlParseChunk(ctxt.v, bufI, 0, 1); /* Stop parsing. */ } - operator htmlDocPtr() { return ctxt->myDoc; } + operator htmlDocPtr() { return ctxt.v->myDoc; } }; +static inline void XmlFree(void* p) { xmlFree(p); } + template <size_t N> -bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept +bool WcharsFromXmlchars(wchar_t (&dst)[N], xmlChar* utf8_) { + Unique<xmlChar*, XmlFree> utf8 = utf8_; + if (!utf8.Not(0)) + throw XmlError(); + /* Truncate if source is larger than destination. */ - int lenUtf8 = xmlStrlen(utf8); - utf8[Min(N, static_cast<size_t>(lenUtf8))] = 0; + int lenUtf8 = xmlStrlen(utf8.v); + utf8.v[Min(N, static_cast<size_t>(lenUtf8))] = 0; /* Convert internal representation from UTF-8 to Latin-1, * which seems to actually convert the string to proper UTF-8 * (???). */ unsigned char lat1[N]; int lenLat1 = N-1; - if (UTF8Toisolat1(lat1, &lenLat1, utf8, &lenUtf8) <= 0) + if (UTF8Toisolat1(lat1, &lenLat1, utf8.v, &lenUtf8) <= 0) return false; lat1[lenLat1] = 0; @@ -177,11 +170,17 @@ void FetchData(unsigned char* sig) ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime", "https://www.detectiveconanworld.com/wiki/Anime"); - XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); - XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( + Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc); + if (!xpathCtx.Not(0)) + throw XmlError(); + + Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = xmlXPathEvalExpression( reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"), - xpathCtx); - xmlNodeSetPtr nodes = xpathObj->nodesetval; + xpathCtx.v); + if (!xpathObj.Not(0)) + throw XmlError(); + + xmlNodeSetPtr nodes = xpathObj.v->nodesetval; if (!nodes || !nodes->nodeNr) throw std::runtime_error("could not find remote episode information"); @@ -223,7 +222,8 @@ void FetchData(unsigned char* sig) * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) - WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href"))); + WcharsFromXmlchars(d.wiki, + xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href"))); } } @@ -270,14 +270,19 @@ void FetchScreenwriters(unsigned char* sig) /* Retrieve screenwriter from HTML. */ ParsedDoc doc(url, nullptr); - XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); - XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>( - "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), - xpathCtx); - xmlNodeSetPtr nodes = xpathObj->nodesetval; - if (nodes && nodes->nodeNr) { - xmlChar* s = xmlNodeGetContent(nodes->nodeTab[0]); - WcharsFromXmlchars(d.screenwriter, s); - } + Unique<xmlXPathContextPtr, xmlXPathFreeContext> xpathCtx = xmlXPathNewContext(doc); + if (!xpathCtx.Not(0)) + throw XmlError(); + + Unique<xmlXPathObjectPtr, xmlXPathFreeObject> xpathObj = + xmlXPathEvalExpression(reinterpret_cast<const xmlChar*>( + "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), + xpathCtx.v); + if (!xpathObj.Not(0)) + throw XmlError(); + + xmlNodeSetPtr nodes = xpathObj.v->nodesetval; + if (nodes && nodes->nodeNr) + WcharsFromXmlchars(d.screenwriter, xmlNodeGetContent(nodes->nodeTab[0])); } } |