diff options
author | John Ankarström <john@ankarstrom.se> | 2022-08-23 01:43:09 +0200 |
---|---|---|
committer | John Ankarström <john@ankarstrom.se> | 2022-08-23 01:43:09 +0200 |
commit | 3962b1bdfb2a8a2e3a5ff4f4e51a61b0c44f2e6b (patch) | |
tree | 1d7ee76b4f2bfb3f205f6cacb39afdbd68062545 /c | |
parent | 74b9361ccc77bcbb6b8188ad5914d6b26530d26a (diff) | |
download | EpisodeBrowser-3962b1bdfb2a8a2e3a5ff4f4e51a61b0c44f2e6b.tar.gz |
Add Managed (generic RAII type).
Diffstat (limited to 'c')
-rw-r--r-- | c/data.cpp | 98 |
-rw-r--r-- | c/ext.cpp | 23 |
-rw-r--r-- | c/util.h | 16 |
3 files changed, 62 insertions, 75 deletions
@@ -7,63 +7,25 @@ #include "data.h" #include "win.h" -struct InternetFile +struct XmlError : public std::exception { - InternetFile(const wchar_t* url) + const char* msg; + XmlError() { - hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); - if (!hi) - throw Win32Error(); - - hiUrl = InternetOpenUrl(hi, url, - nullptr, 0, INTERNET_FLAG_NO_UI, 0); - if (!hiUrl) { - DWORD e = GetLastError(); - InternetCloseHandle(hi); - throw InternetError(e); - } + msg = xmlGetLastError()->message; } - - ~InternetFile() + virtual const char* what() const noexcept { - InternetCloseHandle(hiUrl); - InternetCloseHandle(hi); - } - - DWORD Read(void* buf, DWORD cb) - { - DWORD cbRead; - if (InternetReadFile(hiUrl, buf, cb, &cbRead)) - return cbRead; - else - throw InternetError(); + return msg; } - - HINTERNET hi; - HINTERNET hiUrl; }; -template <auto F, typename T> -struct XmlPtr -{ - XmlPtr(T v) : v(v) - { - if (!v) - throw std::runtime_error(xmlGetLastError()->message); - } - ~XmlPtr() { F(v); } - operator T() { return v; } - T operator ->() { return v; } -private: - T v; -}; - -using HtmlParserCtxtPtr = XmlPtr<xmlFreeParserCtxt, htmlParserCtxtPtr>; -using HtmlDocPtr = XmlPtr<xmlFreeDoc, htmlDocPtr>; -using XmlXPathContextPtr = XmlPtr<xmlXPathFreeContext, xmlXPathContextPtr>; -using XmlXPathObjectPtr = XmlPtr<xmlXPathFreeObject, xmlXPathObjectPtr>; +using HtmlParserCtxtPtr = Managed<htmlParserCtxtPtr, xmlFreeParserCtxt, XmlError>; +using HtmlDocPtr = Managed<htmlDocPtr, xmlFreeDoc, XmlError>; +using XmlXPathContextPtr = Managed<xmlXPathContextPtr, xmlXPathFreeContext, XmlError>; +using XmlXPathObjectPtr = Managed<xmlXPathObjectPtr, xmlXPathFreeObject, XmlError>; static inline void XmlFree(void* p) { xmlFree(p); } -using XmlCharPtr = XmlPtr<XmlFree, xmlChar*>; +using XmlCharPtr = Managed<xmlChar*, XmlFree, XmlError>; template <size_t N> bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept @@ -93,24 +55,36 @@ bool 
WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept void FetchData() { LIBXML_TEST_VERSION; + using InternetHandle = Managed<HINTERNET, InternetCloseHandle, InternetError>; - InternetFile inf(L"https://www.detectiveconanworld.com/wiki/Anime"); - //InternetFile inf(L"file://C:/Users/John/Desktop/dcw.html"); - char buf[1024]; + /* The remote data is retrieved using WinINet from the + * Detective Conan World wiki. Using libxml2's "push parser", + * the HTML is parsed piece by piece as it is retrieved. */ + InternetHandle hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); + InternetHandle hiUrl = InternetOpenUrl(hi, L"https://www.detectiveconanworld.com/wiki/Anime", + nullptr, 0, INTERNET_FLAG_NO_UI, 0); //L"file://C:/Users/John/Desktop/dcw.html" + + char buf[1024]; HtmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr, buf, sizeof(buf), "https://www.detectiveconanworld.com/wiki/Anime", XML_CHAR_ENCODING_UTF8); - htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); - while (const DWORD cbRead = inf.Read(&buf, sizeof(buf))) { + BOOL r; + DWORD cbRead; + while (r = InternetReadFile(hiUrl, buf, sizeof(buf), &cbRead), cbRead) { + if (!r) + throw InternetError(); if (!htmlParseChunk(ctxt, buf, cbRead, 0)) throw std::runtime_error(xmlGetLastError()->message); } htmlParseChunk(ctxt, buf, 0, 1); /* Stop parsing. */ - /* Find table rows containing episode data. */ + /* The episode data are contained in table rows matching a + * (very!) specific XPath query. This is fragile + * theoretically, but unlikely to break practically. */ + HtmlDocPtr doc = ctxt->myDoc; XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( @@ -133,7 +107,10 @@ void FetchData() ElvDataA& e = g_fvElv.At(i); DlvDataA& d = g_fvDlv.At(i); - /* Get cells. */ + /* Each datum is contained within a specific cell in + * the row. 
The child element count above ensures that + * none of the following nodes are null. */ + const xmlNodePtr nodeEp = xmlFirstElementChild(node); const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp)); const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle); @@ -142,17 +119,18 @@ void FetchData() const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource); WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp)); - e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove WPS suffix. */ + e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */ WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle)); WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate)); WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource)); WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint)); - /* Get wiki URL. */ + /* Retrieve the link to the episode's wiki entry, + * which should be the first (and only) child element + * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) - WcharsFromXmlchars(d.wiki, - xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href"))); + WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href"))); } } @@ -76,6 +76,8 @@ static inline bool MatchFileName(wchar_t (&file)[MAX_PATH], const wchar_t* const static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const root, const wchar_t* const siEp, const int level = 0) { + using FindHandle = Managed<HANDLE, FindClose, Win32Error, -1>; + /* Don't recurse too much. 
*/ if (level > 3) return false; @@ -84,9 +86,7 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo Swprintf(pat, L"%s\\*", root); WIN32_FIND_DATA fdata; - HANDLE hf = FindFirstFile(pat, &fdata); - if (hf == INVALID_HANDLE_VALUE) - throw Win32Error().what(); + FindHandle h = FindFirstFile(pat, &fdata); do { if (fdata.cFileName[0] == L'.') @@ -95,25 +95,18 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo /* Recurse into directory. */ wchar_t root2[MAX_PATH]; Swprintf(root2, L"%s\\%s", root, fdata.cFileName); - try { - if (FindMatchingFile(file, root2, siEp, level+1)) - return true; - } catch (...) { - FindClose(hf); - throw; - } + if (FindMatchingFile(file, root2, siEp, level+1)) + return true; } else /* Try to match file name. */ if (MatchFileName(fdata.cFileName, siEp)) { Swprintf(file, L"%s\\%s", root, fdata.cFileName); return true; } - } while (FindNextFile(hf, &fdata)); + } while (FindNextFile(h, &fdata)); - DWORD e = GetLastError(); - FindClose(hf); - if (e != ERROR_NO_MORE_FILES) - throw Win32Error(e); + if (GetLastError() != ERROR_NO_MORE_FILES) + throw Win32Error(); return false; } @@ -23,6 +23,22 @@ inline int Cmp(const int a, const int b) return -1; } +template <typename T, auto F, typename U, auto E = 0> +struct Managed +{ + T obj; + Managed(T obj) : obj(obj) + { + if (obj == reinterpret_cast<T>(E)) + throw U(); + } + ~Managed() { F(obj); } + operator T() { return obj; } + auto& operator *() { return *obj; } + auto& operator ->() { return obj; } + auto& operator [](size_t i) { return obj[i]; } +}; + /* Buf is a span-like structure of a buffer and its size. */ template <typename T> struct Buf |