From 3962b1bdfb2a8a2e3a5ff4f4e51a61b0c44f2e6b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20Ankarstr=C3=B6m?= Date: Tue, 23 Aug 2022 01:43:09 +0200 Subject: Add Managed (generic RAII type). --- c/data.cpp | 98 ++++++++++++++++++++++++-------------------------------------- c/ext.cpp | 23 +++++---------- c/util.h | 16 ++++++++++ 3 files changed, 62 insertions(+), 75 deletions(-) (limited to 'c') diff --git a/c/data.cpp b/c/data.cpp index c1e7177..8d064a8 100644 --- a/c/data.cpp +++ b/c/data.cpp @@ -7,63 +7,25 @@ #include "data.h" #include "win.h" -struct InternetFile +struct XmlError : public std::exception { - InternetFile(const wchar_t* url) + const char* msg; + XmlError() { - hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); - if (!hi) - throw Win32Error(); - - hiUrl = InternetOpenUrl(hi, url, - nullptr, 0, INTERNET_FLAG_NO_UI, 0); - if (!hiUrl) { - DWORD e = GetLastError(); - InternetCloseHandle(hi); - throw InternetError(e); - } + msg = xmlGetLastError()->message; } - - ~InternetFile() + virtual const char* what() const noexcept { - InternetCloseHandle(hiUrl); - InternetCloseHandle(hi); - } - - DWORD Read(void* buf, DWORD cb) - { - DWORD cbRead; - if (InternetReadFile(hiUrl, buf, cb, &cbRead)) - return cbRead; - else - throw InternetError(); + return msg; } - - HINTERNET hi; - HINTERNET hiUrl; }; -template -struct XmlPtr -{ - XmlPtr(T v) : v(v) - { - if (!v) - throw std::runtime_error(xmlGetLastError()->message); - } - ~XmlPtr() { F(v); } - operator T() { return v; } - T operator ->() { return v; } -private: - T v; -}; - -using HtmlParserCtxtPtr = XmlPtr; -using HtmlDocPtr = XmlPtr; -using XmlXPathContextPtr = XmlPtr; -using XmlXPathObjectPtr = XmlPtr; +using HtmlParserCtxtPtr = Managed; +using HtmlDocPtr = Managed; +using XmlXPathContextPtr = Managed; +using XmlXPathObjectPtr = Managed; static inline void XmlFree(void* p) { xmlFree(p); } -using XmlCharPtr = XmlPtr; +using XmlCharPtr = Managed; template bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept @@ -93,24 +55,36 @@ bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept void FetchData() { LIBXML_TEST_VERSION; + using InternetHandle = Managed; - InternetFile inf(L"https://www.detectiveconanworld.com/wiki/Anime"); - //InternetFile inf(L"file://C:/Users/John/Desktop/dcw.html"); - char buf[1024]; + /* The remote data is retrieved using WinINet from the + * Detective Conan World wiki. Using libxml2's "push parser", + * the HTML is parsed piece by piece as it is retrieved. */ + InternetHandle hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); + InternetHandle hiUrl = InternetOpenUrl(hi, L"https://www.detectiveconanworld.com/wiki/Anime", + nullptr, 0, INTERNET_FLAG_NO_UI, 0); //L"file://C:/Users/John/Desktop/dcw.html" + + char buf[1024]; HtmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr, buf, sizeof(buf), "https://www.detectiveconanworld.com/wiki/Anime", XML_CHAR_ENCODING_UTF8); - htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); - while (const DWORD cbRead = inf.Read(&buf, sizeof(buf))) { + BOOL r; + DWORD cbRead; + while (r = InternetReadFile(hiUrl, buf, sizeof(buf), &cbRead), cbRead) { + if (!r) + throw InternetError(); if (!htmlParseChunk(ctxt, buf, cbRead, 0)) throw std::runtime_error(xmlGetLastError()->message); } htmlParseChunk(ctxt, buf, 0, 1); /* Stop parsing. */ - /* Find table rows containing episode data. */ + /* The episode data are contained in table rows matching a + * (very!) specific XPath query. This is fragile + * theoretically, but unlikely to break practically. */ + HtmlDocPtr doc = ctxt->myDoc; XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( @@ -133,7 +107,10 @@ void FetchData() ElvDataA& e = g_fvElv.At(i); DlvDataA& d = g_fvDlv.At(i); - /* Get cells. */ + /* Each datum is contained within a specific cell in + * the row. The child element count above ensures that + * none of the following nodes are null. */ + const xmlNodePtr nodeEp = xmlFirstElementChild(node); const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp)); const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle); @@ -142,17 +119,18 @@ void FetchData() const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource); WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp)); - e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove WPS suffix. */ + e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */ WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle)); WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate)); WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource)); WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint)); - /* Get wiki URL. */ + /* Retrieve the link to the episode's wiki entry, + * which should be the first (and only) child element + * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) - WcharsFromXmlchars(d.wiki, - xmlGetProp(nodeLink, reinterpret_cast("href"))); + WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast("href"))); } } diff --git a/c/ext.cpp b/c/ext.cpp index f485dce..ee2c943 100644 --- a/c/ext.cpp +++ b/c/ext.cpp @@ -76,6 +76,8 @@ static inline bool MatchFileName(wchar_t (&file)[MAX_PATH], const wchar_t* const static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const root, const wchar_t* const siEp, const int level = 0) { + using FindHandle = Managed; + /* Don't recurse too much. */ if (level > 3) return false; @@ -84,9 +86,7 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo Swprintf(pat, L"%s\\*", root); WIN32_FIND_DATA fdata; - HANDLE hf = FindFirstFile(pat, &fdata); - if (hf == INVALID_HANDLE_VALUE) - throw Win32Error().what(); + FindHandle h = FindFirstFile(pat, &fdata); do { if (fdata.cFileName[0] == L'.') @@ -95,25 +95,18 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo /* Recurse into directory. */ wchar_t root2[MAX_PATH]; Swprintf(root2, L"%s\\%s", root, fdata.cFileName); - try { - if (FindMatchingFile(file, root2, siEp, level+1)) - return true; - } catch (...) { - FindClose(hf); - throw; - } + if (FindMatchingFile(file, root2, siEp, level+1)) + return true; } else /* Try to match file name. */ if (MatchFileName(fdata.cFileName, siEp)) { Swprintf(file, L"%s\\%s", root, fdata.cFileName); return true; } - } while (FindNextFile(hf, &fdata)); + } while (FindNextFile(h, &fdata)); - DWORD e = GetLastError(); - FindClose(hf); - if (e != ERROR_NO_MORE_FILES) - throw Win32Error(e); + if (GetLastError() != ERROR_NO_MORE_FILES) + throw Win32Error(); return false; } diff --git a/c/util.h b/c/util.h index 7ca8bfe..c08050d 100644 --- a/c/util.h +++ b/c/util.h @@ -23,6 +23,22 @@ inline int Cmp(const int a, const int b) return -1; } +template +struct Managed +{ + T obj; + Managed(T obj) : obj(obj) + { + if (obj == reinterpret_cast(E)) + throw U(); + } + ~Managed() { F(obj); } + operator T() { return obj; } + auto& operator *() { return *obj; } + auto& operator ->() { return obj; } + auto& operator [](size_t i) { return obj[i]; } +}; + /* Buf is a span-like structure of a buffer and its size. */ template struct Buf -- cgit v1.2.3