about | summary | refs | log | tree | commit | diff
path: root/c
diff options
context:
space:
mode:
Diffstat (limited to 'c')
-rw-r--r-- c/data.cpp 98
-rw-r--r-- c/ext.cpp 23
-rw-r--r-- c/util.h 16
3 files changed, 62 insertions, 75 deletions
diff --git a/c/data.cpp b/c/data.cpp
index c1e7177..8d064a8 100644
--- a/c/data.cpp
+++ b/c/data.cpp
@@ -7,63 +7,25 @@
#include "data.h"
#include "win.h"
-struct InternetFile
+struct XmlError : public std::exception
{
- InternetFile(const wchar_t* url)
+ const char* msg;
+ XmlError()
{
- hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0);
- if (!hi)
- throw Win32Error();
-
- hiUrl = InternetOpenUrl(hi, url,
- nullptr, 0, INTERNET_FLAG_NO_UI, 0);
- if (!hiUrl) {
- DWORD e = GetLastError();
- InternetCloseHandle(hi);
- throw InternetError(e);
- }
+ msg = xmlGetLastError()->message;
}
-
- ~InternetFile()
+ virtual const char* what() const noexcept
{
- InternetCloseHandle(hiUrl);
- InternetCloseHandle(hi);
- }
-
- DWORD Read(void* buf, DWORD cb)
- {
- DWORD cbRead;
- if (InternetReadFile(hiUrl, buf, cb, &cbRead))
- return cbRead;
- else
- throw InternetError();
+ return msg;
}
-
- HINTERNET hi;
- HINTERNET hiUrl;
};
-template <auto F, typename T>
-struct XmlPtr
-{
- XmlPtr(T v) : v(v)
- {
- if (!v)
- throw std::runtime_error(xmlGetLastError()->message);
- }
- ~XmlPtr() { F(v); }
- operator T() { return v; }
- T operator ->() { return v; }
-private:
- T v;
-};
-
-using HtmlParserCtxtPtr = XmlPtr<xmlFreeParserCtxt, htmlParserCtxtPtr>;
-using HtmlDocPtr = XmlPtr<xmlFreeDoc, htmlDocPtr>;
-using XmlXPathContextPtr = XmlPtr<xmlXPathFreeContext, xmlXPathContextPtr>;
-using XmlXPathObjectPtr = XmlPtr<xmlXPathFreeObject, xmlXPathObjectPtr>;
+using HtmlParserCtxtPtr = Managed<htmlParserCtxtPtr, xmlFreeParserCtxt, XmlError>;
+using HtmlDocPtr = Managed<htmlDocPtr, xmlFreeDoc, XmlError>;
+using XmlXPathContextPtr = Managed<xmlXPathContextPtr, xmlXPathFreeContext, XmlError>;
+using XmlXPathObjectPtr = Managed<xmlXPathObjectPtr, xmlXPathFreeObject, XmlError>;
static inline void XmlFree(void* p) { xmlFree(p); }
-using XmlCharPtr = XmlPtr<XmlFree, xmlChar*>;
+using XmlCharPtr = Managed<xmlChar*, XmlFree, XmlError>;
template <size_t N>
bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept
@@ -93,24 +55,36 @@ bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept
void FetchData()
{
LIBXML_TEST_VERSION;
+ using InternetHandle = Managed<HINTERNET, InternetCloseHandle, InternetError>;
- InternetFile inf(L"https://www.detectiveconanworld.com/wiki/Anime");
- //InternetFile inf(L"file://C:/Users/John/Desktop/dcw.html");
- char buf[1024];
+ /* The remote data is retrieved using WinINet from the
+ * Detective Conan World wiki. Using libxml2's "push parser",
+ * the HTML is parsed piece by piece as it is retrieved. */
+ InternetHandle hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0);
+ InternetHandle hiUrl = InternetOpenUrl(hi, L"https://www.detectiveconanworld.com/wiki/Anime",
+ nullptr, 0, INTERNET_FLAG_NO_UI, 0); //L"file://C:/Users/John/Desktop/dcw.html"
+
+ char buf[1024];
HtmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr,
buf, sizeof(buf), "https://www.detectiveconanworld.com/wiki/Anime",
XML_CHAR_ENCODING_UTF8);
-
htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
- while (const DWORD cbRead = inf.Read(&buf, sizeof(buf))) {
+ BOOL r;
+ DWORD cbRead;
+ while (r = InternetReadFile(hiUrl, buf, sizeof(buf), &cbRead), cbRead) {
+ if (!r)
+ throw InternetError();
if (!htmlParseChunk(ctxt, buf, cbRead, 0))
throw std::runtime_error(xmlGetLastError()->message);
}
htmlParseChunk(ctxt, buf, 0, 1); /* Stop parsing. */
- /* Find table rows containing episode data. */
+ /* The episode data are contained in table rows matching a
+ * (very!) specific XPath query. This is fragile
+ * theoretically, but unlikely to break practically. */
+
HtmlDocPtr doc = ctxt->myDoc;
XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(
@@ -133,7 +107,10 @@ void FetchData()
ElvDataA& e = g_fvElv.At(i);
DlvDataA& d = g_fvDlv.At(i);
- /* Get cells. */
+ /* Each datum is contained within a specific cell in
+ * the row. The child element count above ensures that
+ * none of the following nodes are null. */
+
const xmlNodePtr nodeEp = xmlFirstElementChild(node);
const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp));
const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle);
@@ -142,17 +119,18 @@ void FetchData()
const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource);
WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp));
- e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove WPS suffix. */
+ e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */
WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle));
WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate));
WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource));
WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint));
- /* Get wiki URL. */
+ /* Retrieve the link to the episode's wiki entry,
+ * which should be the first (and only) child element
+ * of the title node. */
const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle);
if (nodeLink)
- WcharsFromXmlchars(d.wiki,
- xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href")));
+ WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast<const xmlChar*>("href")));
}
}
diff --git a/c/ext.cpp b/c/ext.cpp
index f485dce..ee2c943 100644
--- a/c/ext.cpp
+++ b/c/ext.cpp
@@ -76,6 +76,8 @@ static inline bool MatchFileName(wchar_t (&file)[MAX_PATH], const wchar_t* const
static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const root,
const wchar_t* const siEp, const int level = 0)
{
+ using FindHandle = Managed<HANDLE, FindClose, Win32Error, -1>;
+
/* Don't recurse too much. */
if (level > 3)
return false;
@@ -84,9 +86,7 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo
Swprintf(pat, L"%s\\*", root);
WIN32_FIND_DATA fdata;
- HANDLE hf = FindFirstFile(pat, &fdata);
- if (hf == INVALID_HANDLE_VALUE)
- throw Win32Error().what();
+ FindHandle h = FindFirstFile(pat, &fdata);
do {
if (fdata.cFileName[0] == L'.')
@@ -95,25 +95,18 @@ static bool FindMatchingFile(wchar_t (&file)[MAX_PATH], const wchar_t* const roo
/* Recurse into directory. */
wchar_t root2[MAX_PATH];
Swprintf(root2, L"%s\\%s", root, fdata.cFileName);
- try {
- if (FindMatchingFile(file, root2, siEp, level+1))
- return true;
- } catch (...) {
- FindClose(hf);
- throw;
- }
+ if (FindMatchingFile(file, root2, siEp, level+1))
+ return true;
}
else /* Try to match file name. */
if (MatchFileName(fdata.cFileName, siEp)) {
Swprintf(file, L"%s\\%s", root, fdata.cFileName);
return true;
}
- } while (FindNextFile(hf, &fdata));
+ } while (FindNextFile(h, &fdata));
- DWORD e = GetLastError();
- FindClose(hf);
- if (e != ERROR_NO_MORE_FILES)
- throw Win32Error(e);
+ if (GetLastError() != ERROR_NO_MORE_FILES)
+ throw Win32Error();
return false;
}
diff --git a/c/util.h b/c/util.h
index 7ca8bfe..c08050d 100644
--- a/c/util.h
+++ b/c/util.h
@@ -23,6 +23,22 @@ inline int Cmp(const int a, const int b)
return -1;
}
+template <typename T, auto F, typename U, auto E = 0>
+struct Managed
+{
+ T obj;
+ Managed(T obj) : obj(obj)
+ {
+ if (obj == reinterpret_cast<T>(E))
+ throw U();
+ }
+ ~Managed() { F(obj); }
+ operator T() { return obj; }
+ auto& operator *() { return *obj; }
+ auto& operator ->() { return obj; }
+ auto& operator [](size_t i) { return obj[i]; }
+};
+
/* Buf is a span-like structure of a buffer and its size. */
template <typename T>
struct Buf