aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Ankarström <john@ankarstrom.se>2022-08-21 00:47:54 +0200
committerJohn Ankarström <john@ankarstrom.se>2022-08-21 00:47:54 +0200
commit593757d3d178e7f94376241a70ac29fe2bf94469 (patch)
treed5861e25f3707571d51bf5d33935c10ae8f2f9ab
parent3fa33fc6b16e066838f4db3e182776a04c5c7d26 (diff)
downloadEpisodeBrowser-593757d3d178e7f94376241a70ac29fe2bf94469.tar.gz
Rework remote data fetching.
-rw-r--r--c/data.cpp152
-rw-r--r--c/test.cpp117
2 files changed, 163 insertions, 106 deletions
diff --git a/c/data.cpp b/c/data.cpp
index 9dd6ef8..1e59b8b 100644
--- a/c/data.cpp
+++ b/c/data.cpp
@@ -1 +1,153 @@
+#include <windows.h>
+#include <wininet.h>
+#include <libxml/HTMLparser.h>
+#include <libxml/HTMLtree.h>
+#include <libxml/xpath.h>
+
#include "data.h"
+
+/* Owns a WinINet session handle and a URL handle opened on that
+ * session. Both handles are closed by the destructor. */
+struct InternetFile
+{
+    /* Open a session and then `url` on it. Throws Win32Error if
+     * either step fails. */
+    InternetFile(const wchar_t* url);
+    ~InternetFile();
+
+    /* Non-copyable: a copy would hold the same raw handles, and the
+     * second destructor to run would close them again. */
+    InternetFile(const InternetFile&) = delete;
+    InternetFile& operator=(const InternetFile&) = delete;
+
+    /* Read up to `cb` bytes into `buf` and return the number of bytes
+     * actually read. Throws Win32Error if the read fails. */
+    DWORD Read(void* buf, DWORD cb);
+
+    HINTERNET hi;    /* Session handle returned by InternetOpen. */
+    HINTERNET hiUrl; /* Resource handle returned by InternetOpenUrl. */
+};
+
+/* Open a WinINet session (INTERNET_OPEN_TYPE_DIRECT, synchronous) and
+ * then open `url` on it without UI prompts. Throws Win32Error if either
+ * step fails; the session handle is closed again if only the second
+ * step fails. */
+InternetFile::InternetFile(const wchar_t* url)
+{
+    hi = InternetOpen(L"Episode Browser",
+        INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr,
+        /*INTERNET_FLAG_ASYNC*/0);
+    if (!hi)
+        throw Win32Error{};
+
+    hiUrl = InternetOpenUrl(hi, url,
+        nullptr, 0, INTERNET_FLAG_NO_UI, 0);
+    if (!hiUrl) {
+        /* Closing the session may overwrite the thread's last-error
+         * code. Save and restore it so that the error reported is
+         * the one set by InternetOpenUrl.
+         * NOTE(review): assumes Win32Error reads GetLastError(). */
+        const DWORD err = GetLastError();
+        InternetCloseHandle(hi);
+        SetLastError(err);
+        throw Win32Error{};
+    }
+}
+
+/* Close the URL handle first and the session handle it was opened on
+ * second, i.e. in reverse order of acquisition. */
+InternetFile::~InternetFile()
+{
+    HINTERNET const handles[] = {hiUrl, hi};
+    for (HINTERNET const h : handles)
+        InternetCloseHandle(h);
+}
+
+/* Read up to `cb` bytes from the opened URL into `buf`. Returns the
+ * number of bytes read as reported by InternetReadFile; throws
+ * Win32Error if the call fails. */
+DWORD InternetFile::Read(void* buf, DWORD cb)
+{
+    DWORD cbRead = 0;
+    if (!InternetReadFile(hiUrl, buf, cb, &cbRead))
+        throw Win32Error{};
+    return cbRead;
+}
+
+/* Owning wrapper around a libxml2 pointer of type T. The destructor
+ * releases the pointer by calling F on it. Constructing the wrapper
+ * from a null pointer throws std::runtime_error. */
+template <auto F, typename T>
+struct XmlPtr
+{
+    XmlPtr(T v) : v(v)
+    {
+        if (!v) {
+            /* xmlGetLastError returns null when no error has been
+             * recorded, so it must not be dereferenced blindly. */
+            const auto* const err = xmlGetLastError();
+            throw std::runtime_error(err && err->message
+                ? err->message
+                : "libxml2 returned a null pointer");
+        }
+    }
+    ~XmlPtr() { F(v); }
+
+    /* Non-copyable: a copy would hold the same pointer and F would be
+     * called on it twice. */
+    XmlPtr(const XmlPtr&) = delete;
+    XmlPtr& operator=(const XmlPtr&) = delete;
+
+    /* Implicit access to the raw pointer, so that the wrapper can be
+     * passed directly to libxml2 functions. */
+    operator T() { return v; }
+    T operator ->() { return v; }
+private:
+    T v;
+};
+
+/* XmlPtr instantiations: each alias pairs a libxml2 pointer type with
+ * the function that is called on it when the wrapper is destroyed. */
+using HtmlParserCtxtPtr = XmlPtr<xmlFreeParserCtxt, htmlParserCtxtPtr>;
+using HtmlDocPtr = XmlPtr<xmlFreeDoc, htmlDocPtr>;
+using XmlXPathContextPtr = XmlPtr<xmlXPathFreeContext, xmlXPathContextPtr>;
+using XmlXPathObjectPtr = XmlPtr<xmlXPathFreeObject, xmlXPathObjectPtr>;
+/* Plain function forwarding to xmlFree, used as the release function
+ * of XmlCharPtr.
+ * NOTE(review): presumably needed because xmlFree itself cannot be
+ * used directly as a template argument -- confirm. */
+static inline void XmlFree(void* p) { xmlFree(p); }
+using XmlCharPtr = XmlPtr<XmlFree, xmlChar*>;
+
+/* Convert the text content of `node` to a wide string and store it,
+ * null-terminated, in `dst`.
+ *
+ * Returns true if the string was written. Returns false if the node
+ * has no content, if the content is empty or cannot be transcoded, or
+ * if the converted string does not fit in `dst`. */
+template <size_t N>
+bool WriteNodeContent(wchar_t (&dst)[N], const xmlNodePtr node)
+{
+    /* Check the raw pointer before handing it to XmlCharPtr, whose
+     * constructor throws on null; a node without content is reported
+     * through the return value like every other failure here. */
+    xmlChar* const content = xmlNodeGetContent(node);
+    if (!content)
+        return false;
+    XmlCharPtr owner = content; /* Frees content on every return path. */
+
+    /* Consider at most N bytes of the source. The content buffer is
+     * only as large as the string itself, so the limit is applied to
+     * the length rather than by writing a terminator at index N,
+     * which would be out of bounds for shorter strings. */
+    int lenUtf8 = xmlStrlen(content);
+    if (static_cast<size_t>(lenUtf8) > N)
+        lenUtf8 = static_cast<int>(N);
+
+    /* Convert internal representation from UTF-8 to Latin-1,
+     * which seems to actually convert the string to proper UTF-8
+     * (???). */
+    unsigned char lat1[N];
+    int lenLat1 = static_cast<int>(N-1); /* Leave room for the terminator. */
+    if (UTF8Toisolat1(lat1, &lenLat1, content, &lenUtf8) <= 0)
+        return false;
+    lat1[lenLat1] = 0;
+
+    /* Write wide string to destination, if it fits. The first call
+     * only measures the required size, including the terminator. */
+    char* const src = reinterpret_cast<char*>(lat1);
+    const int cchNarrow = lenLat1+1;
+    const int cchWide = MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, nullptr, 0);
+    if (cchWide <= 0 || static_cast<size_t>(cchWide) > N)
+        return false;
+    return MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, dst, cchWide) != 0;
+}
+
+/* Read the episode list page, parse it as HTML and copy the episode
+ * number, title, date, source and hint of every matching table row
+ * into `fvElv` and `fvDlv`. The i-th matching row is written to index
+ * i of both views.
+ *
+ * Throws Win32Error if the page cannot be opened or read, and
+ * std::runtime_error if libxml2 fails to produce a document, context
+ * or XPath result, or if a matching row does not have exactly eight
+ * cells. */
+void FetchData(FileView<ElvDataA>& fvElv, FileView<DlvDataA>& fvDlv)
+{
+    LIBXML_TEST_VERSION;
+
+    /* NOTE(review): this currently reads a developer-local copy of
+     * the page; the real URL is commented out. */
+    //InternetFile inf{L"https://www.detectiveconanworld.com/wiki/Anime"};
+    InternetFile inf{L"file://C:/Users/John/Desktop/dcw.html"};
+    char buf[1024];
+
+    /* Create the push parser without an initial chunk: buf holds no
+     * data yet, so it must not be handed to the parser here. */
+    HtmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr,
+        nullptr, 0, "https://www.detectiveconanworld.com/wiki/Anime",
+        XML_CHAR_ENCODING_UTF8);
+
+    htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
+
+    /* htmlParseChunk returns 0 on success and a parser error code
+     * otherwise. Since recovery from malformed markup is requested
+     * above, a non-zero code is not treated as fatal; a parse that
+     * yields no document at all is caught when doc is created. */
+    while (const DWORD cbRead = inf.Read(buf, sizeof(buf)))
+        htmlParseChunk(ctxt, buf, static_cast<int>(cbRead), 0);
+    htmlParseChunk(ctxt, nullptr, 0, 1); /* Stop parsing. */
+
+    /* Find table rows containing episode data. */
+    HtmlDocPtr doc = ctxt->myDoc;
+    XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
+    XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(
+        reinterpret_cast<const xmlChar*>("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"),
+        xpathCtx);
+    xmlNodeSetPtr nodes = xpathObj->nodesetval;
+    int cNodes = nodes? nodes->nodeNr: 0;
+
+    printf("%d nodes\n", cNodes);
+    for (int i = 0; i < cNodes; i++) {
+        const xmlNodePtr node = nodes->nodeTab[i];
+        if (xmlChildElementCount(node) != 8)
+            throw std::runtime_error("unexpected remote data format");
+
+        /* Get cells. The count check above guarantees that all
+         * eight siblings exist: episode is cell 1, title 3, date 4,
+         * source 7 and hint 8. */
+        const xmlNodePtr nodeEp = xmlFirstElementChild(node);
+        const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp));
+        const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle);
+        const xmlNodePtr nodeSource = xmlNextElementSibling(
+            xmlNextElementSibling(xmlNextElementSibling(nodeDate)));
+        const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource);
+
+        ElvDataA& e = fvElv.At(i);
+        DlvDataA& d = fvDlv.At(i);
+
+        /* Only strip the suffix from text that was just written;
+         * after a failed write the buffer does not hold this row's
+         * episode number. */
+        if (WriteNodeContent(e.siEp, nodeEp))
+            e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove WPS suffix. */
+        WriteNodeContent(e.title, nodeTitle);
+        WriteNodeContent(d.date, nodeDate);
+        WriteNodeContent(d.source, nodeSource);
+        WriteNodeContent(d.hint, nodeHint);
+    }
+}
diff --git a/c/test.cpp b/c/test.cpp
index b125a20..5bd8958 100644
--- a/c/test.cpp
+++ b/c/test.cpp
@@ -1,8 +1,4 @@
#include <windows.h>
-#include <wininet.h>
-#include <libxml/HTMLparser.h>
-#include <libxml/HTMLtree.h>
-#include <libxml/xpath.h>
#include "data.h"
#include "episodelistview.h"
@@ -25,56 +21,6 @@ struct Test
#define TEST(id) }; struct id : public Test { id() : Test(#id)
#define FAIL(...) do { Sprintf(error, __VA_ARGS__); return; } while (0)
-template <typename F>
-struct Defer
-{
- Defer(F dtor) : dtor(dtor) {}
- ~Defer() { dtor(); }
- F dtor;
-};
-
-#define DEFER(x) Defer APPLY(CAT, defer_, __COUNTER__) {[=](){x;}}
-
-struct InternetFile
-{
- InternetFile(const wchar_t* url);
- ~InternetFile();
- DWORD Read(void* buf, DWORD cb);
- HINTERNET hi;
- HINTERNET hiUrl;
-};
-
-InternetFile::InternetFile(const wchar_t* url)
-{
- hi = InternetOpen(L"Episode Browser",
- INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr,
- /*INTERNET_FLAG_ASYNC*/0);
- if (!hi)
- throw Win32Error{};
-
- hiUrl = InternetOpenUrl(hi, url,
- nullptr, 0, INTERNET_FLAG_NO_UI, 0);
- if (!hiUrl) {
- InternetCloseHandle(hi);
- throw Win32Error{};
- }
-}
-
-InternetFile::~InternetFile()
-{
- InternetCloseHandle(hiUrl);
- InternetCloseHandle(hi);
-}
-
-DWORD InternetFile::Read(void* buf, DWORD cb)
-{
- DWORD cbRead;
- if (InternetReadFile(hiUrl, buf, cb, &cbRead))
- return cbRead;
- else
- throw Win32Error{};
-}
-
TESTS
{
TEST(StrcpyWithSmallerDestination)
@@ -252,7 +198,7 @@ TESTS
//DeleteFile(L"tmp.dat");
}
- TEST(DownloadData)
+ TEST(DownloadDataViaProlog)
{
WcharPtr title, wiki, date, source, hint;
int i = 1053;
@@ -276,60 +222,21 @@ TESTS
g_cfg.cEp = i;
}
- TEST(XML)
+ TEST(Fetch)
{
- LIBXML_TEST_VERSION;
-
- InternetFile inf{L"https://www.detectiveconanworld.com/wiki/Anime"};
- char buf[1024];
-
- htmlParserCtxtPtr ctxt = htmlCreatePushParserCtxt(nullptr, nullptr,
- buf, sizeof(buf), "https://www.detectiveconanworld.com/wiki/Anime",
- XML_CHAR_ENCODING_UTF8);
- DEFER(xmlFreeParserCtxt(ctxt));
-
- htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING);
-
- while (DWORD cbRead = inf.Read(&buf, sizeof(buf))) {
- if (!htmlParseChunk(ctxt, buf, cbRead, 0))
- FAIL(xmlGetLastError()->message);
- }
- htmlParseChunk(ctxt, buf, 0, 1); /* Terminate. */
-
- htmlDocPtr doc = ctxt->myDoc;
- if (!doc)
- FAIL(xmlGetLastError()->message);
- DEFER(xmlFreeDoc(doc)); /* Needed? */
-
- xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
- if (!xpathCtx)
- FAIL(xmlGetLastError()->message);
- DEFER(xmlXPathFreeContext(xpathCtx));
-
- xmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(
- reinterpret_cast<const xmlChar*>("//tr/td[@style='background:#f2fde9;']"),
- xpathCtx);
- if (!xpathObj)
- FAIL(xmlGetLastError()->message);
- DEFER(xmlXPathFreeObject(xpathObj));
-
- xmlNodeSetPtr nodes;
- int cNodes;
- nodes = xpathObj->nodesetval;
- cNodes = nodes? nodes->nodeNr: 0;
-
- printf("%d nodes\n", cNodes);
- // for (int i = 0; i < cNodes; i++) {
- // xmlNodePtr node = nodes->nodeTab[i];
- // printf("node \"%s\": type %d\n", node->name, node->type);
- // }
+ extern FileView<ElvDataA> g_fvElv;
+ extern FileView<DlvDataA> g_fvDlv;
+ void FetchData(FileView<ElvDataA>& fvElv, FileView<DlvDataA>& fvDlv);
+ //FileView<ElvDataA> fvElv{L"testelv.dat", 1080};
+ //FileView<DlvDataA> fvDlv{L"testdlv.dat", 1080};
+ FetchData(g_fvElv, g_fvDlv);
}
};
int RunTests()
{
const Test tests[] = {
- //StrcpyWithSmallerDestination{},
+ StrcpyWithSmallerDestination{},
//EpisodeDataFromWeb{},
//EpisodeDataFromProlog{},
//IO{},
@@ -337,10 +244,8 @@ int RunTests()
//SampleConfigurationToDisk{},
//MigrateCfg{}
//MigrateDlvDataFromPrologToDisk{},
- //DownloadData{},
- XML{},
- //ImportElvData{},
- //ImportDlvData{},
+ //DownloadDataViaProlog{},
+ Fetch{},
};
printf("Results (%llu tests):\n", sizeof(tests)/sizeof(*tests));