#include #include #include #include #include #include #include "data.h" #include "episodelistview.h" #include "win.h" /* Signals for thread communication. */ constexpr unsigned char SIG_READY = 1<<0; constexpr unsigned char SIG_DONE = 1<<1; constexpr unsigned char SIG_ABORT = 1<<2; struct XmlError : public std::exception { const char* msg; XmlError() { msg = xmlGetLastError()->message; } virtual const char* what() const noexcept { return msg; } }; static inline void XmlFree(void* p) { xmlFree(p); } /* RAII types for WinINet and libxml2. */ using InternetHandle = Managed; using XmlXPathContextPtr = Managed; using XmlXPathObjectPtr = Managed; using XmlCharPtr = Managed; static InternetHandle s_hi = InternetOpen(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); /* ParsedDoc downloads and parses an HTML document. */ struct ParsedDoc { using HtmlParserCtxtPtr = Managed; using HtmlDocPtr = Managed; InternetHandle hiUrl; HtmlParserCtxtPtr ctxt; char bufI[1024]; char bufX[1024]; ParsedDoc(const wchar_t* wszUrl, const char* szUrl) : hiUrl(InternetOpenUrl(s_hi, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0)), ctxt(htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8)) { htmlCtxtUseOptions(ctxt, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); BOOL r; DWORD cbRead; while (r = InternetReadFile(hiUrl, bufI, sizeof(bufI), &cbRead), cbRead) { if (!r) throw InternetError(); if (!htmlParseChunk(ctxt, bufI, cbRead, 0)) throw XmlError(); } htmlParseChunk(ctxt, bufI, 0, 1); /* Stop parsing. */ } operator htmlDocPtr() { return ctxt->myDoc; } }; template bool WcharsFromXmlchars(wchar_t (&dst)[N], XmlCharPtr utf8) noexcept { /* Truncate if source is larger than destination. */ int lenUtf8 = xmlStrlen(utf8); utf8[Min(N, static_cast(lenUtf8))] = 0; /* Convert internal representation from UTF-8 to Latin-1, * which seems to actually convert the string to proper UTF-8 * (???). */ unsigned char lat1[N]; int lenLat1 = N-1; if (UTF8Toisolat1(lat1, &lenLat1, utf8, &lenUtf8) <= 0) return false; lat1[lenLat1] = 0; /* Write wide string to destination, if it fits. */ char* const src = reinterpret_cast(lat1); const int cchNarrow = lenLat1+1; const int cchWide = MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, nullptr, 0); if (static_cast(cchWide) > N) return false; return MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, dst, cchWide); } void FetchData(unsigned char* sig) { /* The remote data is retrieved using WinINet from the * Detective Conan World wiki. Using libxml2's "push parser", * the HTML is parsed piece by piece as it is retrieved. The * episode data are contained in table rows matching a (very!) * specific XPath query. This is fragile theoretically, but * unlikely to break practically. */ ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime", "https://www.detectiveconanworld.com/wiki/Anime"); XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression( reinterpret_cast("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"), xpathCtx); xmlNodeSetPtr nodes = xpathObj->nodesetval; if (!nodes || !nodes->nodeNr) throw std::runtime_error("could not find remote episode information"); for (int i = 0; i < nodes->nodeNr; i++) { extern FileView g_fvElv; extern FileView g_fvDlv; if (*sig & SIG_ABORT) return; const xmlNodePtr node = nodes->nodeTab[i]; if (xmlChildElementCount(node) != 8) throw std::runtime_error("unexpected remote data format"); ElvDataA& e = g_fvElv.At(i); DlvDataA& d = g_fvDlv.At(i); /* Each datum is contained within a specific cell in * the row. The child element count above ensures that * none of the following nodes are null. */ const xmlNodePtr nodeEp = xmlFirstElementChild(node); const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp)); const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle); const xmlNodePtr nodeSource = xmlNextElementSibling( xmlNextElementSibling(xmlNextElementSibling(nodeDate))); const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource); WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate)); WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource)); WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint)); e.bTVOriginal = wcsncmp(d.source, L"TV", 2) == 0? 1: 0; WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp)); e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */ WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle)); /* Retrieve the link to the episode's wiki entry, * which should be the first (and only) child element * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast("href"))); } } void FetchScreenwriters(unsigned char* sig) { extern FileView g_fvDlv; extern CfgA& g_cfg; static int iLast = -1; int iMax = g_cfg.cEp-1; /* Find the last fetched screenwriter. */ if (iLast == -1) for (size_t i = 0; i < g_fvDlv.c; i++) if (const DlvDataA& d = g_fvDlv[i]; !d.date[0]) { iMax = i-1; break; } else if (d.screenwriter[0]) iLast = i; FINALLY { Status(L""); }; /* Fetch screenwriters for the rest of the episodes. */ const wchar_t prefix[] = L"https://www.detectiveconanworld.com"; wchar_t url[256]; Wcscpy(url, prefix); for (iLast++; iLast < iMax; iLast++) { if (*sig & SIG_ABORT) return; wchar_t msg[48]; Swprintf(msg, L"Fetching screenwriter for episode %d...", iLast+1); Status(msg); /* Retrieve URL for episode's wiki page. */ DlvDataA& d = g_fvDlv[iLast]; Wcscpy(Buf(url)+Len(prefix), d.wiki); /* Retrieve screenwriter from HTML. */ ParsedDoc doc(url, nullptr); XmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); XmlXPathObjectPtr xpathObj = xmlXPathEvalExpression(reinterpret_cast( "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), xpathCtx); xmlNodeSetPtr nodes = xpathObj->nodesetval; if (!nodes || !nodes->nodeNr) continue; xmlChar* s = xmlNodeGetContent(nodes->nodeTab[0]); WcharsFromXmlchars(d.screenwriter, s); } } void WaitFor(void (*f)(unsigned char*)) { static unsigned char sig = SIG_READY; static UINT_PTR iTimer = 0; /* The timer procedure animates an ellipsis in the status bar * while the thread is running. */ static auto procTimer = [](HWND, UINT, UINT_PTR, DWORD) -> void { static int i = 0; static const wchar_t* text[] = {L".", L"..", L"...", L""}; if (sig & SIG_DONE) { extern EpisodeListView* const g_elv; KillTimer(nullptr, iTimer); i = 0; sig &= ~SIG_DONE; sig |= SIG_READY; g_elv->Update(); /* Reset status bar. */ } else { i = (i+1)%(sizeof(text)/sizeof(*text)); Status(text[i], 1); } }; static auto procThread = [](void (*f)(unsigned char*)) noexcept -> void { while (!(sig & SIG_READY)) Sleep(100); sig &= ~SIG_READY; try { f(&sig); sig |= SIG_DONE; } catch (...) { sig |= SIG_DONE; ShowException(L"Remote data could not be fetched due to an error: %s", L"Error", MB_ICONWARNING); } }; /* Ensure that only a single thread is waited on. */ if (!(sig & SIG_READY)) { if (EBMessageBox(L"Another task is active. " L"Do you want to cancel the existing task and start a new one?", L"Error", MB_YESNO|MB_ICONWARNING) == IDYES) sig |= SIG_ABORT; else return; } std::thread(procThread, f).detach(); Status(L".", 1); Prefer(iTimer = SetTimer(nullptr, iTimer, 500, procTimer)); }