#include #include #include #include #include #include #include "data.h" #include "episodelistview.h" #include "err.h" #include "res.h" #include "util.h" #include "win32.h" #include "window.h" using namespace std::string_literals; UniqueOk RemoteParserCtxt(const wchar_t* wszUrl, const char* szUrl) { static Unique hi = InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); if (hi.Bad(0)) throw Err(WINDOWS, L"Internet handle could not be opened: %s"); Unique hiUrl = InternetOpenUrlW( hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0); if (hiUrl.Bad(0)) throw Err(WININET, L"Could not open "s + wszUrl + L": %s"); char bufX[1024]; Unique ctx = htmlCreatePushParserCtxt( nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8); if (ctx.Bad(0)) throw Err(LIBXML2, L"HTML parser context could not be created: %s"); htmlCtxtUseOptions(ctx.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); BOOL r; DWORD cbRead; char bufI[1024]; while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) { if (!r) throw Err(WININET, L"HTML could not be retrieved: %s"); if (!htmlParseChunk(ctx.v, bufI, cbRead, 0)) throw Err(LIBXML2, L"HTML could not be parsed: %s"); } htmlParseChunk(ctx.v, bufI, 0, 1); /* Stop parsing. */ return ctx; } static inline void XmlFree(void* p) { xmlFree(p); } template bool WcharsFromXmlchars(wchar_t (&dst)[N], Unique utf8) { if (utf8.Bad(0)) throw Err(LIBXML2, L"Node content could not be retrieved: %s"); /* Truncate if source is larger than destination. */ int lenUtf8 = xmlStrlen(utf8.v); utf8.v[Min(N, static_cast(lenUtf8))] = 0; /* Convert internal representation from UTF-8 to Latin-1, * which seems to actually convert the string to proper UTF-8 * (???). */ unsigned char lat1[N]; int lenLat1 = N-1; if (UTF8Toisolat1(lat1, &lenLat1, utf8.v, &lenUtf8) <= 0) return false; lat1[lenLat1] = 0; /* Write wide string to destination, if it fits. */ char* const src = reinterpret_cast(lat1); const int cchNarrow = lenLat1+1; const int cchWide = MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, nullptr, 0); if (static_cast(cchWide) > N) return false; return MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, dst, cchWide); } /* The Fetch* functions are run in a separate thread via WaitFor. The * main thread and the fetch thread communicate by setting flags on a * shared byte. At any given time, only a single fetch thread may be * performing work. */ enum Signal : unsigned char { READY = 1<<0, /* Main -> fetch: start working! */ DONE = 1<<1, /* Fetch -> main: work is done. */ ABORT = 1<<2 /* Main -> fetch: exit prematurely! */ }; static Window* s_window; void WaitFor(Window& window, void (*f)(unsigned char*)) { static unsigned char sig = READY; static UINT_PTR iTimer = 0; static auto procTimer = [](HWND, UINT, UINT_PTR, DWORD) -> void { static int i = 0; if (sig & DONE) { KillTimer(nullptr, iTimer); i = 0; sig = READY; /* Reset signals. */ s_window->elv.Update(); /* Reset status bar. */ EnableMenuItem(GetMenu(s_window->hWnd), IDM_FILE_FETCH_CANCEL, MF_GRAYED); } else { /* Animate ellipsis in status bar. */ static const wchar_t* text[] = {L".", L"..", L"...", L""}; i = (i+1)%(sizeof(text)/sizeof(*text)); s_window->Status(text[i], 1); } }; static auto procThread = [](void (*f)(unsigned char*)) noexcept -> void { std::set_terminate(OnTerminate); while (!(sig & READY)) Sleep(100); sig = 0; EnableMenuItem(GetMenu(s_window->hWnd), IDM_FILE_FETCH_CANCEL, MF_ENABLED); try { f(&sig); sig |= DONE; } catch (...) { sig |= DONE; EBMessageBox(What(), L"Remote Data Retrieval Error"); } }; /* Null indicates that any active task should be cancelled. */ if (!f) { sig |= ABORT; EnableMenuItem(GetMenu(s_window->hWnd), IDM_FILE_FETCH_CANCEL, MF_GRAYED); return; } /* Ensure that only a single thread is waited on. */ if (!(sig & READY)) { if (EBMessageBox(L"Another task is active. " L"Do you want to cancel the existing task and start a new one?", L"Error", MB_YESNO|MB_ICONWARNING) == IDYES) sig |= ABORT; else return; } s_window = &window; std::thread(procThread, f).detach(); s_window->Status(L".", 1); if (!(iTimer = SetTimer(nullptr, iTimer, 500, procTimer))) throw Err(WINDOWS, L"Timer could not be started: %s"); } void FetchData(unsigned char* sig) { /* The remote data is retrieved using WinINet from the * Detective Conan World wiki. Using libxml2's "push parser", * the HTML is parsed piece by piece as it is retrieved. The * episode data are contained in table rows matching a (very!) * specific XPath query. This is fragile theoretically, but * unlikely to break practically. */ wchar_t url[_countof(s_window->cfg.prefixUrl)+46+1]; Swprintf(url, L"%shttps://www.detectiveconanworld.com/wiki/Anime", s_window->cfg.prefixUrl); UniqueOk ctx = RemoteParserCtxt(url, "https://www.detectiveconanworld.com/wiki/Anime"); Unique xpathCtx = xmlXPathNewContext(ctx.v->myDoc); if (xpathCtx.Bad(0)) throw Err(LIBXML2, L"XPath context could not be created: %s"); Unique xpathObj = xmlXPathEvalExpression( reinterpret_cast("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"), xpathCtx.v); if (xpathObj.Bad(0)) throw Err(LIBXML2, L"XPath object could not be created: %s"); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; if (!nodes || !nodes->nodeNr) throw Err(GENERIC, L"Data retrieval failed: No matching HTML nodes found."); for (int i = 0; i < nodes->nodeNr; i++) { if (*sig & ABORT) return; const xmlNodePtr node = nodes->nodeTab[i]; if (xmlChildElementCount(node) != 8) throw Err(GENERIC, L"Data retrieval failed: Unexcepted number of columns in table."); ElvDataA& e = s_window->fvElv.At(i); DlvDataA& d = s_window->fvDlv.At(i); /* Each datum is contained within a specific cell in * the row. The child element count above ensures that * none of the following nodes are null. */ const xmlNodePtr nodeEp = xmlFirstElementChild(node); const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp)); const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle); const xmlNodePtr nodeSource = xmlNextElementSibling( xmlNextElementSibling(xmlNextElementSibling(nodeDate))); const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource); WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate)); WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource)); WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint)); e.bTVOriginal = wcsncmp(d.source, L"TV", 2) == 0? 1: 0; WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp)); e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */ WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle)); /* Retrieve the link to the episode's wiki entry, * which should be the first (and only) child element * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast("href"))); } } void FetchScreenwriters(unsigned char* sig) { /* Screenwriters are expensive to fetch, so we try to avoid * fetching screenwriters for episodes that already have a * screenwriter. Additionally, in the same session, we don't * try to fetch screenwriters for episodes for which we have * already tried to fetch screenwriters. We keep track of * these states using the iLast variable. */ static int iLast = -1; int iMax = s_window->cfg.cEp-1; /* Find the last episode that has a screenwriter. */ if (iLast == -1) for (size_t i = 0; i < s_window->fvDlv.c; i++) if (const DlvDataA& d = s_window->fvDlv[i]; !d.date[0]) { iMax = i-1; break; } else if (d.screenwriter[0]) iLast = i; FINALLY { s_window->Status(L""); }; /* Fetch screenwriters for the rest of the episodes. */ const wchar_t prefix[] = L"https://www.detectiveconanworld.com"; wchar_t url[256]; Wcscpy(url, prefix); Unique xpathCtx; Unique xpathObj; for (iLast++; iLast < iMax; iLast++) { if (*sig & ABORT) return; wchar_t msg[48]; Swprintf(msg, L"Fetching screenwriter for episode %d...", iLast+1); s_window->Status(msg); /* Retrieve URL for episode's wiki page. */ DlvDataA& d = s_window->fvDlv[iLast]; Wcscpy(Buf(url)+Len(prefix), d.wiki); /* Retrieve screenwriter from HTML. */ UniqueOk ctx = RemoteParserCtxt(url, nullptr); xpathCtx = xmlXPathNewContext(ctx.v->myDoc); if (xpathCtx.Bad(0)) throw Err(LIBXML2, L"XPath context could not be created: %s"); xpathObj = xmlXPathEvalExpression(reinterpret_cast( "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), xpathCtx.v); if (xpathObj.Bad(0)) throw Err(LIBXML2, L"XPath object could not be created: %s"); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; if (nodes && nodes->nodeNr) WcharsFromXmlchars(d.screenwriter, xmlNodeGetContent(nodes->nodeTab[0])); } }