#include #include #include #include #include #include #include "resource.h" #include "data.h" #include "episodelistview.h" #include "util.h" #include "win.h" static Unique s_hi = InternetOpenW(L"Episode Browser", INTERNET_OPEN_TYPE_DIRECT, nullptr, nullptr, 0); /* ParsedDoc downloads and parses an HTML document. */ struct ParsedDoc { Unique hiUrl; Unique ctxt; char bufI[1024]; char bufX[1024]; ParsedDoc(const wchar_t* wszUrl, const char* szUrl) { if (!s_hi.Not(0)) throw Win32Error(); hiUrl = InternetOpenUrlW(s_hi.v, wszUrl, nullptr, 0, INTERNET_FLAG_NO_UI, 0); if (!hiUrl.Not(0)) throw InternetError(); ctxt = htmlCreatePushParserCtxt(nullptr, nullptr, bufX, sizeof(bufX), szUrl, XML_CHAR_ENCODING_UTF8); if (!ctxt.Not(0)) throw XmlError(); htmlCtxtUseOptions(ctxt.v, HTML_PARSE_RECOVER|HTML_PARSE_NOERROR|HTML_PARSE_NOWARNING); BOOL r; DWORD cbRead; while (r = InternetReadFile(hiUrl.v, bufI, sizeof(bufI), &cbRead), cbRead) { if (!r) throw InternetError(); if (!htmlParseChunk(ctxt.v, bufI, cbRead, 0)) throw XmlError(); } htmlParseChunk(ctxt.v, bufI, 0, 1); /* Stop parsing. */ } operator htmlDocPtr() { return ctxt.v->myDoc; } }; static inline void XmlFree(void* p) { xmlFree(p); } template bool WcharsFromXmlchars(wchar_t (&dst)[N], xmlChar* utf8_) { Unique utf8 = utf8_; if (!utf8.Not(0)) throw XmlError(); /* Truncate if source is larger than destination. */ int lenUtf8 = xmlStrlen(utf8.v); utf8.v[Min(N, static_cast(lenUtf8))] = 0; /* Convert internal representation from UTF-8 to Latin-1, * which seems to actually convert the string to proper UTF-8 * (???). */ unsigned char lat1[N]; int lenLat1 = N-1; if (UTF8Toisolat1(lat1, &lenLat1, utf8.v, &lenUtf8) <= 0) return false; lat1[lenLat1] = 0; /* Write wide string to destination, if it fits. */ char* const src = reinterpret_cast(lat1); const int cchNarrow = lenLat1+1; const int cchWide = MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, nullptr, 0); if (static_cast(cchWide) > N) return false; return MultiByteToWideChar(CP_UTF8, 0, src, cchNarrow, dst, cchWide); } /* The Fetch* functions are run in a separate thread via WaitFor. The * main thread and the fetch thread communicate by setting flags on a * shared byte. At any given time, only a single fetch thread may be * performing work. */ enum Signal { READY = 1<<0, /* Main -> fetch: start working! */ DONE = 1<<1, /* Fetch -> main: work is done. */ ABORT = 1<<2 /* Main -> fetch: exit prematurely! */ }; void WaitFor(void (*f)(unsigned char*)) { extern HWND g_hWnd; static unsigned char sig = READY; static UINT_PTR iTimer = 0; static auto procTimer = [](HWND, UINT, UINT_PTR, DWORD) -> void { static int i = 0; if (sig & DONE) { extern EpisodeListView* g_elv; KillTimer(nullptr, iTimer); i = 0; sig = READY; /* Reset signals. */ g_elv->Update(); /* Reset status bar. */ EnableMenuItem(GetMenu(g_hWnd), IDM_FILE_FETCH_CANCEL, MF_GRAYED); } else { /* Animate ellipsis in status bar. */ static const wchar_t* text[] = {L".", L"..", L"...", L""}; i = (i+1)%(sizeof(text)/sizeof(*text)); Status(text[i], 1); } }; static auto procThread = [](void (*f)(unsigned char*)) noexcept -> void { SET_TERMINATE; while (!(sig & READY)) Sleep(100); sig = 0; EnableMenuItem(GetMenu(g_hWnd), IDM_FILE_FETCH_CANCEL, MF_ENABLED); try { f(&sig); sig |= DONE; } catch (...) { sig |= DONE; ShowException(L"Remote data could not be fetched due to an error: %s", L"Error", MB_ICONWARNING); } }; /* Null indicates that any active task should be cancelled. */ if (!f) { sig |= ABORT; EnableMenuItem(GetMenu(g_hWnd), IDM_FILE_FETCH_CANCEL, MF_GRAYED); return; } /* Ensure that only a single thread is waited on. */ if (!(sig & READY)) { if (EBMessageBox(L"Another task is active. " L"Do you want to cancel the existing task and start a new one?", L"Error", MB_YESNO|MB_ICONWARNING) == IDYES) sig |= ABORT; else return; } std::thread(procThread, f).detach(); Status(L".", 1); Prefer(iTimer = SetTimer(nullptr, iTimer, 500, procTimer)); } void FetchData(unsigned char* sig) { /* The remote data is retrieved using WinINet from the * Detective Conan World wiki. Using libxml2's "push parser", * the HTML is parsed piece by piece as it is retrieved. The * episode data are contained in table rows matching a (very!) * specific XPath query. This is fragile theoretically, but * unlikely to break practically. */ ParsedDoc doc(L"https://www.detectiveconanworld.com/wiki/Anime", "https://www.detectiveconanworld.com/wiki/Anime"); Unique xpathCtx = xmlXPathNewContext(doc); if (!xpathCtx.Not(0)) throw XmlError(); Unique xpathObj = xmlXPathEvalExpression( reinterpret_cast("//tr[./td[1] != '' and ./td[3][@style='background:#f2fde9;']]"), xpathCtx.v); if (!xpathObj.Not(0)) throw XmlError(); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; if (!nodes || !nodes->nodeNr) throw std::runtime_error("could not find remote episode information"); for (int i = 0; i < nodes->nodeNr; i++) { extern FileView g_fvElv; extern FileView g_fvDlv; if (*sig & ABORT) return; const xmlNodePtr node = nodes->nodeTab[i]; if (xmlChildElementCount(node) != 8) throw std::runtime_error("unexpected remote data format"); ElvDataA& e = g_fvElv.At(i); DlvDataA& d = g_fvDlv.At(i); /* Each datum is contained within a specific cell in * the row. The child element count above ensures that * none of the following nodes are null. */ const xmlNodePtr nodeEp = xmlFirstElementChild(node); const xmlNodePtr nodeTitle = xmlNextElementSibling(xmlNextElementSibling(nodeEp)); const xmlNodePtr nodeDate = xmlNextElementSibling(nodeTitle); const xmlNodePtr nodeSource = xmlNextElementSibling( xmlNextElementSibling(xmlNextElementSibling(nodeDate))); const xmlNodePtr nodeHint = xmlNextElementSibling(nodeSource); WcharsFromXmlchars(d.date, xmlNodeGetContent(nodeDate)); WcharsFromXmlchars(d.source, xmlNodeGetContent(nodeSource)); WcharsFromXmlchars(d.hint, xmlNodeGetContent(nodeHint)); e.bTVOriginal = wcsncmp(d.source, L"TV", 2) == 0? 1: 0; WcharsFromXmlchars(e.siEp, xmlNodeGetContent(nodeEp)); e.siEp[wcscspn(e.siEp, L"W")] = 0; /* Remove potential "WPS" suffix. */ WcharsFromXmlchars(e.title, xmlNodeGetContent(nodeTitle)); /* Retrieve the link to the episode's wiki entry, * which should be the first (and only) child element * of the title node. */ const xmlNodePtr nodeLink = xmlFirstElementChild(nodeTitle); if (nodeLink) WcharsFromXmlchars(d.wiki, xmlGetProp(nodeLink, reinterpret_cast("href"))); } } void FetchScreenwriters(unsigned char* sig) { extern FileView g_fvDlv; extern CfgA& g_cfg; /* Screenwriters are expensive to fetch, so we try to avoid * fetching screenwriters for episodes that already have a * screenwriter. Additionally, in the same session, we don't * try to fetch screenwriters for episodes for which we have * already tried to fetch screenwriters. We keep track of * these states using the iLast variable. */ static int iLast = -1; int iMax = g_cfg.cEp-1; /* Find the last episode that has a screenwriter. */ if (iLast == -1) for (size_t i = 0; i < g_fvDlv.c; i++) if (const DlvDataA& d = g_fvDlv[i]; !d.date[0]) { iMax = i-1; break; } else if (d.screenwriter[0]) iLast = i; FINALLY { Status(L""); }; /* Fetch screenwriters for the rest of the episodes. */ const wchar_t prefix[] = L"https://www.detectiveconanworld.com"; wchar_t url[256]; Wcscpy(url, prefix); for (iLast++; iLast < iMax; iLast++) { if (*sig & ABORT) return; wchar_t msg[48]; Swprintf(msg, L"Fetching screenwriter for episode %d...", iLast+1); Status(msg); /* Retrieve URL for episode's wiki page. */ DlvDataA& d = g_fvDlv[iLast]; Wcscpy(Buf(url)+Len(prefix), d.wiki); /* Retrieve screenwriter from HTML. */ ParsedDoc doc(url, nullptr); Unique xpathCtx = xmlXPathNewContext(doc); if (!xpathCtx.Not(0)) throw XmlError(); Unique xpathObj = xmlXPathEvalExpression(reinterpret_cast( "//th[contains(text(), 'Screenplay:')]/following-sibling::td"), xpathCtx.v); if (!xpathObj.Not(0)) throw XmlError(); xmlNodeSetPtr nodes = xpathObj.v->nodesetval; if (nodes && nodes->nodeNr) WcharsFromXmlchars(d.screenwriter, xmlNodeGetContent(nodes->nodeTab[0])); } }