:- module(episode_data, [ensure_episode_data/0, update_episode_data/0, update_screenwriters/0, retract_episode/1, episode_count/1, rate_episode/2, episode_rating/2, tv_original/1, thread_running/1, thread_exception/2]). :- use_module(library(clpfd)). :- use_module(library(dcg/basics)). :- use_module(library(http/http_open)). :- use_module(library(sgml)). :- use_module(library(xpath)). :- use_module(library(persistency)). :- use_module(atom_dcg). :- persistent episode_title(episode:integer, title:atom). :- persistent episode_wiki(episode:integer, wiki:atom). :- persistent episode_datum(episode:integer, key:atom, value:atom). :- persistent episode_rating(episode:integer, rating:integer). attach :- absolute_file_name('episode_data.db', F, [access(write)]), db_attach(F, []). detach :- db_detach. ensure_episode_data :- episode_title(_, _), !. ensure_episode_data :- update_episode_data. retract_episode(Ep) :- ( episode_title(Ep, _) -> retractall_episode_title(Ep, _) ; true ), ( episode_datum(Ep, 'Hint', _) -> retractall_episode_datum(Ep, 'Hint', _) ; true ). episode_count(N) :- setof(E, T^episode_title(E,T), Es), last(Es, N). rate_episode(Ep, 0) :- ( episode_rating(Ep, _) -> retractall_episode_rating(Ep, _) ; true ), !. rate_episode(Ep, R) :- dif(R, 0), ( episode_rating(Ep, R) -> true ; ( episode_rating(Ep, _) -> retractall_episode_rating(Ep, _) ; true ), assert_episode_rating(Ep, R) ). open_episode_wiki(Ep) :- episode_wiki(Ep, W), win_shell(open, W). tv_original(Ep) :- episode_datum(Ep, 'Source', 'TV Original'). % Remote data retrieval. absolute_url(R) --> "https://www.detectiveconanworld.com", R. thread_running(T) :- thread_property(T, status(running)). thread_exception(T, E) :- thread_property(T, status(exception(E))). update_episode_data :- findall(Ep-Info, (fetch_episode_info(Ep, Info)), Set), maplist(set_episode_info, Set). set_episode_info(Ep-Info) :- maplist(set_episode_datum(Ep), Info). set_episode_datum(Ep, 'Title'-Title) :- !, maybe_assert_episode_title(Ep, Title). set_episode_datum(Ep, 'Wiki'-W) :- !, maybe_assert_episode_wiki(Ep, W). set_episode_datum(Ep, Key-Value) :- maybe_assert_episode_datum(Ep, Key, Value). maybe_assert_episode_title(Ep, Title) :- ( episode_title(Ep, Title), ! ; assert_episode_title(Ep, Title) ). maybe_assert_episode_wiki(Ep, W) :- ( episode_wiki(Ep, W), ! ; assert_episode_wiki(Ep, W) ). maybe_assert_episode_datum(Ep, Key, Value) :- ( episode_datum(Ep, Key, Value), ! ; ( episode_datum(Ep, Key, _) -> retract_episode_datum(Ep, Key, _) ; true ), assert_episode_datum(Ep, Key, Value) ). episode_number(Ep) --> integer(Ep). episode_number(Ep) --> integer(Ep), "WPS", integer(_). fetch_episode_info(Ep, ['Title'-Title, 'Wiki'-W, 'Date'-Date, 'Source'-Source, 'Hint'-Hint]) :- cached_html('https://www.detectiveconanworld.com/wiki/Anime', H), xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R), xpath(R, td(index(1),normalize_space), Ep0), atom_phrase(episode_number(Ep), Ep0), xpath(R, td(index(3),normalize_space), Title), xpath(R, td(index(3))/a(@href), W0), atom_phrase(absolute_url(W0), W), xpath(R, td(index(4),normalize_space), Date), xpath(R, td(index(7),normalize_space), Source0), re_replace('\\(([0-9])', ' (\\1', Source0, Source1), atom_string(Source, Source1), xpath(R, td(index(8),normalize_space), Hint). update_screenwriters :- findall(Ep-Name, (maybe_fetch_screenwriter_episode(Name, Ep)), Set), maplist(set_episode_screenwriter, Set). set_episode_screenwriter(Ep-Name) :- maybe_assert_episode_datum(Ep, 'Screenwriter', Name). maybe_fetch_screenwriter_episode(Name, Ep) :- \+ episode_datum(Ep, 'Screenwriter', Ep), fetch_screenwriter_episode(Name, Ep). fetch_screenwriter(Name) :- cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H), xpath(H, //'div'(@id='mw-pages')//a, A), xpath(A, /self(normalize_space), Name). fetch_screenwriter_url(Name, U) :- cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H), xpath(H, //'div'(@id='mw-pages')//a, A), xpath(A, /self(normalize_space), Name), xpath(A, /self(@href), U). screenwriter_episode(Ep) --> string(_), "(Episode ", integer(Ep), ")". screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), ")", { between(Ep1, Ep2, Ep) }. screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), " only)", { between(Ep1, Ep2, Ep) }. fetch_screenwriter_episode(Name, Ep) :- fetch_screenwriter_url(Name, U0), atom_phrase(absolute_url(U0), U), cached_html(U, H), xpath(H, //div(@id='mw-content-text')//li, L), xpath(L, /self(normalize_space), T), atom_phrase(screenwriter_episode(Ep), T). fetch_html(U, H) :- catch(http_load_html(U, H), _, fail), !, nb_setval(U, H). cached_html(U, H) :- nb_current(U, H), !. cached_html(U, H) :- fetch_html(U, H). http_load_html(URL, DOM) :- setup_call_cleanup(http_open(URL, In, [ timeout(60) ]), ( dtd(html, DTD), load_structure(stream(In), DOM, [ dtd(DTD), dialect(sgml), shorttag(false), max_errors(-1), syntax_errors(quiet) ]) ), close(In)).