:- module(episode_data,
          [ ensure_episode_data/0,
            retract_episode/1,
            episode_count/1,
            rate_episode/2,
            episode_rating/2
          ]).

:- use_module(library(clpfd)).
:- use_module(library(dcg/basics)).
:- use_module(library(error)).
:- use_module(library(http/http_open)).
:- use_module(library(sgml)).
:- use_module(library(xpath)).
:- use_module(library(persistency)).
:- use_module(atom_dcg).

% Persistent fact tables (library(persistency)).  Each declaration also
% generates the assert_*/retractall_* predicates used below.
:- persistent episode_title(episode:integer, title:atom).
:- persistent episode_datum(episode:integer, key:atom, value:atom).
:- persistent episode_rating(episode:integer, rating:integer).

% attach
%
% Attach the persistent store to the file 'episode_data.db', which must be
% writable.
% NOTE(review): nothing in this file calls attach/0 or detach/0; presumably
% the application invokes them at start-up/shutdown -- confirm.
attach :-
    absolute_file_name('episode_data.db', F, [access(write)]),
    db_attach(F, []).

% detach
%
% Detach the persistent store again.
detach :-
    db_detach.

% ensure_episode_data
%
% Succeeds immediately when at least one episode title is stored;
% otherwise downloads the episode list and stores it.
ensure_episode_data :-
    episode_title(_, _),
    !.
ensure_episode_data :-
    fetch_episode_data.

% retract_episode(+Ep)
%
% Remove the stored title of episode Ep and its 'Hint' datum.  Other data
% keys are left untouched.  Always succeeds, even when nothing is stored
% for Ep.
retract_episode(Ep) :-
    (   episode_title(Ep, _)
    ->  retractall_episode_title(Ep, _)
    ;   true
    ),
    (   episode_datum(Ep, 'Hint', _)
    ->  retractall_episode_datum(Ep, 'Hint', _)
    ;   true
    ).

% episode_count(-N)
%
% N is the highest episode number that has a stored title.  Fails when no
% title is stored (setof/3 fails on an empty solution set).
episode_count(N) :-
    setof(E, T^episode_title(E, T), Es),
    last(Es, N).

% rate_episode(+Ep, +Rating)
%
% Store Rating as the rating of episode Ep.  A Rating of 0 removes any
% stored rating.  A non-zero Rating replaces a previously stored, different
% rating, so that an episode never accumulates several ratings.
rate_episode(Ep, 0) :-
    (   episode_rating(Ep, _)
    ->  retractall_episode_rating(Ep, _)
    ;   true
    ),
    !.
rate_episode(Ep, R) :-
    dif(R, 0),
    (   episode_rating(Ep, R)
    ->  true
    ;   % Validate before retracting: an invalid argument must not destroy
        % the existing rating, and an unbound Ep must never reach
        % retractall_episode_rating/2, where it would match every episode.
        must_be(integer, Ep),
        must_be(integer, R),
        retractall_episode_rating(Ep, _),
        assert_episode_rating(Ep, R)
    ).

% Remote data retrieval.

% episode_number(-Ep)//
%
% Parse the text of an episode-number cell: either a plain integer, or an
% integer followed by "WPS" and a second integer, which is ignored.
episode_number(Ep) --> integer(Ep).
episode_number(Ep) --> integer(Ep), "WPS", integer(_).

% fetch_episode_data
%
% Collect every episode row found in the remote page and store its title
% and data.  Succeeds without storing anything when no row is found.
fetch_episode_data :-
    findall(Ep-Title-Data,
            remote_episode_title_data(Ep, Title, Data),
            Episodes),
    maplist(set_episode_data, Episodes).

% set_episode_data(+Ep-Title-Data)
%
% Store the title and all Key-Value pairs in Data for one episode.
set_episode_data(Ep-Title-Data) :-
    maybe_assert_episode_title(Ep, Title),
    maplist(set_episode_datum(Ep), Data).

% set_episode_datum(+Ep, +Key-Value)
%
% Store a single Key-Value pair for episode Ep.
set_episode_datum(Ep, Key-Value) :-
    maybe_assert_episode_datum(Ep, Key, Value).

% maybe_assert_episode_title(+Ep, +Title)
%
% Store the title fact unless an identical one already exists.
maybe_assert_episode_title(Ep, Title) :-
    (   episode_title(Ep, Title)
    ->  true
    ;   assert_episode_title(Ep, Title)
    ).

% maybe_assert_episode_datum(+Ep, +Key, +Value)
%
% Store the datum fact unless an identical one already exists.
maybe_assert_episode_datum(Ep, Key, Value) :-
    (   episode_datum(Ep, Key, Value)
    ->  true
    ;   assert_episode_datum(Ep, Key, Value)
    ).

% fetch_html(-H)
%
% Download and parse the wiki page and remember the resulting DOM in the
% global variable `html`.  Fails when the download or the parse raises an
% exception; the exception is reported as a warning rather than being
% dropped silently.
fetch_html(H) :-
    catch(http_load_html('https://www.detectiveconanworld.com/wiki/Anime', H),
          Error,
          ( print_message(warning, Error),
            fail
          )),
    !,
    nb_setval(html, H).
% remote_html(-DOM)
%
% DOM is the parsed wiki page.  The copy held in the global variable `html`
% is used when present; otherwise the page is fetched with fetch_html/1.
remote_html(DOM) :-
    (   nb_current(html, DOM)
    ->  true
    ;   fetch_html(DOM)
    ).

% remote_episode_title_data(-Ep, -Title, -Data)
%
% On backtracking, enumerate the episode rows of the remote page.  A row is
% a <tr> whose third <td> carries the style 'background:#f2fde9;'.  Ep is
% parsed from cell 1, Title is cell 3, and Data lists the 'Date' (cell 4),
% 'Source' (cell 7) and 'Hint' (cell 8) values.  Rows whose first cell does
% not parse as an episode number are skipped.
remote_episode_title_data(Ep, Title,
                          ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
    remote_html(DOM),
    xpath(DOM, //tr(td(index(3),@style='background:#f2fde9;')), Row),
    row_cell_text(Row, 1, EpText),
    atom_phrase(episode_number(Ep), EpText),
    row_cell_text(Row, 3, Title),
    row_cell_text(Row, 4, Date),
    row_cell_text(Row, 7, SourceText),
    % Put a space in front of a "(" that is directly followed by a digit.
    re_replace('\\(([0-9])', ' (\\1', SourceText, SpacedSource),
    atom_string(Source, SpacedSource),
    row_cell_text(Row, 8, Hint).

% row_cell_text(+Row, +Index, -Text)
%
% Text is the whitespace-normalised content of the Index-th <td> of Row.
row_cell_text(Row, Index, Text) :-
    xpath(Row, td(index(Index),normalize_space), Text).

% http_load_html(+URL, -DOM)
%
% Open URL with a 60 second timeout and parse the response as HTML using
% the html DTD.  Syntax errors are not reported and do not limit parsing.
% The stream is closed whether or not parsing succeeds.
http_load_html(URL, DOM) :-
    ParseOptions = [ dtd(DTD),
                     dialect(sgml),
                     shorttag(false),
                     max_errors(-1),
                     syntax_errors(quiet)
                   ],
    setup_call_cleanup(
        http_open(URL, Stream, [ timeout(60) ]),
        ( dtd(html, DTD),
          load_structure(stream(Stream), DOM, ParseOptions)
        ),
        close(Stream)).