:- module(episode_data,
          [ ensure_episode_data/0,
            retract_episode/1,
            episode_count/1
          ]).

:- use_module(library(clpfd)).
:- use_module(library(dcg/basics)).
:- use_module(library(http/http_open)).
:- use_module(library(sgml)).
:- use_module(library(xpath)).
:- use_module(library(persistency)).
:- use_module(atom_dcg).

% Persistent fact tables (see library(persistency)).
:- persistent episode_title(episode:integer, title:atom).
:- persistent episode_datum(episode:integer, key:atom, value:atom).

%!  attach
%
%   Attach the persistent fact tables to the file 'episode_data.db',
%   resolved with absolute_file_name/3 and required to be writable.
%
%   NOTE(review): nothing in the visible part of this file calls
%   attach/0 -- confirm it is invoked before any assert_*/retractall_*
%   call is made.
attach :-
    absolute_file_name('episode_data.db', File, [access(write)]),
    db_attach(File, []).

%!  detach
%
%   Detach the persistent fact tables from their backing file.
detach :-
    db_detach.

%!  ensure_episode_data
%
%   Succeed immediately when at least one episode_title/2 fact is
%   already known; otherwise populate the tables through
%   fetch_episode_data/0.
ensure_episode_data :-
    episode_title(_, _),
    !.
ensure_episode_data :-
    fetch_episode_data.

%!  retract_episode(?Ep)
%
%   Remove the title fact(s) and the 'Hint' datum fact(s) stored for
%   episode Ep.  Data stored under keys other than 'Hint' is left in
%   place.  Always succeeds, also when nothing is stored for Ep.
retract_episode(Ep) :-
    (   episode_title(Ep, _)
    ->  retractall_episode_title(Ep, _)
    ;   true
    ),
    (   episode_datum(Ep, 'Hint', _)
    ->  retractall_episode_datum(Ep, 'Hint', _)
    ;   true
    ).

%!  episode_count(-N)
%
%   N is the highest episode number that has an episode_title/2 fact:
%   setof/3 yields the episode numbers sorted and without duplicates,
%   and last/2 picks the greatest.  Fails when no title is stored,
%   because setof/3 fails on an empty solution set.
episode_count(N) :-
    setof(E, T^episode_title(E, T), Episodes),
    last(Episodes, N).

% Remote data retrieval.

%!  padding(?Ep)//
%
%   Leading zeros that pad episode number Ep to three digits.  The
%   CLP(FD) constraints let the constraint be posted before Ep is
%   known, so it is checked once integer//1 binds Ep.
padding(Ep) --> { Ep #< 10 }, "00".
padding(Ep) --> { Ep #>= 10, Ep #< 100 }, "0".
padding(Ep) --> { Ep #>= 100 }.

%!  episode_number(?Ep)//
%
%   A zero-padded episode number, optionally followed by "WPS" and a
%   further integer that is ignored.
episode_number(Ep) -->
    padding(Ep),
    integer(Ep),
    episode_number_suffix.

% The plain form is tried before the "WPS<n>" form.
episode_number_suffix --> [].
episode_number_suffix --> "WPS", integer(_).

%!  fetch_episode_data
%
%   Collect every Episode-Title-Hint triple that can be extracted from
%   the remote table rows and store each one with set_episode_data/1.
%   When the remote page cannot be loaded, remote_row/1 has no
%   solutions and nothing is stored.
fetch_episode_data :-
    findall(Ep-Title-Hint,
            (   remote_row(Row),
                row_episode_title_hint(Row, Ep, Title, Hint)
            ),
            Data),
    maplist(set_episode_data, Data).

%!  set_episode_data(+Triple)
%
%   Triple is Ep-Title-Hint.  Store the title and the 'Hint' datum for
%   Ep unless exactly that fact is already present.
set_episode_data(Ep-Title-Hint) :-
    (   episode_title(Ep, Title)
    ->  true
    ;   assert_episode_title(Ep, Title)
    ),
    (   episode_datum(Ep, 'Hint', Hint)
    ->  true
    ;   assert_episode_datum(Ep, 'Hint', Hint)
    ).

%!  remote_row(-Row)
%
%   Enumerate on backtracking the `tr` elements of the remote hint
%   page.  If loading the page raises an exception, the exception is
%   reported as a warning and the predicate fails.
remote_row(Row) :-
    catch(http_load_html(
              'https://www.detectiveconanworld.com/wiki/Next_Conan%27s_Hint',
              DOM),
          Error,
          (   print_message(warning, Error),
              fail
          )),
    !,
    xpath(DOM, //tr, Row).

%!  row_episode_title_hint(+Row, -Ep, -Title, -Hint)
%
%   Extract the episode number, title and hint from the text of the
%   first, second and third `td` cell of Row.  Fails for rows whose
%   first cell is not accepted by episode_number//1.
%
%   NOTE(review): atom_phrase/2 comes from the project-local atom_dcg
%   module; presumably it applies the grammar to the text of the atom
%   -- confirm.
row_episode_title_hint(Row, Ep, Title, Hint) :-
    xpath(Row, td(index(1),text), NumberText),
    atom_phrase(episode_number(Ep), NumberText),
    xpath(Row, td(index(2),text), Title),
    xpath(Row, td(index(3),text), Hint).
%!  http_load_html(+URL, -DOM)
%
%   Open URL with http_open/3 (60 second timeout) and parse the
%   response stream into DOM with load_structure/3, using the HTML DTD
%   and the SGML dialect.  Syntax errors are neither limited in number
%   nor reported.  The stream is closed by setup_call_cleanup/3 whether
%   parsing succeeds, fails or raises.  Exceptions from opening or
%   parsing are not caught here.
http_load_html(URL, DOM) :-
    ParseOptions = [ dtd(DTD),
                     dialect(sgml),
                     shorttag(false),
                     max_errors(-1),
                     syntax_errors(quiet)
                   ],
    setup_call_cleanup(
        http_open(URL, Stream, [ timeout(60) ]),
        (   dtd(html, DTD),      % binds the DTD referenced in ParseOptions
            load_structure(stream(Stream), DOM, ParseOptions)
        ),
        close(Stream)).