From 95b88391c331a82f6a404c49806954cd7f164d3b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?John=20Ankarstr=C3=B6m?= Date: Mon, 4 Apr 2022 21:46:57 +0200 Subject: Fetch more remote data. --- pl/episode_data.pl | 70 ++++++++++++++++++++++++++++++++---------------------- 1 file changed, 42 insertions(+), 28 deletions(-) (limited to 'pl/episode_data.pl') diff --git a/pl/episode_data.pl b/pl/episode_data.pl index 08201f7..6fa5b8d 100644 --- a/pl/episode_data.pl +++ b/pl/episode_data.pl @@ -20,7 +20,7 @@ attach :- detach :- db_detach. -ensure_episode_data :- episode_title(Ep, _), !. +ensure_episode_data :- episode_title(_, _), !. ensure_episode_data :- fetch_episode_data. retract_episode(Ep) :- @@ -39,41 +39,55 @@ episode_count(N) :- % Remote data retrieval. -padding(Ep) --> { Ep #< 10 }, "00". -padding(Ep) --> { Ep #>= 10, Ep #< 100 }, "0". -padding(Ep) --> { Ep #>= 100 }. - -episode_number(Ep) --> padding(Ep), integer(Ep). -episode_number(Ep) --> padding(Ep), integer(Ep), "WPS", integer(_). +episode_number(Ep) --> integer(Ep). +episode_number(Ep) --> integer(Ep), "WPS", integer(_). fetch_episode_data :- - findall(Ep-Title-Hint, - (remote_row(R), - row_episode_title_hint(R, Ep, Title, Hint)), - Data), - maplist(set_episode_data, Data). + findall(Ep-Title-Data, + (remote_episode_title_data(Ep, Title, Data)), + Set), + maplist(set_episode_data, Set). + +set_episode_data(Ep-Title-Data) :- + maybe_assert_episode_title(Ep, Title), + maplist(set_episode_datum(Ep), Data). + +set_episode_datum(Ep, Key-Value) :- + maybe_assert_episode_datum(Ep, Key, Value). -set_episode_data(Ep-Title-Hint) :- +maybe_assert_episode_title(Ep, Title) :- ( episode_title(Ep, Title), ! ; assert_episode_title(Ep, Title) - ), - ( episode_datum(Ep, 'Hint', Hint), ! - ; assert_episode_datum(Ep, 'Hint', Hint) ). -remote_row(R) :- - catch(http_load_html( - 'https://www.detectiveconanworld.com/wiki/Next_Conan%27s_Hint', - R0), +maybe_assert_episode_datum(Ep, Key, Value) :- + ( episode_datum(Ep, Key, Value), ! + ; assert_episode_datum(Ep, Key, Value) + ). + +fetch_html(H) :- + catch(http_load_html('https://www.detectiveconanworld.com/wiki/Anime', H), _, - fail), !, - xpath(R0, //tr, R). - -row_episode_title_hint(R, Ep, Title, Hint) :- - xpath(R, td(index(1),text), T), - atom_phrase(episode_number(Ep), T), - xpath(R, td(index(2),text), Title), - xpath(R, td(index(3),text), Hint). + fail), + !, + nb_setval(html, H). + +remote_html(H) :- + ( nb_current(html, H), ! + ; fetch_html(H) + ). + +remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :- + remote_html(H), + xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R), + xpath(R, td(index(1),text), Ep0), + atom_phrase(episode_number(Ep), Ep0), + xpath(R, td(index(3),text), Title), + xpath(R, td(index(4),text), Date), + xpath(R, td(index(7),text), Source0), + re_replace('\\(([0-9])', ' (\\1', Source0, Source1), + atom_string(Source, Source1), + xpath(R, td(index(8),text), Hint). http_load_html(URL, DOM) :- setup_call_cleanup(http_open(URL, In, -- cgit v1.2.3