diff options
author | John Ankarström <john@ankarstrom.se> | 2022-04-04 21:46:57 +0200 |
---|---|---|
committer | John Ankarström <john@ankarstrom.se> | 2022-04-04 21:46:57 +0200 |
commit | 95b88391c331a82f6a404c49806954cd7f164d3b (patch) | |
tree | ba90a1aa71582244464ee1a080642dfe747cd40d | |
parent | 9d133477d51dd0271d63520071cdf7a11e71c605 (diff) | |
download | EpisodeBrowser-95b88391c331a82f6a404c49806954cd7f164d3b.tar.gz |
Fetch more remote data.
-rw-r--r-- | pl/episode_data.pl | 70 |
1 files changed, 42 insertions, 28 deletions
```diff
diff --git a/pl/episode_data.pl b/pl/episode_data.pl
index 08201f7..6fa5b8d 100644
--- a/pl/episode_data.pl
+++ b/pl/episode_data.pl
@@ -20,7 +20,7 @@ attach :-
 detach :-
     db_detach.
 
-ensure_episode_data :- episode_title(Ep, _), !.
+ensure_episode_data :- episode_title(_, _), !.
 ensure_episode_data :- fetch_episode_data.
 
 retract_episode(Ep) :-
@@ -39,41 +39,55 @@ episode_count(N) :-
 
 % Remote data retrieval.
 
-padding(Ep) --> { Ep #< 10 }, "00".
-padding(Ep) --> { Ep #>= 10, Ep #< 100 }, "0".
-padding(Ep) --> { Ep #>= 100 }.
-
-episode_number(Ep) --> padding(Ep), integer(Ep).
-episode_number(Ep) --> padding(Ep), integer(Ep), "WPS", integer(_).
+episode_number(Ep) --> integer(Ep).
+episode_number(Ep) --> integer(Ep), "WPS", integer(_).
 
 fetch_episode_data :-
-    findall(Ep-Title-Hint,
-            (remote_row(R),
-             row_episode_title_hint(R, Ep, Title, Hint)),
-            Data),
-    maplist(set_episode_data, Data).
+    findall(Ep-Title-Data,
+            (remote_episode_title_data(Ep, Title, Data)),
+            Set),
+    maplist(set_episode_data, Set).
+
+set_episode_data(Ep-Title-Data) :-
+    maybe_assert_episode_title(Ep, Title),
+    maplist(set_episode_datum(Ep), Data).
+
+set_episode_datum(Ep, Key-Value) :-
+    maybe_assert_episode_datum(Ep, Key, Value).
 
-set_episode_data(Ep-Title-Hint) :-
+maybe_assert_episode_title(Ep, Title) :-
     (   episode_title(Ep, Title), !
     ;   assert_episode_title(Ep, Title)
-    ),
-    (   episode_datum(Ep, 'Hint', Hint), !
-    ;   assert_episode_datum(Ep, 'Hint', Hint)
     ).
 
-remote_row(R) :-
-    catch(http_load_html(
-              'https://www.detectiveconanworld.com/wiki/Next_Conan%27s_Hint',
-              R0),
+maybe_assert_episode_datum(Ep, Key, Value) :-
+    (   episode_datum(Ep, Key, Value), !
+    ;   assert_episode_datum(Ep, Key, Value)
+    ).
+
+fetch_html(H) :-
+    catch(http_load_html('https://www.detectiveconanworld.com/wiki/Anime', H),
           _,
-          fail), !,
-    xpath(R0, //tr, R).
-
-row_episode_title_hint(R, Ep, Title, Hint) :-
-    xpath(R, td(index(1),text), T),
-    atom_phrase(episode_number(Ep), T),
-    xpath(R, td(index(2),text), Title),
-    xpath(R, td(index(3),text), Hint).
+          fail),
+    !,
+    nb_setval(html, H).
+
+remote_html(H) :-
+    (   nb_current(html, H), !
+    ;   fetch_html(H)
+    ).
+
+remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
+    remote_html(H),
+    xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R),
+    xpath(R, td(index(1),text), Ep0),
+    atom_phrase(episode_number(Ep), Ep0),
+    xpath(R, td(index(3),text), Title),
+    xpath(R, td(index(4),text), Date),
+    xpath(R, td(index(7),text), Source0),
+    re_replace('\\(([0-9])', ' (\\1', Source0, Source1),
+    atom_string(Source, Source1),
+    xpath(R, td(index(8),text), Hint).
 
 http_load_html(URL, DOM) :-
     setup_call_cleanup(http_open(URL, In,
```