diff options
author | John Ankarström <john@ankarstrom.se> | 2022-05-28 00:55:08 +0200 |
---|---|---|
committer | John Ankarström <john@ankarstrom.se> | 2022-05-28 00:55:08 +0200 |
commit | 11a330a7fc93506a44e0b14dd1ad10f7fa4a2104 (patch) | |
tree | d987b8776dae4e83b2da85ab623937d7a97e6760 /pl/episode_data.pl | |
parent | b1b296dbd1b36c0033ffc8aff832e7b2ff49c5cc (diff) | |
download | EpisodeBrowser-11a330a7fc93506a44e0b14dd1ad10f7fa4a2104.tar.gz |
Add "Fetch Screenwriters" menu item.
This is a separate item from "Fetch", because it takes a lot longer.
Diffstat (limited to 'pl/episode_data.pl')
-rw-r--r-- | pl/episode_data.pl | 74 |
1 file changed, 54 insertions, 20 deletions
diff --git a/pl/episode_data.pl b/pl/episode_data.pl index af91d4e..3e0ee1b 100644 --- a/pl/episode_data.pl +++ b/pl/episode_data.pl @@ -1,5 +1,6 @@ :- module(episode_data, [ensure_episode_data/0, - fetch_episode_data/0, + update_episode_data/0, + update_screenwriters/0, retract_episode/1, episode_count/1, rate_episode/2, @@ -26,7 +27,7 @@ detach :- db_detach. ensure_episode_data :- episode_title(_, _), !. -ensure_episode_data :- fetch_episode_data. +ensure_episode_data :- update_episode_data. retract_episode(Ep) :- ( episode_title(Ep, _) @@ -64,12 +65,9 @@ tv_original(Ep) :- episode_datum(Ep, 'Source', 'TV Original'). % Remote data retrieval. -episode_number(Ep) --> integer(Ep). -episode_number(Ep) --> integer(Ep), "WPS", integer(_). - -fetch_episode_data :- +update_episode_data :- findall(Ep-Title-Data, - (remote_episode_title_data(Ep, Title, Data)), + (fetch_episode_title_data(Ep, Title, Data)), Set), maplist(set_episode_data, Set). @@ -94,20 +92,11 @@ maybe_assert_episode_datum(Ep, Key, Value) :- assert_episode_datum(Ep, Key, Value) ). -fetch_html(H) :- - catch(http_load_html('https://www.detectiveconanworld.com/wiki/Anime', H), - _, - fail), - !, - nb_setval(html, H). - -remote_html(H) :- - ( nb_current(html, H), ! - ; fetch_html(H) - ). +episode_number(Ep) --> integer(Ep). +episode_number(Ep) --> integer(Ep), "WPS", integer(_). -remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :- - remote_html(H), +fetch_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :- + cached_html('https://www.detectiveconanworld.com/wiki/Anime', H), xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R), xpath(R, td(index(1),normalize_space), Ep0), atom_phrase(episode_number(Ep), Ep0), @@ -118,6 +107,51 @@ remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint] atom_string(Source, Source1), xpath(R, td(index(8),normalize_space), Hint). 
+update_screenwriters :- + findall(Ep-Name, + (maybe_fetch_screenwriter_episode(Name, Ep)), + Set), + maplist(set_episode_screenwriter, Set). + +set_episode_screenwriter(Ep-Name) :- + maybe_assert_episode_datum(Ep, 'Screenwriter', Name). + +maybe_fetch_screenwriter_episode(Name, Ep) :- + \+ episode_datum(Ep, 'Screenwriter', Ep), + fetch_screenwriter_episode(Name, Ep). + +fetch_screenwriter(Name) :- + cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H), + xpath(H, //'div'(@id='mw-pages')//a, A), + xpath(A, /self(normalize_space), Name). + +fetch_screenwriter_url(Name, U) :- + cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H), + xpath(H, //'div'(@id='mw-pages')//a, A), + xpath(A, /self(normalize_space), Name), + xpath(A, /self(@href), U). + +absolute_url(R) --> "https://www.detectiveconanworld.com", R. +screenwriter_episode(Ep) --> string(_), "(Episode ", integer(Ep), ")", string(_). +screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), ")", string(_), + { between(Ep1, Ep2, Ep) }. + +fetch_screenwriter_episode(Name, Ep) :- + fetch_screenwriter_url(Name, U0), + atom_phrase(absolute_url(U0), U), + cached_html(U, H), + xpath(H, //div(@id='mw-content-text')//li, L), + xpath(L, /self(normalize_space), T), + atom_phrase(screenwriter_episode(Ep), T). + +fetch_html(U, H) :- + catch(http_load_html(U, H), _, fail), + !, + nb_setval(U, H). + +cached_html(U, H) :- nb_current(U, H), !. +cached_html(U, H) :- fetch_html(U, H). + http_load_html(URL, DOM) :- setup_call_cleanup(http_open(URL, In, [ timeout(60) |