aboutsummaryrefslogtreecommitdiff
path: root/pl/episode_data.pl
diff options
context:
space:
mode:
authorJohn Ankarström <john@ankarstrom.se>2022-05-28 00:55:08 +0200
committerJohn Ankarström <john@ankarstrom.se>2022-05-28 00:55:08 +0200
commit11a330a7fc93506a44e0b14dd1ad10f7fa4a2104 (patch)
treed987b8776dae4e83b2da85ab623937d7a97e6760 /pl/episode_data.pl
parentb1b296dbd1b36c0033ffc8aff832e7b2ff49c5cc (diff)
downloadEpisodeBrowser-11a330a7fc93506a44e0b14dd1ad10f7fa4a2104.tar.gz
Add "Fetch Screenwriters" menu item.
This is a separate item from "Fetch", because it takes a lot longer.
Diffstat (limited to 'pl/episode_data.pl')
-rw-r--r--pl/episode_data.pl74
1 files changed, 54 insertions, 20 deletions
diff --git a/pl/episode_data.pl b/pl/episode_data.pl
index af91d4e..3e0ee1b 100644
--- a/pl/episode_data.pl
+++ b/pl/episode_data.pl
@@ -1,5 +1,6 @@
:- module(episode_data, [ensure_episode_data/0,
- fetch_episode_data/0,
+ update_episode_data/0,
+ update_screenwriters/0,
retract_episode/1,
episode_count/1,
rate_episode/2,
@@ -26,7 +27,7 @@ detach :-
db_detach.
ensure_episode_data :- episode_title(_, _), !.
-ensure_episode_data :- fetch_episode_data.
+ensure_episode_data :- update_episode_data.
retract_episode(Ep) :-
( episode_title(Ep, _)
@@ -64,12 +65,9 @@ tv_original(Ep) :- episode_datum(Ep, 'Source', 'TV Original').
% Remote data retrieval.
-episode_number(Ep) --> integer(Ep).
-episode_number(Ep) --> integer(Ep), "WPS", integer(_).
-
-fetch_episode_data :-
+update_episode_data :-
findall(Ep-Title-Data,
- (remote_episode_title_data(Ep, Title, Data)),
+ (fetch_episode_title_data(Ep, Title, Data)),
Set),
maplist(set_episode_data, Set).
@@ -94,20 +92,11 @@ maybe_assert_episode_datum(Ep, Key, Value) :-
assert_episode_datum(Ep, Key, Value)
).
-fetch_html(H) :-
- catch(http_load_html('https://www.detectiveconanworld.com/wiki/Anime', H),
- _,
- fail),
- !,
- nb_setval(html, H).
-
-remote_html(H) :-
- ( nb_current(html, H), !
- ; fetch_html(H)
- ).
+episode_number(Ep) --> integer(Ep).
+episode_number(Ep) --> integer(Ep), "WPS", integer(_).
-remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
- remote_html(H),
+fetch_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
+ cached_html('https://www.detectiveconanworld.com/wiki/Anime', H),
xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R),
xpath(R, td(index(1),normalize_space), Ep0),
atom_phrase(episode_number(Ep), Ep0),
@@ -118,6 +107,51 @@ remote_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]
atom_string(Source, Source1),
xpath(R, td(index(8),normalize_space), Hint).
+% update_screenwriters
+%
+% Gather every Episode-Writer pair that maybe_fetch_screenwriter_episode/2
+% yields, then hand each pair to set_episode_screenwriter/1. findall/3
+% finishes collecting before the first pair is stored.
+update_screenwriters :-
+ findall(Episode-Writer,
+ maybe_fetch_screenwriter_episode(Writer, Episode),
+ Pairs),
+ maplist(set_episode_screenwriter, Pairs).
+
+% set_episode_screenwriter(+Pair)
+%
+% Pair is Episode-Writer; passes Writer to maybe_assert_episode_datum/3 as
+% the 'Screenwriter' datum of Episode.
+set_episode_screenwriter(Episode-Writer) :-
+ maybe_assert_episode_datum(Episode, 'Screenwriter', Writer).
+
+% maybe_fetch_screenwriter_episode(-Name, -Ep)
+%
+% Enumerate on backtracking the Name/Ep pairs found by
+% fetch_screenwriter_episode/2, keeping only episodes that have no
+% 'Screenwriter' datum yet.
+%
+% The filter has to run after the fetch: Ep is still unbound before it, and
+% \+ over an unbound Ep would test the whole database rather than one
+% episode. The value argument is anonymous because any existing screenwriter
+% counts, whatever the name.
+maybe_fetch_screenwriter_episode(Name, Ep) :-
+ fetch_screenwriter_episode(Name, Ep),
+ \+ episode_datum(Ep, 'Screenwriter', _).
+
+% fetch_screenwriter(-Name)
+%
+% Name is the whitespace-normalised text of a link inside the 'mw-pages'
+% div of the screenplay-writers category page; further links are produced
+% on backtracking.
+fetch_screenwriter(Name) :-
+ cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', Page),
+ xpath(Page, //'div'(@id='mw-pages')//a(normalize_space), Name).
+
+% fetch_screenwriter_url(-Name, -U)
+%
+% For each link inside the 'mw-pages' div of the screenplay-writers
+% category page, Name is the link's whitespace-normalised text and U is the
+% value of its href attribute, exactly as it appears in the page.
+fetch_screenwriter_url(Name, U) :-
+ cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', Page),
+ xpath(Page, //'div'(@id='mw-pages')//a, Link),
+ xpath(Link, /self(normalize_space), Name),
+ xpath(Link, /self(@href), U).
+
+% absolute_url(R)//: the site origin followed by whatever R describes.
+% NOTE(review): R is used as a DCG body here, so it must be something the
+% DCG machinery can run (e.g. a code list) — confirm what atom_phrase
+% passes for the href taken from the page.
+absolute_url(R) --> "https://www.detectiveconanworld.com", R.
+% screenwriter_episode(Ep)//: Ep is an episode number mentioned anywhere in
+% the text, either as "(Episode N)" or, for "(Episodes A-B)", each integer
+% from A to B inclusive on backtracking. string//1 and integer//1 are
+% presumably the library(dcg/basics) non-terminals — verify the import.
+screenwriter_episode(Ep) --> string(_), "(Episode ", integer(Ep), ")", string(_).
+screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), ")", string(_),
+ { between(Ep1, Ep2, Ep) }.
+
+% fetch_screenwriter_episode(-Name, -Ep)
+%
+% For each screenwriter link (fetch_screenwriter_url/2), load the linked
+% page and yield every episode number that screenwriter_episode//1 finds in
+% the text of a list item inside the 'mw-content-text' div.
+fetch_screenwriter_episode(Name, Ep) :-
+ fetch_screenwriter_url(Name, RelativeUrl),
+ % The href is site-relative; absolute_url//1 prefixes the origin.
+ atom_phrase(absolute_url(RelativeUrl), Url),
+ cached_html(Url, Page),
+ xpath(Page, //div(@id='mw-content-text')//li(normalize_space), ItemText),
+ atom_phrase(screenwriter_episode(Ep), ItemText).
+
+% fetch_html(+U, -H)
+%
+% Load the page at U with http_load_html/2 and remember the result in the
+% global variable named U, where cached_html/2 looks it up. Any exception
+% raised while loading is turned into plain failure, in which case nothing
+% is stored.
+fetch_html(U, H) :-
+ once(catch(http_load_html(U, H), _, fail)),
+ nb_setval(U, H).
+
+% cached_html(+U, -H)
+%
+% H is the HTML stored for U: the value already held in the global variable
+% named U if there is one, otherwise the result of fetch_html/2 (which also
+% stores it).
+cached_html(U, H) :-
+ ( nb_current(U, H)
+ -> true
+ ; fetch_html(U, H)
+ ).
+
http_load_html(URL, DOM) :-
setup_call_cleanup(http_open(URL, In,
[ timeout(60)