aboutsummaryrefslogtreecommitdiff
path: root/pl/episode_data.pl
blob: bde723f30bbfb119c81b0cab1d443118f1768846 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
:- module(episode_data, [ensure_episode_data/0,
			 update_episode_data/0,
			 update_screenwriters/0,
			 retract_episode/1,
			 episode_count/1,
			 rate_episode/2,
			 episode_rating/2,
			 tv_original/1]).

:- use_module(library(clpfd)).
:- use_module(library(dcg/basics)).
:- use_module(library(http/http_open)).
:- use_module(library(sgml)).
:- use_module(library(xpath)).
:- use_module(library(persistency)).
:- use_module(atom_dcg).

% Persistent fact tables, declared through library(persistency).  Each
% declaration names a fact and gives the type of every argument.  The
% rest of this module reads the facts directly (e.g. episode_title/2) and
% changes them through the assert_/retract_/retractall_ prefixed
% predicates (e.g. assert_episode_title/2).
%
%   episode_title(Episode, Title)       - title of an episode number
%   episode_datum(Episode, Key, Value)  - keyed datum for an episode; keys
%                                         used in this file are 'Date',
%                                         'Source', 'Hint', 'Screenwriter'
%   episode_rating(Episode, Rating)     - integer rating of an episode
:- persistent episode_title(episode:integer, title:atom).
:- persistent episode_datum(episode:integer, key:atom, value:atom).
:- persistent episode_rating(episode:integer, rating:integer).

%!  attach
%
%   Resolve 'episode_data.db' to an absolute file name for which write
%   access is possible and attach it as the persistent database file
%   (with an empty option list).
attach :-
    absolute_file_name('episode_data.db', DbFile, [access(write)]),
    db_attach(DbFile, []).

%!  detach
%
%   Detach the persistent database file by calling db_detach/0.
detach :-
	db_detach.

%!  ensure_episode_data
%
%   Make sure some episode data is available.  If at least one
%   episode_title/2 fact already exists nothing is done; otherwise the
%   data is retrieved through update_episode_data/0.
ensure_episode_data :-
    (   episode_title(_, _)
    ->  true
    ;   update_episode_data
    ).

%!  retract_episode(?Ep)
%
%   Remove the stored title of episode Ep and its 'Hint' datum, each only
%   if such a fact is present.  Other data keys and the rating are left
%   untouched.  Always succeeds.
retract_episode(Ep) :-
    forget_episode_title(Ep),
    forget_episode_hint(Ep).

% Drop every title fact of Ep, provided at least one exists.
forget_episode_title(Ep) :-
    episode_title(Ep, _),
    !,
    retractall_episode_title(Ep, _).
forget_episode_title(_).

% Drop every 'Hint' datum of Ep, provided at least one exists.
forget_episode_hint(Ep) :-
    episode_datum(Ep, 'Hint', _),
    !,
    retractall_episode_datum(Ep, 'Hint', _).
forget_episode_hint(_).

%!  episode_count(?N)
%
%   N is the highest episode number that has an episode_title/2 fact.
%   Fails when no titles are stored.
%
%   Note that this is the largest episode number, not the number of
%   stored facts; the two only coincide when episodes are numbered
%   contiguously from 1.
episode_count(N) :-
    findall(Episode, episode_title(Episode, _), Episodes),
    max_list(Episodes, N).

%!  rate_episode(+Ep, +R)
%
%   Store rating R for episode Ep.
%
%     - R = 0 removes any stored rating of Ep.
%     - Any other R replaces the stored rating of Ep; if exactly that
%       rating is already stored, nothing is changed.
rate_episode(Ep, 0) :-
    clear_episode_rating(Ep),
    !.

rate_episode(Ep, R) :-
    dif(R, 0),
    (   episode_rating(Ep, R)
    ->  true
    ;   clear_episode_rating(Ep),
        assert_episode_rating(Ep, R)
    ).

% Remove all rating facts of Ep if there is at least one; always succeeds.
clear_episode_rating(Ep) :-
    (   episode_rating(Ep, _)
    ->  retractall_episode_rating(Ep, _)
    ;   true
    ).

%!  tv_original(?Episode)
%
%   True when the 'Source' datum stored for Episode is 'TV Original'.
tv_original(Episode) :-
    episode_datum(Episode, 'Source', 'TV Original').

% Remote data retrieval.

%!  update_episode_data
%
%   Collect every Episode-Title-Data triple that
%   fetch_episode_title_data/3 yields and then store each of them with
%   set_episode_data/1.  All triples are collected before the first one
%   is stored.
update_episode_data :-
    findall(Episode-Title-Data,
            fetch_episode_title_data(Episode, Title, Data),
            Fetched),
    maplist(set_episode_data, Fetched).

%!  set_episode_data(+Triple)
%
%   Triple is Episode-Title-Pairs, where Pairs is a list of Key-Value
%   terms.  Stores the title and then every Key-Value pair for Episode.
set_episode_data(Episode-Title-Pairs) :-
    maybe_assert_episode_title(Episode, Title),
    maplist(set_episode_datum(Episode), Pairs).

%!  set_episode_datum(+Episode, +Pair)
%
%   Pair is Key-Value.  Unpacks the pair and stores it for Episode via
%   maybe_assert_episode_datum/3; the argument order suits maplist/2.
set_episode_datum(Episode, Pair) :-
    Pair = Key-Value,
    maybe_assert_episode_datum(Episode, Key, Value).

%!  maybe_assert_episode_title(+Ep, +Title)
%
%   Make Title the stored title of episode Ep.
%
%     - If exactly this title is already stored, nothing is changed.
%     - Otherwise any other title stored for Ep is removed first, so an
%       episode never ends up with several titles when the remote title
%       changes.  This mirrors how maybe_assert_episode_datum/3 replaces
%       an outdated value.
maybe_assert_episode_title(Ep, Title) :-
    (   episode_title(Ep, Title)
    ->  true
    ;   (   episode_title(Ep, _)
        ->  retractall_episode_title(Ep, _)   % drop the stale title(s)
        ;   true
        ),
        assert_episode_title(Ep, Title)
    ).

%!  maybe_assert_episode_datum(+Ep, +Key, +Value)
%
%   Make Value the single stored value of Key for episode Ep.
%
%     - If exactly this Ep/Key/Value fact is already stored, nothing is
%       changed.
%     - Otherwise every other value stored under Key for Ep is removed
%       before the new fact is asserted.
%
%   retractall is used rather than retract so that all stale values are
%   removed in one step and no choice point is left behind, in line with
%   how rate_episode/2 replaces a rating.
maybe_assert_episode_datum(Ep, Key, Value) :-
    (   episode_datum(Ep, Key, Value)
    ->  true
    ;   (   episode_datum(Ep, Key, _)
        ->  retractall_episode_datum(Ep, Key, _)
        ;   true
        ),
        assert_episode_datum(Ep, Key, Value)
    ).

%!  episode_number(-Ep)//
%
%   Parse an episode number: an integer, optionally followed by the
%   literal "WPS" and a second integer whose value is ignored.
episode_number(Ep) -->
    integer(Ep),
    (   []
    ;   "WPS", integer(_)
    ).

%!  fetch_episode_title_data(-Ep, -Title, -Data)
%
%   Enumerate, on backtracking, the episodes listed on the wiki's Anime
%   page.  A table row is considered when its third cell carries the
%   style 'background:#f2fde9;'.
%
%   @arg Ep     episode number parsed from the first cell with
%               episode_number//1
%   @arg Title  normalised text of the third cell
%   @arg Data   ['Date'-Date, 'Source'-Source, 'Hint'-Hint], taken from
%               cells 4, 7 and 8.  In the source text a space is
%               inserted before the first "(" that is directly followed
%               by a digit.
fetch_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
    cached_html('https://www.detectiveconanworld.com/wiki/Anime', Page),
    xpath(Page, //tr(td(index(3),@style='background:#f2fde9;')), Row),
    xpath(Row, td(index(1),normalize_space), EpText),
    atom_phrase(episode_number(Ep), EpText),
    xpath(Row, td(index(3),normalize_space), Title),
    xpath(Row, td(index(4),normalize_space), Date),
    xpath(Row, td(index(7),normalize_space), RawSource),
    re_replace('\\(([0-9])', ' (\\1', RawSource, SpacedSource),
    atom_string(Source, SpacedSource),   % re_replace/4 yields a string
    xpath(Row, td(index(8),normalize_space), Hint).

%!  update_screenwriters
%
%   Collect every Episode-Name pair produced by
%   maybe_fetch_screenwriter_episode/2 and then store each pair with
%   set_episode_screenwriter/1.  All pairs are collected before the
%   first one is stored.
update_screenwriters :-
    findall(Episode-Writer,
            maybe_fetch_screenwriter_episode(Writer, Episode),
            Credits),
    maplist(set_episode_screenwriter, Credits).

%!  set_episode_screenwriter(+Credit)
%
%   Credit is Episode-Writer.  Stores Writer as the 'Screenwriter' datum
%   of Episode.
set_episode_screenwriter(Credit) :-
    Credit = Episode-Writer,
    maybe_assert_episode_datum(Episode, 'Screenwriter', Writer).

%!  maybe_fetch_screenwriter_episode(-Name, -Ep)
%
%   Enumerate, on backtracking, screenwriter Name / episode Ep pairs
%   obtained from fetch_screenwriter_episode/2, guarded by a check
%   against the stored 'Screenwriter' data.
%
%   NOTE(review): the guard looks like it can never fail.  In the only
%   caller in this file (update_screenwriters/0) Ep is still unbound
%   here, and the same variable is used for both the episode column
%   (declared integer) and the value column (declared atom), so no
%   stored fact should match.  Presumably the third argument was meant
%   to be Name or `_`, and/or the check was meant to run after the
%   fetch - TODO confirm the intended condition before changing it.
maybe_fetch_screenwriter_episode(Name, Ep) :-
	\+ episode_datum(Ep, 'Screenwriter', Ep),
	fetch_screenwriter_episode(Name, Ep).

%!  fetch_screenwriter(-Name)
%
%   Enumerate, on backtracking, the normalised link texts found inside
%   the 'mw-pages' div of the wiki's Screenplay_writers category page.
fetch_screenwriter(Name) :-
    cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', Page),
    xpath(Page, //'div'(@id='mw-pages')//a, Link),
    xpath(Link, /self(normalize_space), Name).

%!  fetch_screenwriter_url(-Name, -U)
%
%   Like fetch_screenwriter/1, but additionally yields U, the href
%   attribute of the link whose normalised text is Name.  The href is
%   returned as found in the page; fetch_screenwriter_episode/2 prefixes
%   it with the site address.
fetch_screenwriter_url(Name, U) :-
    cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', Page),
    xpath(Page, //'div'(@id='mw-pages')//a, Link),
    xpath(Link, /self(normalize_space), Name),
    xpath(Link, /self(@href), U).

%!  absolute_url(+Path)//
%
%   The site address "https://www.detectiveconanworld.com" followed by
%   whatever Path describes.  Path is used directly as a grammar body.
absolute_url(Path) -->
    "https://www.detectiveconanworld.com",
    Path.
%!  screenwriter_episode(-Ep)//
%
%   Extract an episode number from a credit line.  After an arbitrary
%   prefix, three forms are recognised:
%
%     - "(Episode N)"            yields N
%     - "(Episodes A-B)"         yields every integer from A to B
%     - "(Episodes A-B only)"    yields every integer from A to B
%
%   For the range forms Ep is enumerated on backtracking.
screenwriter_episode(Ep) -->
    string(_), "(Episode ", integer(Ep), ")".
screenwriter_episode(Ep) -->
    string(_), episode_range_prefix(First, Last), ")",
    { between(First, Last, Ep) }.
screenwriter_episode(Ep) -->
    string(_), episode_range_prefix(First, Last), " only)",
    { between(First, Last, Ep) }.

% The part shared by both range forms, up to but excluding the closing text.
episode_range_prefix(First, Last) -->
    "(Episodes ", integer(First), "-", integer(Last).

%!  fetch_screenwriter_episode(-Name, -Ep)
%
%   Enumerate, on backtracking, the episodes Ep credited on the wiki
%   page of screenwriter Name.  The writer's link is turned into a full
%   URL with absolute_url//1, the page is loaded through cached_html/2,
%   and each list item inside the 'mw-content-text' div is parsed with
%   screenwriter_episode//1.  Items that do not parse are skipped.
fetch_screenwriter_episode(Name, Ep) :-
    fetch_screenwriter_url(Name, Href),
    atom_phrase(absolute_url(Href), PageURL),
    cached_html(PageURL, Page),
    xpath(Page, //div(@id='mw-content-text')//li, Item),
    xpath(Item, /self(normalize_space), ItemText),
    atom_phrase(screenwriter_episode(Ep), ItemText).

%!  fetch_html(+U, -H)
%
%   Download and parse the page at URL U into DOM H, and remember the
%   result in the global variable named U so that cached_html/2 can
%   reuse it.
%
%   Fails if loading raises an exception.  The exception is no longer
%   dropped silently: it is reported as a warning first, so that network
%   or parse problems are visible instead of looking like an empty page.
fetch_html(U, H) :-
    catch(http_load_html(U, H),
          Error,
          (   print_message(warning, Error),
              fail
          )),
    !,
    nb_setval(U, H).

%!  cached_html(+U, -H)
%
%   H is the parsed page for URL U.  A copy stored under the global
%   variable named U is used when present; otherwise the page is
%   retrieved with fetch_html/2.
cached_html(U, H) :-
    (   nb_current(U, H)
    ->  true
    ;   fetch_html(U, H)
    ).

%!  http_load_html(+URL, -DOM)
%
%   Open URL with a 60 second timeout and parse the response into DOM
%   using the HTML DTD in SGML dialect.  Syntax errors are not reported
%   and there is no limit on their number (max_errors(-1)).  The stream
%   is closed in the cleanup step, whether or not parsing succeeds.
http_load_html(URL, DOM) :-
    ParseOptions = [ dtd(DTD),
                     dialect(sgml),
                     shorttag(false),
                     max_errors(-1),
                     syntax_errors(quiet)
                   ],
    setup_call_cleanup(
        http_open(URL, Stream, [timeout(60)]),
        (   dtd(html, DTD),            % binds the DTD used in ParseOptions
            load_structure(stream(Stream), DOM, ParseOptions)
        ),
        close(Stream)).