1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
|
:- module(episode_data, [ensure_episode_data/0,
update_episode_data/0,
update_screenwriters/0,
retract_episode/1,
episode_count/1,
rate_episode/2,
episode_rating/2,
tv_original/1]).
:- use_module(library(clpfd)).
:- use_module(library(dcg/basics)).
:- use_module(library(http/http_open)).
:- use_module(library(sgml)).
:- use_module(library(xpath)).
:- use_module(library(persistency)).
:- use_module(atom_dcg).
:- persistent episode_title(episode:integer, title:atom).
:- persistent episode_datum(episode:integer, key:atom, value:atom).
:- persistent episode_rating(episode:integer, rating:integer).
attach :-
absolute_file_name('episode_data.db', F, [access(write)]),
db_attach(F, []).
detach :-
db_detach.
ensure_episode_data :- episode_title(_, _), !.
ensure_episode_data :- update_episode_data.
retract_episode(Ep) :-
( episode_title(Ep, _)
-> retractall_episode_title(Ep, _)
; true
),
( episode_datum(Ep, 'Hint', _)
-> retractall_episode_datum(Ep, 'Hint', _)
; true
).
episode_count(N) :-
setof(E, T^episode_title(E,T), Es),
last(Es, N).
rate_episode(Ep, 0) :-
( episode_rating(Ep, _)
-> retractall_episode_rating(Ep, _)
; true
),
!.
rate_episode(Ep, R) :-
dif(R, 0),
( episode_rating(Ep, R)
-> true
; ( episode_rating(Ep, _)
-> retractall_episode_rating(Ep, _)
; true
),
assert_episode_rating(Ep, R)
).
tv_original(Ep) :- episode_datum(Ep, 'Source', 'TV Original').
% Remote data retrieval.
update_episode_data :-
findall(Ep-Title-Data,
(fetch_episode_title_data(Ep, Title, Data)),
Set),
maplist(set_episode_data, Set).
set_episode_data(Ep-Title-Data) :-
maybe_assert_episode_title(Ep, Title),
maplist(set_episode_datum(Ep), Data).
set_episode_datum(Ep, Key-Value) :-
maybe_assert_episode_datum(Ep, Key, Value).
maybe_assert_episode_title(Ep, Title) :-
( episode_title(Ep, Title), !
; assert_episode_title(Ep, Title)
).
maybe_assert_episode_datum(Ep, Key, Value) :-
( episode_datum(Ep, Key, Value), !
; ( episode_datum(Ep, Key, _)
-> retract_episode_datum(Ep, Key, _)
; true
),
assert_episode_datum(Ep, Key, Value)
).
episode_number(Ep) --> integer(Ep).
episode_number(Ep) --> integer(Ep), "WPS", integer(_).
fetch_episode_title_data(Ep, Title, ['Date'-Date, 'Source'-Source, 'Hint'-Hint]) :-
cached_html('https://www.detectiveconanworld.com/wiki/Anime', H),
xpath(H, //tr(td(index(3),@style='background:#f2fde9;')), R),
xpath(R, td(index(1),normalize_space), Ep0),
atom_phrase(episode_number(Ep), Ep0),
xpath(R, td(index(3),normalize_space), Title),
xpath(R, td(index(4),normalize_space), Date),
xpath(R, td(index(7),normalize_space), Source0),
re_replace('\\(([0-9])', ' (\\1', Source0, Source1),
atom_string(Source, Source1),
xpath(R, td(index(8),normalize_space), Hint).
update_screenwriters :-
findall(Ep-Name,
(maybe_fetch_screenwriter_episode(Name, Ep)),
Set),
maplist(set_episode_screenwriter, Set).
set_episode_screenwriter(Ep-Name) :-
maybe_assert_episode_datum(Ep, 'Screenwriter', Name).
maybe_fetch_screenwriter_episode(Name, Ep) :-
\+ episode_datum(Ep, 'Screenwriter', Ep),
fetch_screenwriter_episode(Name, Ep).
fetch_screenwriter(Name) :-
cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H),
xpath(H, //'div'(@id='mw-pages')//a, A),
xpath(A, /self(normalize_space), Name).
fetch_screenwriter_url(Name, U) :-
cached_html('https://www.detectiveconanworld.com/wiki/Category:Screenplay_writers', H),
xpath(H, //'div'(@id='mw-pages')//a, A),
xpath(A, /self(normalize_space), Name),
xpath(A, /self(@href), U).
absolute_url(R) --> "https://www.detectiveconanworld.com", R.
screenwriter_episode(Ep) --> string(_), "(Episode ", integer(Ep), ")".
screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), ")",
{ between(Ep1, Ep2, Ep) }.
screenwriter_episode(Ep) --> string(_), "(Episodes ", integer(Ep1), "-", integer(Ep2), " only)",
{ between(Ep1, Ep2, Ep) }.
fetch_screenwriter_episode(Name, Ep) :-
fetch_screenwriter_url(Name, U0),
atom_phrase(absolute_url(U0), U),
cached_html(U, H),
xpath(H, //div(@id='mw-content-text')//li, L),
xpath(L, /self(normalize_space), T),
atom_phrase(screenwriter_episode(Ep), T).
fetch_html(U, H) :-
catch(http_load_html(U, H), _, fail),
!,
nb_setval(U, H).
cached_html(U, H) :- nb_current(U, H), !.
cached_html(U, H) :- fetch_html(U, H).
http_load_html(URL, DOM) :-
setup_call_cleanup(http_open(URL, In,
[ timeout(60)
]),
( dtd(html, DTD),
load_structure(stream(In),
DOM,
[ dtd(DTD),
dialect(sgml),
shorttag(false),
max_errors(-1),
syntax_errors(quiet)
])
),
close(In)).
|