222 lines
8.3 KiB
Python
Executable File
222 lines
8.3 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
1. Read a movie_list text file
2. Query IMDb for each entry, retrieving the official title, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
|
|
"""
|
|
|
|
import os
|
|
import sys
|
|
import time
|
|
import threading
|
|
from pathlib import Path
|
|
import progressbar
|
|
from imdb import IMDb
|
|
from imdb._exceptions import IMDbParserError
|
|
|
|
|
|
class MovieList:
    """Generate a sortable HTML table of movies enriched with IMDb data.

    Each non-blank line of the input file describes one entry in the form
    ``Title <kind> (year) [status]``; ``<kind>`` is optional and defaults
    to ``movie``.  Rows found in a previously generated output file are
    reused instead of being looked up again, unless they contain ``N/A``.
    """

    # Upper bound on the number of IMDb lookups running at the same time.
    MAX_THREADS = 10
    # Number of tries for a failing IMDb search and the pause between tries.
    MAX_SEARCH_ATTEMPTS = 5
    RETRY_DELAY = 10

    def __init__(self, src=None, dst=None):
        """Set up the generator and load a previous output file, if any.

        src: path of the movie list; resolved in gen(), where it defaults
             to ``movie_list`` next to the running script.
        dst: path of the HTML output; defaults to ``index.html`` next to
             the running script.
        """
        self.prev_html = []
        self.html = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead>
<tbody>"""
        self.src = src
        self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html'
        self.movie_list = {}
        self.threads = []
        self.html_table = None
        self.read_prev_output()

    @staticmethod
    def _parse_line(raw_line):
        """Split one movie list line into title, kind, year and status.

        Missing markers yield an empty string (or ``movie`` for the kind)
        instead of raising or returning a garbage slice.
        """
        def between(opener, closer, use_last_closer=False):
            # Text enclosed by the first `opener` and a following `closer`.
            start = raw_line.find(opener)
            if start < 0:
                return ''
            if use_last_closer:
                end = raw_line.rfind(closer)
            else:
                end = raw_line.find(closer, start + 1)
            if end <= start:
                return ''
            return raw_line[start + 1:end]

        # The title ends where the kind or year marker begins.  Without
        # either marker, fall back to the status marker, then to the line.
        stops = [pos for pos in (raw_line.find('<'), raw_line.find('(')) if pos >= 0]
        if not stops:
            bracket = raw_line.find('[')
            stops = [bracket if bracket > 0 else len(raw_line)]

        return {
            'title': raw_line[:min(stops)].strip(),
            'kind': between('<', '>', use_last_closer=True).strip('<>').strip() or 'movie',
            'year': between('(', ')').strip(),
            'status': between('[', ']').strip(),
        }

    def _find_cached_row(self, title):
        """Return the previous output row generated for *title*, or None.

        Rows are matched on the hidden ``<p hidden>title</p>`` marker that
        every generated row carries, so "Alien" no longer matches the row
        of "Aliens".  Rows containing ``N/A`` are ignored so that missing
        data is looked up again.
        """
        marker = f'<p hidden>{title}</p>'
        for row in self.prev_html:
            if marker in row and 'N/A' not in row:
                return row
        return None

    @staticmethod
    def _reuse_row(row, index, status):
        """Rebuild a cached row with a fresh index and the current status."""
        # Drop the old index cell; everything after it is kept.
        remainder = row[row.find('</td>') + 5:]
        # The status is the last centered cell.  Rewrite only that cell so
        # asterisks elsewhere in the row (e.g. in a title) stay untouched.
        head, marker, _old_status = remainder.rpartition('<td align="center">')
        if marker:
            remainder = f'{head}{marker}{status}</td></tr>'
        return f'\n{" "*8}<tr><td>{index + 1}</td>{remainder}'

    def _search(self, imdb, arg):
        """Search IMDb for *arg*, retrying a bounded number of times.

        Returns the search results, or an empty list when IMDb's answer
        cannot be parsed or every attempt failed.
        """
        last_error = None
        for attempt in range(1, self.MAX_SEARCH_ATTEMPTS + 1):
            try:
                return imdb.search_movie(f'{arg["title"]} {arg["year"]}')
            except IMDbParserError:
                return []
            except Exception as exc:  # any other failure is retried after a pause
                last_error = exc
                if attempt < self.MAX_SEARCH_ATTEMPTS:
                    time.sleep(self.RETRY_DELAY)
        sys.stderr.write(
            f'error: giving up on {arg["title"]!r}: '
            f'{last_error.__class__.__name__}: {last_error}\n'
        )
        return []

    @staticmethod
    def _pick_match(imdb, query, arg):
        """Return the first search result that fits *arg*, or None."""
        for entry in query:
            imdb.update(entry)
            keys = entry.keys()
            # Skip results lacking any of the fields used below.
            if not all(key in keys for key in ('kind', 'year', 'title')):
                continue
            # Entries marked DONE must come with a rating.
            if arg['status'] == 'DONE' and 'rating' not in keys:
                continue
            # Try to eliminate episode results: no "episode" among the keys,
            # and none in the result title unless it was searched for.
            if any('episode' in key.lower() for key in keys) or (
                    'episode' in entry['title'].lower()
                    and 'episode' not in arg['title'].lower()):
                continue
            if entry['kind'].lower() == arg['kind'].lower():
                return entry
        return None

    @staticmethod
    def _render_row(index, arg, movie):
        """Render one table row; *movie* is None when IMDb gave no match."""
        if movie is None:
            title_cell = arg['title']
            year = arg['year']
            rating = 'N/A'
            genres = ['N/A']
        else:
            title_cell = (
                f'<a href="https://www.imdb.com/title/tt{movie.movieID}" '
                f'target="_blank">{movie["title"]}</a>'
            )
            year = movie['year']
            keys = movie.keys()
            rating = movie['rating'] if 'rating' in keys else 'N/A'
            genres = movie['genres'] if 'genres' in keys else ['N/A']
        return (
            f'\n{" "*8}<tr><td>{index + 1}</td>'
            f'<td><p hidden>{arg["title"]}</p>{title_cell}</td>'
            f'<td>{year}</td><td align="center">{rating}</td>'
            f'<td>{", ".join(genres)}</td>'
            f'<td align="center">{arg["status"]}</td></tr>'
        )

    def _worker(self, arg, index):
        """Look up one movie on IMDb and store its row in html_table[index].

        Collects rating, genres, official name and a hyperlink.  A failed
        lookup is reported on stderr and produces an N/A row, so the slot
        is always filled and gen() can join the table.
        """
        try:
            imdb = IMDb()
            movie = self._pick_match(imdb, self._search(imdb, arg), arg)
        except Exception as exc:  # thread boundary: never leave the slot empty
            sys.stderr.write(
                f'error: lookup failed for {arg["title"]!r}: '
                f'{exc.__class__.__name__}: {exc}\n'
            )
            movie = None
        self.html_table[index] = self._render_row(index, arg, movie)

    def gen(self):
        """Generate the HTML rows from the input list, using worker threads.

        Returns True on success, or False (after an error message on
        stderr) when the input file does not exist.
        """
        if self.src:
            self.src = Path(self.src)
        else:
            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
        if not self.src.exists():
            sys.stderr.write(f'error: input does not exist - {self.src}\n')
            return False

        # Open the movie list & split the columns; blank lines are skipped.
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            entries = [
                self._parse_line(raw_line)
                for raw_line in fp_handle.read().splitlines()
                if raw_line.strip()
            ]
        self.movie_list = dict(enumerate(entries))
        self.html_table = [None] * len(self.movie_list)

        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        for idx, movie in self.movie_list.items():
            cached = self._find_cached_row(movie['title'])
            if cached is not None:
                # Directly insert the HTML line from the older output.
                self.html_table[idx] = self._reuse_row(cached, idx, movie['status'])
                pbar.increment()
            else:
                self.threads.append(
                    threading.Thread(target=self._worker, args=(movie, idx)))

        while self.threads:
            running = sum(1 for thread in self.threads if thread.is_alive())
            waiting = [thread for thread in self.threads if thread.ident is None]
            # Start only as many lookups as there are free slots.
            for thread in waiting[:max(self.MAX_THREADS - running, 0)]:
                thread.start()
                time.sleep(1)
            time.sleep(1)
            for _ in range(self.delete_finished_threads()):
                pbar.increment()

        self.html += ''.join(row for row in self.html_table if row is not None)
        return True

    def delete_finished_threads(self):
        """Join and drop every finished thread; return how many were dropped."""
        remaining = []
        reaped = 0
        for thread in self.threads:
            # ident is None until start() has been called.
            if thread.ident is not None and not thread.is_alive():
                thread.join()
                reaped += 1
            else:
                remaining.append(thread)
        self.threads = remaining
        return reaped

    def get_alive_threads(self):
        """Return the threads that have been started.

        Despite the name this includes threads that already finished but
        have not yet been removed by delete_finished_threads().
        """
        return [thread for thread in self.threads if thread.ident is not None]

    def write(self, dst=None):
        """Write the complete HTML document to *dst* (default: self.dst).

        The footer is added to a local copy, so self.html is left
        unchanged and repeated calls produce the same document.
        """
        out_path = Path(dst) if dst else self.dst
        # Just a fancy scrollbar for the html
        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
        footer = ('\n\t</tbody>\n</table>\n' +
                  '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
                  ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
        # Written as bytes so the line endings are not translated.
        out_path.write_bytes((self.html + footer).encode('utf8'))

    def read_prev_output(self):
        """Load the lines of a previously generated output file, if present."""
        if self.dst.exists():
            self.prev_html = self.dst.read_bytes().decode('utf8').split('\n')
|
|
|
|
|
|
def main():
    """Run the generator from the command line.

    Usage: ``script [movie_list [output_html]]``.  Both arguments are
    optional; MovieList supplies the defaults.  Exits with status 1 when
    more than two arguments are given or when the list cannot be
    generated, so callers can detect the failure.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        sys.stderr.write(f'error: max 2 variables, {len(args)} given!\n')
        sys.exit(1)

    src = args[0] if args else None
    dst = args[1] if len(args) == 2 else None

    mlist = MovieList(src=src, dst=dst)
    if not mlist.gen():
        # gen() has already reported the reason on stderr.
        sys.exit(1)
    mlist.write(dst=dst)
|
|
|
|
|
|
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|