160 lines
5.0 KiB
Python
Executable File
160 lines
5.0 KiB
Python
Executable File
#!/usr/bin/env python
|
|
|
|
"""
|
|
1. Import a movie_list txt file
|
|
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
|
|
3. Generate an HTML table from the IMDb data
|
|
4. Store the HTML in index.html
|
|
"""
|
|
|
|
import contextlib
import html
import os
import re
import sys
import threading
import time
from http.client import IncompleteRead
from pathlib import Path

import progressbar
from imdb import IMDb
|
|
|
|
|
|
class MovieList:
    """Generate a sortable HTML table of movies enriched with IMDb data.

    Typical use::

        mlist = MovieList(src='movie_list')
        if mlist.gen():
            mlist.write(dst='index.html')

    Each non-blank line of the input file is split into a search name
    (everything up to and including the first ``)``) and a status (the text
    left once everything up to the last ``[`` and every ``]`` is removed).
    """

    # Opening part of the document; table rows are appended to a copy of it.
    _HTML_HEAD = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead>
<tbody>"""

    # Strips whatever follows the first ')' on a line.
    _NAME_TAIL = re.compile(r'\).*')
    # Strips everything up to the last '[' as well as any ']'.
    _STATUS_WRAP = re.compile(r'\]|.*\[')

    # Upper bound on search attempts when the connection drops mid-response.
    _MAX_SEARCH_RETRIES = 5
    # Throttling between thread launches (seconds / launches per batch).
    _SPAWN_PAUSE = 0.2
    _BATCH_SIZE = 16
    _BATCH_PAUSE = 6

    def __init__(self, src=None):
        """Prepare an empty list.

        Args:
            src: Path of the movie list file. When falsy, ``gen()`` falls
                back to ``movie_list`` next to the running script.
        """
        self.html = self._HTML_HEAD
        self.src = src
        self.movie_list = []
        self.status_list = []
        self.threads = []
        # Rows produced by the workers, keyed by input index, so the final
        # table follows the input order rather than thread completion order.
        self._rows = {}
        self._lock = threading.Lock()

    @classmethod
    def _parse_line(cls, line):
        """Split one input line into ``(name, status)``."""
        name = cls._NAME_TAIL.sub(')', line)
        status = cls._STATUS_WRAP.sub('', line)
        return name, status

    def _search(self, imdb, title):
        """Return the first search hit for *title*, or None.

        The search is retried on ``IncompleteRead`` up to
        ``_MAX_SEARCH_RETRIES`` times; None is returned when every attempt
        fails or when the search yields no results.
        """
        for _ in range(self._MAX_SEARCH_RETRIES):
            try:
                results = imdb.search_movie(title)
            except IncompleteRead:
                continue
            return results[0] if results else None
        return None

    @staticmethod
    def _format_row(index, title, url, year, rating, genres, status):
        """Build one ``<tr>`` with every text cell HTML-escaped.

        When *url* is falsy the title is emitted as plain text instead of a
        hyperlink.
        """
        def esc(value):
            return html.escape(str(value))

        safe_title = esc(title)
        if url:
            link = f'<a href="{esc(url)}" target="_blank">{safe_title}</a>'
        else:
            link = safe_title
        return (
            f'\n\t<tr><td>{index + 1}</td>'
            f'<td><p hidden>{safe_title}</p>{link}</td>'
            f'<td>{esc(year)}</td><td align="center">{esc(rating)}</td>'
            f'<td>{esc(genres)}</td>'
            f'<td align="center">{esc(status)}</td></tr>'
        )

    def _worker(self, arg, index):
        """Look up one movie on IMDb and store its table row.

        Args:
            arg: Search string taken from the movie list.
            index: Zero-based position of the entry; selects the status and
                the slot the finished row is stored in.
        """
        imdb = IMDb()
        status = self.status_list[index]
        movie = self._search(imdb, arg)

        if movie is None:
            # Nothing found: keep the entry visible using the raw name.
            row = self._format_row(index, arg, None, 'N/A', 'N/A', 'N/A', status)
        else:
            imdb.update(movie)
            row = self._format_row(
                index,
                movie.get('title', arg),
                f'http://www.imdb.com/title/tt{movie.movieID}',
                movie.get('year', 'N/A'),
                movie.get('rating', 'N/A'),
                ', '.join(movie.get('genres') or ['N/A']),
                status,
            )

        with self._lock:
            self._rows[index] = row

    def gen(self):
        """Generate the HTML rows for the input list, one thread per entry.

        Returns:
            False when the input file does not exist (an error is written to
            stderr), True otherwise.
        """
        if not self.src:
            self.src = Path(sys.argv[0]).parent / 'movie_list'
        else:
            self.src = Path(self.src)
        if not self.src.exists():
            sys.stderr.write(f'error: input does not exist - {self.src}\n')
            return False

        # Read the movie list and split each non-blank line into its columns.
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            entries = [
                self._parse_line(line)
                for line in fp_handle.read().splitlines()
                if line.strip()
            ]
        self.movie_list = [name for name, _ in entries]
        self.status_list = [status for _, status in entries]
        self._rows = {}

        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))

        # Silence stdout once, from the launching thread, for as long as the
        # workers run. Rebinding sys.stdout inside each worker is racy: one
        # thread can "restore" a handle another thread has already closed.
        with open(os.devnull, 'w') as devnull, \
                contextlib.redirect_stdout(devnull):
            for idx, val in enumerate(self.movie_list):
                thread = threading.Thread(target=self._worker, args=(val, idx))
                self.threads.append(thread)
                thread.start()
                pbar.update(idx + 1)
                time.sleep(self._SPAWN_PAUSE)
                if len(self.threads) % self._BATCH_SIZE == 0:
                    time.sleep(self._BATCH_PAUSE)

            for thread in self.threads:
                thread.join()

        # Append the rows in input order; a worker that died leaves a gap.
        self.html += ''.join(self._rows[idx] for idx in sorted(self._rows))
        return True

    def write(self, dst=None):
        """Write the complete HTML document to *dst*.

        The footer is added to a local copy, so ``self.html`` is left
        untouched and calling this method repeatedly yields the same file.

        Args:
            dst: Output path. When falsy, ``index.html`` next to the running
                script is used.
        """
        dst = Path(dst) if dst else Path(sys.argv[0]).parent / 'index.html'
        # Just a fancy scrollbar for the html
        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
        document = (
            self.html
            + '\n\t</tbody>\n</table>\n'
            + '\nGenerated on: '
            + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
            + ' by ' + sys.argv[0] + scroll + '</body>\n</html>'
        )
        with open(dst, 'wb') as fp_handle:
            fp_handle.write(document.encode('utf8'))
|
|
|
|
|
|
def main():
    """Run the generator from the command line.

    Usage: ``script [src [dst]]``. *src* is the movie list file and *dst*
    the HTML file to write; both fall back to the defaults chosen by
    ``MovieList`` when omitted.

    Exits with status 1 when more than two arguments are given or when the
    list could not be generated.
    """
    args = sys.argv[1:]
    if len(args) > 2:
        sys.stderr.write(f'error: max 2 variables, {len(args)} given!\n')
        # sys.exit rather than the site-provided exit(), which is not
        # guaranteed to exist (e.g. when running with -S).
        sys.exit(1)

    src = args[0] if args else None
    dst = args[1] if len(args) == 2 else None

    mlist = MovieList(src=src)
    if not mlist.gen():
        # gen() has already reported the problem on stderr.
        sys.exit(1)
    mlist.write(dst=dst)
|
|
|
|
|
|
# Script entry point: run main() only when executed directly, not on import.
if __name__ == '__main__':
    main()
|