#!/usr/bin/env python
"""
1. Import a movie_list txt file
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
"""
import os
import re
import time
import sys
import http.client
import threading
from pathlib import Path
import progressbar
from imdb import IMDb
class MovieList:
    """Build an HTML table of movies from a text list, enriched with IMDb data.

    Typical use is ``gen()`` followed by ``write()``: ``gen()`` reads the
    source list and looks every entry up on IMDb in its own thread, and
    ``write()`` stores the finished page.

    Attributes:
        html: Page markup accumulated so far (header, then table rows).
        src: Path of the input list; resolved inside ``gen()``.
        movie_list: Movie names parsed from the input, one per entry.
        status_list: Status text parsed from the input, parallel to
            ``movie_list``.
        threads: Worker threads started by ``gen()``.
    """

    # Upper bound on search attempts when IMDb keeps answering with
    # http.client.IncompleteRead; prevents a worker from spinning forever.
    _MAX_SEARCH_ATTEMPTS = 5

    def __init__(self, src=None):
        """Prepare an empty list.

        Args:
            src: Optional path of the movie list file. When omitted,
                ``gen()`` falls back to ``movie_list`` next to the script.
        """
        # NOTE(review): the markup of this header had been lost from the
        # file; it is reconstructed here as a minimal table around the
        # surviving page title and column names - confirm the desired layout.
        self.html = (
            '<!DOCTYPE html>\n'
            '<html>\n'
            '<head>\n'
            '\t<meta charset="utf-8">\n'
            '\t<title>My Movie List</title>\n'
            '</head>\n'
            '<body>\n'
            '<table>\n'
            '\t<thead>\n'
            '\t<tr>\n'
            '\t\t<th>Index</th>\n'
            '\t\t<th>Title</th>\n'
            '\t\t<th>Year</th>\n'
            '\t\t<th>IMDb Rating</th>\n'
            '\t\t<th>Genre</th>\n'
            '\t\t<th>Status</th>\n'
            '\t</tr>\n'
            '\t</thead>\n'
            '\t<tbody>'
        )
        self.src = src
        self.movie_list = []
        self.status_list = []
        self.threads = []
        # Rows rendered by the workers, keyed by entry index so they can be
        # emitted in input order no matter which thread finishes first.
        self._rows = {}
        self._rows_lock = threading.Lock()

    @staticmethod
    def _split_columns(text):
        """Split the raw list text into parallel name and status lists.

        Each non-blank line is expected to look like
        ``Name (year) [status]``. The name is the line up to and including
        its first ``)``; the status is what remains after dropping
        everything up to the last ``[`` and every ``]``. Lines without a
        ``[`` get an empty status, and blank lines are skipped.

        Returns:
            A ``(names, statuses)`` tuple of equally long lists.
        """
        names = []
        statuses = []
        # Working line by line keeps both lists aligned by construction.
        for line in text.splitlines():
            if not line.strip():
                continue
            names.append(re.sub(r'\).*', ')', line))
            if '[' in line:
                statuses.append(re.sub(r'\]|.*\[', '', line))
            else:
                # Without a bracket the substitution would leave the whole
                # line behind as the "status".
                statuses.append('')
        return names, statuses

    @staticmethod
    def _render_row(index, fallback_title, movie, status):
        """Render one table row.

        Args:
            index: Zero-based position of the entry; shown one-based.
            fallback_title: Name from the input list, used when ``movie``
                is ``None``.
            movie: Mapping with ``title``/``year``/``rating``/``genres``
                keys and a ``movieID`` attribute, or ``None`` when the
                lookup produced nothing.
            status: Status text from the input list.

        Returns:
            The row markup, starting with ``'\\n\\t'``.
        """
        from html import escape

        def field(key):
            # Absent data is shown as 'N/A' instead of raising KeyError.
            if movie is None:
                return 'N/A'
            try:
                return movie[key]
            except KeyError:
                return 'N/A'

        genres = field('genres')
        # Only join real sequences; joining the 'N/A' placeholder string
        # would produce 'N, /, A'.
        if isinstance(genres, (list, tuple)):
            genres = ', '.join(str(genre) for genre in genres)

        title = fallback_title if movie is None else field('title')
        title_cell = escape(str(title))
        movie_id = getattr(movie, 'movieID', None)
        if movie_id is not None:
            url = f'http://www.imdb.com/title/tt{movie_id}'
            title_cell = f'<a href="{escape(url)}">{title_cell}</a>'

        year = escape(str(field('year')))
        rating = escape(str(field('rating')))
        genres = escape(str(genres))
        status = escape(str(status))
        # NOTE(review): cell layout reconstructed from the column headers.
        return (
            '\n\t<tr>'
            f'<td>{index + 1}</td>'
            f'<td>{title_cell}</td>'
            f'<td>{year}</td>'
            f'<td>{rating}</td>'
            f'<td>{genres}</td>'
            f'<td>{status}</td>'
            '</tr>'
        )

    def _worker(self, arg, index):
        """Look one movie up on IMDb and record its table row.

        The first search hit is taken as the movie. A search that returns
        nothing, or that keeps failing with ``IncompleteRead``, still
        yields a row (unlinked title, ``N/A`` values) so the entry is not
        silently dropped.

        Args:
            arg: Movie name to search for.
            index: Position of the entry in ``movie_list``/``status_list``.
        """
        imdb = IMDb()
        movie = None
        for _ in range(self._MAX_SEARCH_ATTEMPTS):
            try:
                query = imdb.search_movie(arg)
            except http.client.IncompleteRead:
                continue  # truncated response, try again
            if query:
                movie = query[0]
                try:
                    imdb.update(movie)
                except http.client.IncompleteRead:
                    # Keep the data that came with the search result.
                    pass
            break

        if index < len(self.status_list):
            status = self.status_list[index]
        else:
            status = ''
        row = self._render_row(index, arg, movie, status)
        with self._rows_lock:
            self._rows[index] = row

    def gen(self):
        """Generate the table rows from the input list, one thread per entry.

        Returns:
            ``False`` (after writing an error to stderr) when the input
            file does not exist, ``True`` otherwise.
        """
        if not self.src:
            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
        else:
            self.src = Path(self.src)
        if not self.src.exists():
            sys.stderr.write(f'error: input does not exist - {self.src}\n')
            return False

        # Open the movie list & split the columns
        with open(self.src, 'r') as fp_handle:
            self.movie_list, self.status_list = self._split_columns(
                fp_handle.read())

        # Created before stdout is silenced below.
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        self._rows = {}

        # sys.stdout is process-wide, so it is swapped exactly once here
        # rather than inside every worker, where concurrent save/restore
        # pairs could leave a closed file installed as stdout.
        save_stdout = sys.stdout
        with open(os.devnull, 'w') as sink:
            sys.stdout = sink
            try:
                for idx, val in enumerate(self.movie_list):
                    thread = threading.Thread(
                        target=self._worker, args=(val, idx,))
                    self.threads.append(thread)
                    thread.start()
                    pbar.update(idx + 1)
                    # Stagger the requests, with a longer pause after
                    # every 16th thread.
                    time.sleep(0.2)
                    if len(self.threads) % 16 == 0:
                        time.sleep(6)
                for thread in self.threads:
                    thread.join()
            finally:
                sys.stdout = save_stdout

        # Emit the rows in input order.
        self.html += ''.join(self._rows[idx] for idx in sorted(self._rows))
        return True

    def write(self, dst=None):
        """Write the page to ``dst`` as UTF-8.

        The footer is added to the written document only; ``self.html`` is
        left untouched, so calling ``write()`` again does not duplicate it.

        Args:
            dst: Output path. Defaults to ``index.html`` next to the script.
        """
        from html import escape

        if not dst:
            dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
        else:
            dst = Path(dst)
        # Just a fancy scrollbar for the html
        # NOTE(review): the styling this held is missing from the file.
        scroll = ''
        stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
        document = (
            self.html +
            '\n\t</tbody>\n</table>\n' +
            '\n<p>Generated on: ' + stamp +
            ' by ' + escape(sys.argv[0]) + '</p>' + scroll + '\n' +
            '</body>\n</html>\n'
        )
        with open(dst, 'wb') as fp_handle:
            fp_handle.write(document.encode('utf8'))
def main(argv=None):
    """Default run: build the list from the command-line arguments.

    Args:
        argv: Argument vector including the program name. Defaults to
            ``sys.argv``. ``argv[1]`` is the optional input list and
            ``argv[2]`` the optional output file.

    Raises:
        SystemExit: With status 1 when more than two arguments are given.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) > 3:
        sys.stderr.write(f'error: max 2 variables, {len(argv)-1} given!\n')
        # sys.exit is always available; the bare exit() helper is only
        # installed by the site module.
        sys.exit(1)
    src = argv[1] if len(argv) > 1 else None
    dst = argv[2] if len(argv) == 3 else None
    mlist = MovieList(src=src)
    if mlist.gen():
        mlist.write(dst=dst)
# Run the default workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()