My Movie List

#!/usr/bin/env python
"""
1. Import a movie_list txt file
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
"""
import contextlib
import html
import http.client
import os
import re
import sys
import threading
import time
from pathlib import Path

import progressbar
from imdb import IMDb

# Number of times a search is attempted before the entry is given up on
# (an IncompleteRead used to be retried forever).
MAX_SEARCH_ATTEMPTS = 5

# Pause after each thread launch, plus a longer pause after every full batch,
# so the lookups are not all fired at once.
THREAD_START_DELAY = 0.2
THREAD_BATCH_SIZE = 16
THREAD_BATCH_DELAY = 6

# Everything after the first ')' on a line is dropped to obtain the name.
_NAME_TAIL_RE = re.compile(r'\).*')
# Everything up to the last '[' and every ']' is dropped to obtain the status.
_STATUS_HEAD_RE = re.compile(r'\]|.*\[')


class MovieList:
    """Build an HTML table of movies from a plain-text movie list.

    Each non-blank line of the input supplies a movie name (the text up to
    and including the first ')') and a status (the text between the last '['
    and ']').  Every name is looked up on IMDb in its own thread and turned
    into one table row.

    Attributes:
        html: the HTML document built so far.
        src: path of the input list (resolved in gen()).
        movie_list: movie names parsed from the input.
        status_list: statuses parsed from the input, parallel to movie_list.
        threads: worker threads started by the last gen() call.
        rows: finished table rows, keyed by the index into movie_list.
    """

    def __init__(self, src=None):
        # NOTE(review): the markup in this template is a reconstruction of
        # the page title, heading and table header -- confirm it against the
        # original template before relying on the exact tags.
        self.html = (
            '<!DOCTYPE html>\n'
            '<html>\n'
            '<head>\n'
            '<meta charset="utf-8">\n'
            '<title>My Movie List</title>\n'
            '</head>\n'
            '<body>\n'
            '<h1>My Movie List</h1>\n'
            '<table>\n'
            '\t<tr><th>Index</th><th>Title</th><th>Year</th>'
            '<th>IMDb Rating</th><th>Genre</th><th>Status</th></tr>'
        )
        self.src = src
        self.movie_list = []
        self.status_list = []
        self.threads = []
        self.rows = {}

    @staticmethod
    def _row(index, title, movie_id, year, rating, genres, status):
        """Return one escaped table row; the title links to IMDb if known."""
        esc = html.escape
        if movie_id is None:
            title_cell = esc(str(title))
        else:
            url = f'http://www.imdb.com/title/tt{movie_id}'
            title_cell = f'<a href="{esc(url)}">{esc(str(title))}</a>'
        return (
            f'\n\t<tr><td>{index + 1}</td>'
            f'<td>{title_cell}</td>'
            f'<td>{esc(str(year))}</td>'
            f'<td>{esc(str(rating))}</td>'
            f'<td>{esc(str(genres))}</td><td>{esc(str(status))}</td></tr>'
        )

    def _worker(self, arg, index):
        """Look up one movie on IMDb and store its table row.

        Args:
            arg: the movie name to search for.
            index: position of the movie in movie_list / status_list; also
                the key under which the row is stored in self.rows.

        The row is stored per index instead of being appended to self.html,
        so concurrent workers never modify the same string and the final
        table keeps the order of the input list.
        """
        imdb = IMDb()
        query = None
        for _ in range(MAX_SEARCH_ATTEMPTS):
            try:
                query = imdb.search_movie(arg)
                break
            except http.client.IncompleteRead:
                # Truncated response: try again, but only a bounded number
                # of times.
                continue

        if index < len(self.status_list):
            status = self.status_list[index]
        else:
            status = ''

        if not query:
            # No usable result: keep a placeholder row so the entry is not
            # silently dropped from the table.
            sys.stderr.write(f'warning: no IMDb result for - {arg}\n')
            self.rows[index] = self._row(
                index, arg, None, 'N/A', 'N/A', 'N/A', status)
            return

        movie = query[0]
        imdb.update(movie)

        genres = movie.get('genres')
        if not genres:
            genres = 'N/A'
        elif not isinstance(genres, str):
            genres = ', '.join(genres)

        self.rows[index] = self._row(
            index,
            movie.get('title', arg),
            movie.movieID,
            movie.get('year', 'N/A'),
            movie.get('rating', 'N/A'),
            genres,
            status,
        )

    def gen(self):
        """Generate the HTML table rows from the input list.

        When no source was given, 'movie_list' next to the running script is
        used.  One worker thread is started per movie.

        Returns:
            True when the list was processed, False when the input path does
            not exist (an error is written to stderr in that case).
        """
        if not self.src:
            self.src = Path(sys.argv[0]).parent / 'movie_list'
        else:
            self.src = Path(self.src)

        if not self.src.exists():
            sys.stderr.write(f'error: input does not exist - {self.src}\n')
            return False

        # Open the movie list & split the columns.  Name and status are
        # taken from the same line, so both lists always stay in step;
        # blank lines are skipped rather than sent to IMDb as empty queries.
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            lines = fp_handle.read().splitlines()
        self.movie_list = []
        self.status_list = []
        for line in lines:
            if not line.strip():
                continue
            self.movie_list.append(_NAME_TAIL_RE.sub(')', line))
            self.status_list.append(_STATUS_HEAD_RE.sub('', line))

        self.threads = []
        self.rows = {}

        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        # Silence stdout once, here, for the whole lookup phase.  Swapping
        # sys.stdout inside every worker raced between threads and could
        # leave sys.stdout pointing at an already closed file.
        with open(os.devnull, 'w') as devnull, \
                contextlib.redirect_stdout(devnull):
            for idx, val in enumerate(self.movie_list):
                thread = threading.Thread(target=self._worker,
                                          args=(val, idx))
                self.threads.append(thread)
                thread.start()
                pbar.update(idx + 1)
                time.sleep(THREAD_START_DELAY)
                if len(self.threads) % THREAD_BATCH_SIZE == 0:
                    time.sleep(THREAD_BATCH_DELAY)

            for thread in self.threads:
                thread.join()

        # Append the rows in list order, not in thread completion order.
        self.html += ''.join(self.rows[key] for key in sorted(self.rows))
        return True

    def write(self, dst=None):
        """Close the HTML document and write it to disk as UTF-8.

        Args:
            dst: output path; defaults to 'index.html' next to the running
                script.
        """
        if not dst:
            dst = Path(sys.argv[0]).parent / 'index.html'
        else:
            dst = Path(dst)

        # Just a fancy scrollbar for the html
        # NOTE(review): the original style rules are not recoverable from
        # the source; this is a minimal stand-in -- confirm or replace.
        scroll = (
            '\n<style>\n'
            '::-webkit-scrollbar { width: 10px; }\n'
            '::-webkit-scrollbar-thumb { background: #888; }\n'
            '</style>'
        )

        self.html += (
            '\n</table>\n'
            + '<p>\nGenerated on: '
            + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
            + ' by '
            + html.escape(sys.argv[0])
            + '\n</p>'
            + scroll
            + '\n</body>\n</html>\n'
        )

        with open(dst, 'wb') as fp_handle:
            fp_handle.write(self.html.encode('utf8'))


def main():
    """Default run: ``script [movie_list [output_html]]``."""
    src = dst = None

    if len(sys.argv) > 3:
        sys.stderr.write(f'error: max 2 variables, {len(sys.argv)-1} given!\n')
        # sys.exit, not the site-provided exit(), which is not guaranteed
        # to exist.
        sys.exit(1)

    if len(sys.argv) > 1:
        src = sys.argv[1]
    if len(sys.argv) == 3:
        dst = sys.argv[2]

    mlist = MovieList(src=src)
    if mlist.gen():
        mlist.write(dst=dst)


if __name__ == "__main__":
    main()