update movie_list script and list

2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions
@@ -1,18 +1,28 @@
 #!/usr/bin/env python

+"""
+1. Import a movie_list txt file
+2. Query IMDb for each entry, retrieving actual movie name, rating and genres
+3. Generate an HTML table from the IMDb data
+4. Store the HTML in index.html
+"""
+
+import os
+import re
+import time
+import sys
+import http.client
+import threading
+from pathlib import Path
+import progressbar
 from imdb import IMDb
-import os, re, threading, time, sys, subprocess
-from time import gmtime, strftime

-path = os.path.dirname(sys.argv[0])
-ia = IMDb()

-movie_list = []
-status_list = []
-threads = []
-query = []
-html = """<html>
-<head>
+class MovieList:
+    """ Class to generate a movie list HTML table """
+    def __init__(self, src=None):
+        self.html = """<html>
+    <head>
        <title>My Movie List</title>
        <link rel="stylesheet" type="text/css" href="style.css">
        <script src="../jquery-3.1.0.min.js"></script>
@@ -25,81 +35,111 @@ html = """<html>
                });
        });
        </script>
-</head>
-<body>
+    </head>
+    <body>
        <header>
            <div class="scroll-indicator" />
        </header>
-<base target="_parent" />
-<table id="sortable" class="sortable">
+    <base target="_parent" />
+    <table id="sortable" class="sortable">
        <thead>
-        <tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
+            <tr>
+                <th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
+            </tr>
        </thead>
        <tbody>"""
+        self.src = src
+        self.movie_list = []
+        self.status_list = []
+        self.threads = []

-# Open the movie list & split the columns
-file_ = open(path + '/movie_list', 'r')
-tmp_names = tmp_status = file_.read()
-file_.close()
-tmp_names = re.sub(r'\).*', ')', tmp_names)
-tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
-
-movie_list = tmp_names.splitlines()
-status_list = tmp_status.splitlines()
-
-# Scan IMDb for a given movie and append it to the html
-# This collects rating, genres, official name and a hyperlink
-def worker(arg, index):
+    def _worker(self, arg, index):
+        """ Look up one movie on IMDb and append its table row to self.html
+
+        arg   -- search string for the movie, as read from the movie list
+        index -- zero-based position of the entry; picks the matching status
+                 from self.status_list and gives the displayed row number
+                 (index + 1)
+
+        The row carries the IMDb title (linked to its IMDb page), year,
+        rating, genres and the status. An entry whose search returns nothing
+        is reported on stderr and skipped.
+        """
+        from html import escape
+
+        imdb = IMDb()
+        # NOTE(review): sys.stdout is process-wide and gen() runs this method
+        # in several threads at once, so overlapping workers can save/restore
+        # each other's redirect; the 'trash' handle is also never closed.
        save_stdout = sys.stdout
        sys.stdout = open('trash', 'w')
-	query = ia.search_movie(arg)
+        # Repeat the search for as long as the response arrives incomplete
+        while True:
+            try:
+                query = imdb.search_movie(arg)
+                break
+            except http.client.IncompleteRead:
+                pass
        sys.stdout = save_stdout
+
+        if not query:
+            # query[0] below would raise IndexError on an empty result
+            print(f'No IMDb match for {arg!r}, skipping', file=sys.stderr)
+            return
+
        movie = query[0]
-	ia.update(movie)
+        imdb.update(movie)
        movie['ID'] = query[0].movieID
        if 'genres' not in movie.keys():
            movie['genres'] = 'N/A'
        if 'rating' not in movie.keys():
            movie['rating'] = 'N/A'
-	global html
-	html = (html +
-		'\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + movie['title'] + '</p>' +
-		'<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' +
-		str(movie['title']) +
-		'</a></td><td>' +
-		str(movie['year']) + 
-		'</td><td align="center">' + 
-		str(movie['rating']) + 
-		'</td><td>' +
-		re.sub(r'\[|\]|\'', '', str(movie['genres'])) + 
-		'</td><td align="center">' +
-		str(status_list[index]) +
-		'</td></tr>')

-# Progress bar. Enough said
-import progressbar
-bar = progressbar.ProgressBar(max_value=len(movie_list))
+        url = f'http://www.imdb.com/title/tt{movie["ID"]}'
+        # Escape text taken from IMDb and from the list file so characters
+        # such as & or < cannot break the generated markup
+        title = escape(str(movie['title']), quote=False)
+        year = escape(str(movie['year']), quote=False)
+        rating = escape(str(movie['rating']), quote=False)
+        # 'genres' is the plain string 'N/A' when IMDb has none; joining a
+        # string would put ', ' between its characters ('N, /, A')
+        genres = movie['genres']
+        if not isinstance(genres, str):
+            genres = ', '.join(genres)
+        genres = escape(genres, quote=False)
+        status = escape(str(self.status_list[index]), quote=False)
+        self.html += (
+            f'\n\t<tr><td>{index + 1}</td>'
+            f'<td><p hidden>{title}</p><a href="{url}" target="_blank">{title}</a></td>'
+            f'<td>{year}</td><td align="center">{rating}</td>'
+            f'<td>{genres}</td>'
+            f'<td align="center">{status}</td></tr>'
+        )

-for idx, val in enumerate(movie_list):
-	t = threading.Thread(target=worker, args=(val, idx,))
-	threads.append(t)
-	t.start()
-	bar.update(idx+1)
+    def gen(self):
+        """ Generate an HTML list based on input, using a threaded worker
+
+        Reads the list from self.src (default: 'movie_list' in the directory
+        of sys.argv[0]), splits every line into a name and a status, then
+        starts one _worker thread per entry and waits for all of them.
+        """
+        if not self.src:
+            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
+        else:
+            self.src = Path(self.src)
+
+        # Open the movie list & split the columns
+        # Read as UTF-8 explicitly: write() encodes the page as UTF-8 and the
+        # platform default encoding is not guaranteed to match
+        with open(self.src, 'r', encoding='utf-8') as fp_handle:
+            tmp_names = tmp_status = fp_handle.read()
+        # Names: cut each line after its first ')'
+        tmp_names = re.sub(r'\).*', ')', tmp_names)
+        # Statuses: drop everything up to the last '[' and any ']'
+        tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
+        self.movie_list = tmp_names.splitlines()
+        self.status_list = tmp_status.splitlines()
+
+        # Progress bar. Enough said
+        # (it advances as threads are started, not as lookups complete)
+        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
+
+        # Throttle: short pause after every start, longer pause each time the
+        # number of started threads reaches a multiple of 16
+        for idx, val in enumerate(self.movie_list):
+            thread = threading.Thread(target=self._worker, args=(val, idx,))
+            self.threads.append(thread)
+            thread.start()
+            pbar.update(idx+1)
            time.sleep(0.2)
-	if len(threads)%16 == 0:
+            if len(self.threads) % 16 == 0:
                time.sleep(6)

-for x in threads:
-	x.join()
+        for thread in self.threads:
+            thread.join()

-# Just a fancy scrollbar for the html
-scroll = """
-<script type="text/javascript" src="scroll-indicator.js"></script>
-"""
-html += ('\n\t</tbody>\n</table>\n' +
-	 '\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) + 
+    def write(self, dst=None):
+        """ Write the HTML list to index.html
+
+        dst -- output path; when empty, 'index.html' in the directory of
+               sys.argv[0] is used
+        """
+        if not dst:
+            dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
+        else:
+            dst = Path(dst)
+        # Just a fancy scrollbar for the html
+        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
+        # Close the table and add a footer with the generation time (UTC) and
+        # the script name.
+        # NOTE(review): the footer is appended to self.html itself, so a
+        # second call to write() would append it again.
+        self.html += ('\n\t</tbody>\n</table>\n' +
+                      '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
                      ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
-file_ = open(path + '/index.html', 'wb')
-file_.write(html.encode('utf8'))
-file_.close()
+        # Encode explicitly and write bytes, so the output is UTF-8
+        with open(dst, 'wb') as fp_handle:
+            fp_handle.write(self.html.encode('utf8'))

+
+def main():
+    """ Build the movie table with the default paths and save it """
+    movies = MovieList()
+    movies.gen()
+    movies.write()
+
+
+if __name__ == "__main__":
+    main()
@@ -115,7 +115,7 @@ Star Wars: Episode IV - A New Hope (1977)			[DONE]
 Star Wars: Episode V - The Empire Strikes Back (1980)		[DONE]
 Star Wars: Episode VI - Return of the Jedi (1983)		[DONE]
 Star Wars: Episode: The Force Awakens (2015)			[DONE]
-Star Wars: Rogue One (2016)					[DONE]
+Rogue One: A Star Wars Story (2016)				[DONE]
 Men in Black (1997)						[DONE]
 Men in Black II (2002)						[DONE]
 Men in Black 3 (2012)						[DONE]
@@ -192,6 +192,7 @@ Identity (2003)							[DONE]
 Turist (Force Majeure) (2014)					[DONE]
 Gone Girl (2014)						[DONE]
 The Equalizer (2014)						[DONE]
+The Equalizer 2 (2018)						[DONE]
 Desert Flower (2009)						[DONE]
 I Origins (2014)						[DONE]
 Whiplash (2014)							[DONE]
@@ -246,7 +247,7 @@ Now You See Me (2013)						[DONE]
 Up (2009)							[DONE]
 The Shallows (2016)						[DONE]
 Boychoir (2014)							[DONE]
-Same Kind of Different as Me (2017)				[*]
+Same Kind of Different as Me (2017)				[DONE]
 Allied (2016)							[DONE]
 Mad Max: Fury Road (2015)					[DONE]
 Blood Father (2016)						[DONE]
@@ -287,7 +288,7 @@ Gifted (2017)							[DONE]
 Spider-Man: Homecoming (2017)					[DONE]
 Rememory (2017)							[DONE]
 Wristcutters: A Love Story (2006)				[DONE]
-Bohemian Rhapsody (2018)					[*]
+Bohemian Rhapsody (2018)					[DONE]
 Killer's Bodyguard (2017)					[*]
 Danny Collins (2015)						[DONE]
 Annabelle: Creation (2017)					[DONE]
@@ -304,3 +305,38 @@ Get Out (2017)							[DONE]
 Orphan (2009)							[DONE]
 Upgrade (2018)							[DONE]
 Dangal (2016)							[DONE]
+BlacKkKlansman (2018)						[DONE]
+The Shape of Water (2017)					[DONE]
+What Women Want (2000)						[DONE]
+What's Love Got to Do with It (1993)				[DONE]
+Spotlight (2015)						[DONE]
+First Man (2018)						[DONE]
+Halloween (2018)						[*]
+Catch-22 (1970)							[*]
+The Nun (2018)							[DONE]
+Into The White (2012)						[DONE]
+Trash (2014)							[DONE]
+First Man (2018)						[DONE]
+Full Metal Jacket (1987)					[*]
+Woman at War (2018)						[*]
+Roman J. Israel, Esq. (2017)					[DONE]
+Venom (2018)							[DONE]
+Moby Dick (1998)						[DONE]
+The Help (2011)							[DONE]
+Hidden Figures (2016)						[DONE]
+The Guilty (2018)						[DONE]
+Green Book (2018)						[DONE]
+The Road Within (2014)						[DONE]
+King of Thieves (2018)						[DONE]
+The House That Jack Built (2018)				[DONE]
+Captain Marvel (2019)						[DONE]
+Miss Sloane (2016)						[DONE]
+The Bra (2018)							[DONE]
+The Mule (2018)							[DONE]
+Escape Room (2019)						[DONE]
+Man on a Ledge (2012)						[DONE]
+The Highwaymen (2019)						[DONE]
+Cold War (2018)							[*]
+Cold Pursuit (2019)						[DONE]
+Ad Astra (2019)							[*]
+Storm Boy (2019)						[DONE]