update movie_list script and list

2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions
--- a/movie_list/index.py
+++ b/movie_list/index.py
@@ -1,105 +1,145 @@
 #!/usr/bin/env python

-from imdb import IMDb
-import os, re, threading, time, sys, subprocess
-from time import gmtime, strftime
-
-path = os.path.dirname(sys.argv[0])
-ia = IMDb()
-
-movie_list = []
-status_list = []
-threads = []
-query = []
-html = """<html>
-<head>
-    <title>My Movie List</title>
-    <link rel="stylesheet" type="text/css" href="style.css">
-    <script src="../jquery-3.1.0.min.js"></script>
-    <script src="jquery.dataTables.min.js"></script>
-    <script>
-    $(document).ready(function(){
-            $('#sortable').DataTable({
-                "pageLength": -1,
-                "bPaginate": false
-            });
-    });
-    </script>
-</head>
-<body>
-    <header>
-        <div class="scroll-indicator" />
-    </header>
-<base target="_parent" />
-<table id="sortable" class="sortable">
-    <thead>
-        <tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
-    </thead>
-    <tbody>"""
-
-# Open the movie list & split the columns
-file_ = open(path + '/movie_list', 'r')
-tmp_names = tmp_status = file_.read()
-file_.close()
-tmp_names = re.sub(r'\).*', ')', tmp_names)
-tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
-
-movie_list = tmp_names.splitlines()
-status_list = tmp_status.splitlines()
-
-# Scan IMDb for a given movie and append it to the html
-# This collects rating, genres, official name and a hyperlink
-def worker(arg, index):
-	save_stdout = sys.stdout
-	sys.stdout = open('trash', 'w')
-	query = ia.search_movie(arg)
-	sys.stdout = save_stdout
-	movie = query[0]
-	ia.update(movie)
-	movie['ID'] = query[0].movieID
-	if 'genres' not in movie.keys():
-		movie['genres'] = 'N/A'
-	if 'rating' not in movie.keys():
-		movie['rating'] = 'N/A'
-	global html
-	html = (html +
-		'\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + movie['title'] + '</p>' +
-		'<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' +
-		str(movie['title']) +
-		'</a></td><td>' +
-		str(movie['year']) + 
-		'</td><td align="center">' + 
-		str(movie['rating']) + 
-		'</td><td>' +
-		re.sub(r'\[|\]|\'', '', str(movie['genres'])) + 
-		'</td><td align="center">' +
-		str(status_list[index]) +
-		'</td></tr>')
-
-# Progress bar. Enough said
-import progressbar
-bar = progressbar.ProgressBar(max_value=len(movie_list))
-
-for idx, val in enumerate(movie_list):
-	t = threading.Thread(target=worker, args=(val, idx,))
-	threads.append(t)
-	t.start()
-	bar.update(idx+1)
-	time.sleep(0.2)
-	if len(threads)%16 == 0:
-		time.sleep(6)
-
-for x in threads:
-	x.join()
-
-# Just a fancy scrollbar for the html
-scroll = """
-<script type="text/javascript" src="scroll-indicator.js"></script>
 """
-html += ('\n\t</tbody>\n</table>\n' +
-	 '\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) + 
-         ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
-file_ = open(path + '/index.html', 'wb')
-file_.write(html.encode('utf8'))
-file_.close()
+1. Import a movie_list txt file
+2. Query IMDb for each entry, retrieving actual movie name, rating and genres
+3. Generate an HTML table from the IMDb data
+4. Store the HTML in index.html
+"""

+import os
+import re
+import time
+import sys
+import http.client
+import threading
+from pathlib import Path
+import progressbar
+from imdb import IMDb
+
+
+class MovieList:
+    """ Class to generate a movie list HTML table
+
+    Typical use is ``gen()`` followed by ``write()``.
+
+    Attributes:
+        html: HTML document built so far (header, then rows, then footer)
+        src: path of the movie list text file, or None for the default
+        movie_list: movie names parsed from the list file by ``gen()``
+        status_list: per-movie status strings, parallel to ``movie_list``
+        threads: every worker thread started by ``gen()``
+    """
+    # Upper bound on IMDb search attempts for one title when the response
+    # keeps being cut short (http.client.IncompleteRead)
+    MAX_RETRIES = 5
+    # Seconds to wait between two search attempts
+    RETRY_DELAY = 1.0
+
+    def __init__(self, src=None):
+        self.html = """<html>
+    <head>
+        <title>My Movie List</title>
+        <link rel="stylesheet" type="text/css" href="style.css">
+        <script src="../jquery-3.1.0.min.js"></script>
+        <script src="jquery.dataTables.min.js"></script>
+        <script>
+        $(document).ready(function(){
+                $('#sortable').DataTable({
+                    "pageLength": -1,
+                    "bPaginate": false
+                });
+        });
+        </script>
+    </head>
+    <body>
+        <header>
+            <div class="scroll-indicator" />
+        </header>
+    <base target="_parent" />
+    <table id="sortable" class="sortable">
+        <thead>
+            <tr>
+                <th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
+            </tr>
+        </thead>
+        <tbody>"""
+        self.src = src
+        self.movie_list = []
+        self.status_list = []
+        self.threads = []
+        # Finished table rows keyed by list index; every worker writes only
+        # its own key, so the threads never update a shared string
+        self._rows = {}
+        # Set once write() has appended the closing markup to self.html
+        self._footer_written = False
+
+    @staticmethod
+    def _render_row(index, title, movie_id, year, rating, genres, status):
+        """ Build one <tr> for the table
+
+        index is zero-based and shown one-based. movie_id is the IMDb ID
+        without the "tt" prefix; when it is falsy the title is rendered
+        without a link. genres may be a list of names or a plain string
+        such as 'N/A'. status is inserted verbatim, as it comes from the
+        user's own list file.
+        """
+        # Local import: the bare name "html" is avoided at module level
+        # because this class already uses it as an attribute name
+        from html import escape
+
+        # A plain string must not be joined, or 'N/A' becomes 'N, /, A'
+        if isinstance(genres, str):
+            genres_text = genres
+        else:
+            genres_text = ', '.join(str(genre) for genre in genres)
+
+        title_html = escape(str(title))
+        if movie_id:
+            url = f'http://www.imdb.com/title/tt{movie_id}'
+            link = f'<a href="{escape(url)}" target="_blank">{title_html}</a>'
+        else:
+            link = title_html
+        return (
+            f'\n\t<tr><td>{index + 1}</td>'
+            f'<td><p hidden>{title_html}</p>{link}</td>'
+            f'<td>{escape(str(year))}</td><td align="center">{escape(str(rating))}</td>'
+            f'<td>{escape(genres_text)}</td>'
+            f'<td align="center">{status}</td></tr>'
+        )
+
+    def _search(self, imdb, name):
+        """ Return the IMDb search results for name
+
+        Retries up to MAX_RETRIES times on http.client.IncompleteRead and
+        returns an empty list when every attempt failed.
+        """
+        for _ in range(self.MAX_RETRIES):
+            try:
+                return imdb.search_movie(name)
+            except http.client.IncompleteRead:
+                # Truncated response: pause briefly instead of hammering IMDb
+                time.sleep(self.RETRY_DELAY)
+        return []
+
+    def _worker(self, arg, index):
+        # Scan IMDb for a given movie and store its table row
+        # This collects rating, genres, official name and a hyperlink
+        imdb = IMDb()
+        status = str(self.status_list[index])
+
+        results = self._search(imdb, arg)
+        if not results:
+            # Nothing found: keep the entry visible under its listed name
+            # rather than losing the row to an IndexError in the thread
+            self._rows[index] = self._render_row(
+                index, arg, None, 'N/A', 'N/A', 'N/A', status)
+            return
+
+        movie = results[0]
+        imdb.update(movie)
+        present = movie.keys()
+
+        def field(key, default='N/A'):
+            # IMDb does not provide every field for every title
+            return movie[key] if key in present else default
+
+        self._rows[index] = self._render_row(
+            index, field('title', arg), movie.movieID,
+            field('year'), field('rating'), field('genres'), status)
+
+    def gen(self):
+        """ Generate an HTML list based on input, using a threaded worker
+
+        Reads the list file, queries IMDb with one thread per movie and
+        appends the resulting rows to self.html in list order. Blank lines
+        in the list file are ignored.
+        """
+        if not self.src:
+            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
+        else:
+            self.src = Path(self.src)
+
+        # Open the movie list & split the columns
+        # Read as UTF-8 to match the encoding used for the generated page
+        with open(self.src, 'r', encoding='utf8') as fp_handle:
+            content = fp_handle.read()
+        names = re.sub(r'\).*', ')', content).splitlines()
+        statuses = re.sub(r'\]|.*\[', '', content).splitlines()
+        entries = [(name, status) for name, status in zip(names, statuses)
+                   if name.strip()]
+        self.movie_list = [name for name, _ in entries]
+        self.status_list = [status for _, status in entries]
+        self._rows = {}
+
+        # Progress bar. Enough said
+        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
+
+        # Silence stdout once, here, for the whole run. sys.stdout is global,
+        # so swapping it inside each thread lets one thread "restore" the
+        # stream another thread had already replaced.
+        saved_stdout = sys.stdout
+        with open(os.devnull, 'w') as sink:
+            sys.stdout = sink
+            try:
+                for idx, val in enumerate(self.movie_list):
+                    thread = threading.Thread(target=self._worker, args=(val, idx,))
+                    self.threads.append(thread)
+                    thread.start()
+                    pbar.update(idx+1)
+                    # Throttle: a short gap per request, a long one every 16
+                    time.sleep(0.2)
+                    if len(self.threads) % 16 == 0:
+                        time.sleep(6)
+
+                for thread in self.threads:
+                    thread.join()
+            finally:
+                sys.stdout = saved_stdout
+
+        # Assemble in list order, independent of which thread finished first
+        self.html += ''.join(self._rows[idx] for idx in sorted(self._rows))
+
+    def write(self, dst=None):
+        """ Write the HTML list to index.html
+
+        dst is the output path; it defaults to index.html next to the
+        script. The closing markup is appended to self.html on the first
+        call only, so writing again does not duplicate the footer.
+        """
+        if not dst:
+            dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
+        else:
+            dst = Path(dst)
+        if not self._footer_written:
+            # Just a fancy scrollbar for the html
+            scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
+            self.html += ('\n\t</tbody>\n</table>\n' +
+                          '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
+                          ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
+            self._footer_written = True
+        with open(dst, 'wb') as fp_handle:
+            fp_handle.write(self.html.encode('utf8'))
+
+
+def main():
+    """ Default run: build the table from the default list and save it """
+    movies = MovieList()
+    movies.gen()
+    movies.write()
+
+
+# Run the default flow only when executed as a script, not when imported
+if __name__ == "__main__":
+    main()