update movie_list script and list

2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions
@@ -1,105 +1,145 @@
 #!/usr/bin/env python
 from imdb import IMDb
 import os, re, threading, time, sys, subprocess
 from time import gmtime, strftime
 # Directory of this script; the movie list file is looked up relative to it
 path = os.path.dirname(sys.argv[0])
 # Single IMDb client shared by all worker threads
 ia = IMDb()
 # Titles and statuses parsed from the movie list (same length, same order)
 movie_list = []
 status_list = []
 # Every worker thread that has been started, kept so they can be joined later
 threads = []
 # NOTE(review): looks unused at module level; worker() binds its own local `query`
 query = []
 # Opening part of the generated page: head, DataTables initialisation and the
 # table header. worker() appends one <tr> per movie to this string.
 html = """<html>
 <head>
    <title>My Movie List</title>
    <link rel="stylesheet" type="text/css" href="style.css">
    <script src="../jquery-3.1.0.min.js"></script>
    <script src="jquery.dataTables.min.js"></script>
    <script>
    $(document).ready(function(){
            $('#sortable').DataTable({
                "pageLength": -1,
                "bPaginate": false
            });
    });
    </script>
 </head>
 <body>
    <header>
        <div class="scroll-indicator" />
    </header>
 <base target="_parent" />
 <table id="sortable" class="sortable">
    <thead>
        <tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
    </thead>
    <tbody>"""
 # Open the movie list & split the columns.
 # Each line is expected to look like "Title (Year)<tabs>[STATUS]".
 # The context manager closes the file even if reading fails.
 with open(path + '/movie_list', 'r') as file_:
 	tmp_names = tmp_status = file_.read()
 # Remove only the trailing "[STATUS]" tag (and the blanks around it) so that
 # titles with several parenthesised parts, e.g. "Turist (Force Majeure) (2014)",
 # keep their year. Newlines are never consumed, so both lists stay aligned.
 tmp_names = re.sub(r'[ \t]*\[[^\[\]\n]*\][ \t]*$', '', tmp_names, flags=re.MULTILINE)
 # Keep only what sits between the last '[' and ']' of each line
 tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
 movie_list = tmp_names.splitlines()
 status_list = tmp_status.splitlines()
 # Scan IMDb for a given movie and append it to the html
 # This collects rating, genres, official name and a hyperlink
 class _QuietStdout:
 	"""Context manager that silences sys.stdout while any thread is inside it.

 	A plain save/replace/restore of sys.stdout in every thread lets overlapping
 	workers save each other's throw-away file as the "original" stream. Here
 	the real stream is saved once by the first thread entering and restored
 	once by the last thread leaving, and the sink is closed at that point.
 	"""
 	_lock = threading.Lock()
 	_depth = 0
 	_saved = None
 	_sink = None

 	def __enter__(self):
 		with _QuietStdout._lock:
 			if _QuietStdout._depth == 0:
 				_QuietStdout._saved = sys.stdout
 				# The null device replaces the old 'trash' file: nothing is
 				# left behind in the working directory.
 				_QuietStdout._sink = open(os.devnull, 'w')
 				sys.stdout = _QuietStdout._sink
 			_QuietStdout._depth += 1
 		return self

 	def __exit__(self, exc_type, exc_value, traceback):
 		with _QuietStdout._lock:
 			_QuietStdout._depth -= 1
 			if _QuietStdout._depth == 0:
 				sys.stdout = _QuietStdout._saved
 				_QuietStdout._sink.close()
 				_QuietStdout._saved = _QuietStdout._sink = None
 		# Never swallow an exception raised inside the block
 		return False


 # Serialises updates of the shared `html` string across worker threads
 _html_lock = threading.Lock()


 def worker(arg, index):
 	"""Look one movie up on IMDb and append its table row to the global html.

 	arg   -- title text as listed in the movie list, used as the search query
 	index -- zero-based position in the list; shown as index + 1 in the first
 	         column and used to pick the matching entry of status_list

 	The first search hit is taken as the movie. When the search returns no
 	hit, a message is written to stderr and no row is added.
 	"""
 	global html
 	# stdout is silenced only for the search call, as before; it is restored
 	# even when the search raises.
 	with _QuietStdout():
 		query = ia.search_movie(arg)
 	if not query:
 		sys.stderr.write('No IMDb match for "' + str(arg) + '"; entry skipped\n')
 		return
 	movie = query[0]
 	ia.update(movie)
 	movie['ID'] = query[0].movieID
 	if 'genres' not in movie.keys():
 		movie['genres'] = 'N/A'
 	if 'rating' not in movie.keys():
 		movie['rating'] = 'N/A'
 	title = str(movie['title'])
 	row = (
 		'\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + title + '</p>' +
 		'<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' +
 		title +
 		'</a></td><td>' +
 		str(movie['year']) +
 		'</td><td align="center">' +
 		str(movie['rating']) +
 		'</td><td>' +
 		# str() of the genre list looks like "['A', 'B']"; strip the list syntax
 		re.sub(r'\[|\]|\'', '', str(movie['genres'])) +
 		'</td><td align="center">' +
 		str(status_list[index]) +
 		'</td></tr>')
 	# Read-modify-write of a shared string: without the lock two threads can
 	# read the same old value and one row gets lost.
 	with _html_lock:
 		html = html + row
 # Progress bar. Enough said
 import progressbar
 # Start one worker thread per movie and advance the progress bar as we go.
 # Starts are spaced out (0.2 s each, plus 6 s after every 16th thread),
 # presumably to avoid hammering IMDb -- TODO confirm the intended limits.
 bar = progressbar.ProgressBar(max_value=len(movie_list))
 for position, title in enumerate(movie_list, start=1):
 	job = threading.Thread(target=worker, args=(title, position - 1))
 	threads.append(job)
 	job.start()
 	bar.update(position)
 	time.sleep(0.2)
 	if not len(threads) % 16:
 		time.sleep(6)
 # Block until every lookup has finished
 for job in threads:
 	job.join()
 # Script tag that loads scroll-indicator.js; it is concatenated into the
 # page footer right before </body>.
 scroll = """
 <script type="text/javascript" src="scroll-indicator.js"></script>
 """
-html += ('\n\t</tbody>\n</table>\n' +
+1. Import a movie_list txt file
-	 '\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) + 
+2. Query IMDb for each entry, retrieving actual movie name, rating and genres
-         ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
+3. Generate an HTML table from the IMDb data
-file_ = open(path + '/index.html', 'wb')
+4. Store the HTML in index.html
-file_.write(html.encode('utf8'))
+"""
 file_.close()
 import os
 import re
 import time
 import sys
 import http.client
 import threading
 from pathlib import Path
 import progressbar
 from imdb import IMDb
 class MovieList:
    """ Class to generate a movie list HTML table

    Typical use: create an instance, call gen() to look every entry of the
    source list up on IMDb, then call write() to store the finished page.

    Attributes:
        html         page text built so far (header, then rows, then footer)
        src          path of the movie list; None selects 'movie_list' next
                     to the running script
        movie_list   titles parsed from the list, in file order
        status_list  statuses parsed from the list, aligned with movie_list
        threads      every worker thread started by gen()
        rows         finished table rows keyed by list index
    """

    # Upper bound on search attempts per title when IMDb drops the connection
    MAX_SEARCH_ATTEMPTS = 5
    # Base delay in seconds between attempts (multiplied by the attempt number)
    RETRY_DELAY = 1.0

    # Shared bookkeeping for silencing sys.stdout from several threads at once
    _stdout_lock = threading.Lock()
    _stdout_depth = 0
    _stdout_saved = None
    _stdout_sink = None

    def __init__(self, src=None):
        self.html = """<html>
    <head>
        <title>My Movie List</title>
        <link rel="stylesheet" type="text/css" href="style.css">
        <script src="../jquery-3.1.0.min.js"></script>
        <script src="jquery.dataTables.min.js"></script>
        <script>
        $(document).ready(function(){
                $('#sortable').DataTable({
                    "pageLength": -1,
                    "bPaginate": false
                });
        });
        </script>
    </head>
    <body>
        <header>
            <div class="scroll-indicator" />
        </header>
    <base target="_parent" />
    <table id="sortable" class="sortable">
        <thead>
            <tr>
                <th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
            </tr>
        </thead>
        <tbody>"""
        self.src = src
        self.movie_list = []
        self.status_list = []
        self.threads = []
        self.rows = {}

    @staticmethod
    def _mute_stdout():
        """ Silence sys.stdout until the matching _unmute_stdout() call

        Calls may overlap across threads: the real stream is saved by the
        first caller only, so no thread can save another thread's sink as the
        stream to restore.
        """
        with MovieList._stdout_lock:
            if MovieList._stdout_depth == 0:
                MovieList._stdout_saved = sys.stdout
                # The null device replaces the old 'trash' file, so nothing
                # is left behind in the working directory.
                MovieList._stdout_sink = open(os.devnull, 'w')
                sys.stdout = MovieList._stdout_sink
            MovieList._stdout_depth += 1

    @staticmethod
    def _unmute_stdout():
        """ Undo one _mute_stdout(); the last caller restores the real stream """
        with MovieList._stdout_lock:
            MovieList._stdout_depth -= 1
            if MovieList._stdout_depth == 0:
                sys.stdout = MovieList._stdout_saved
                MovieList._stdout_sink.close()
                MovieList._stdout_saved = MovieList._stdout_sink = None

    @staticmethod
    def _parse_line(line):
        """ Split one list line 'Title (Year)<tabs>[STATUS]' into (title, status)

        Only the last '[' separates the columns, so titles with several
        parenthesised parts such as 'Turist (Force Majeure) (2014)' keep
        their year. A line without '[' yields an empty status.
        """
        name, bracket, tail = line.rpartition('[')
        if not bracket:
            return line.strip(), ''
        return name.strip(), tail.replace(']', '').strip()

    @staticmethod
    def _format_row(index, title, url, year, rating, genres, status):
        """ Return the <tr> markup for one movie

        genres may be a list of names or an already formatted string (the
        'N/A' placeholder); joining a plain string would split it into
        single characters.
        """
        if not isinstance(genres, str):
            genres = ', '.join(genres)
        return (
            f'\n\t<tr><td>{index + 1}</td>'
            f'<td><p hidden>{title}</p><a href="{url}" target="_blank">{title}</a></td>'
            f'<td>{year}</td><td align="center">{rating}</td>'
            f'<td>{genres}</td>'
            f'<td align="center">{status}</td></tr>'
        )

    def _search(self, imdb, name):
        """ Return the first IMDb search hit for name, or None

        The search is retried when the connection is cut short
        (http.client.IncompleteRead), at most MAX_SEARCH_ATTEMPTS times with
        a growing pause in between. stdout is silenced only around the search
        call and is restored even if the call raises.
        """
        for attempt in range(1, self.MAX_SEARCH_ATTEMPTS + 1):
            self._mute_stdout()
            try:
                results = imdb.search_movie(name)
            except http.client.IncompleteRead:
                results = None
            finally:
                self._unmute_stdout()
            if results is not None:
                return results[0] if results else None
            if attempt < self.MAX_SEARCH_ATTEMPTS:
                time.sleep(self.RETRY_DELAY * attempt)
        return None

    def _worker(self, arg, index):
        """ Look one movie up on IMDb and store its table row in self.rows

        arg is the title text from the list; index is its zero-based position,
        used for the Index column, the status lookup and the row key.
        This collects rating, genres, official name and a hyperlink.
        When nothing is found a message goes to stderr and no row is stored.
        """
        imdb = IMDb()
        movie = self._search(imdb, arg)
        if movie is None:
            sys.stderr.write(f'No IMDb result for "{arg}"; entry skipped\n')
            return
        imdb.update(movie)
        # Each thread writes its own key, so no row can overwrite another and
        # the final order does not depend on which lookup finishes first.
        self.rows[index] = self._format_row(
            index,
            movie['title'],
            f'http://www.imdb.com/title/tt{movie.movieID}',
            movie.get('year', 'N/A'),
            movie.get('rating', 'N/A'),
            movie.get('genres', 'N/A'),
            self.status_list[index],
        )

    def gen(self):
        """ Generate an HTML list based on input, using a threaded worker

        Reads the source list, starts one worker thread per entry, waits for
        all of them and appends the finished rows to self.html in list order.
        Blank lines in the source list are ignored.
        """
        if not self.src:
            self.src = Path(sys.argv[0]).parent / 'movie_list'
        else:
            self.src = Path(self.src)
        # Open the movie list & split the columns
        with open(self.src, 'r') as fp_handle:
            entries = [self._parse_line(line) for line in fp_handle if line.strip()]
        self.movie_list = [name for name, _ in entries]
        self.status_list = [status for _, status in entries]
        self.rows = {}
        # Progress bar. Enough said
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        for idx, val in enumerate(self.movie_list):
            thread = threading.Thread(target=self._worker, args=(val, idx,))
            self.threads.append(thread)
            thread.start()
            pbar.update(idx+1)
            # Space the requests out: a short gap after every start and a
            # longer pause after each batch of 16 threads.
            time.sleep(0.2)
            if len(self.threads) % 16 == 0:
                time.sleep(6)
        for thread in self.threads:
            thread.join()
        self.html += ''.join(self.rows[idx] for idx in sorted(self.rows))

    def write(self, dst=None):
        """ Write the HTML list to index.html

        dst is the output path; None selects 'index.html' next to the running
        script. The page footer is appended to self.html before writing.
        """
        if not dst:
            dst = Path(sys.argv[0]).parent / 'index.html'
        else:
            dst = Path(dst)
        # Just a fancy scrollbar for the html
        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
        self.html += ('\n\t</tbody>\n</table>\n' +
                      '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
                      ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
        with open(dst, 'wb') as fp_handle:
            fp_handle.write(self.html.encode('utf8'))
 def main():
    """ Entry point: build the table from the default list and save it to the default location """
    movies = MovieList()
    movies.gen()
    movies.write()
 # Run the default generation only when executed as a script, not on import
 if __name__ == "__main__":
    main()
@@ -115,7 +115,7 @@ Star Wars: Episode IV - A New Hope (1977)			[DONE]
 Star Wars: Episode V - The Empire Strikes Back (1980)		[DONE]
 Star Wars: Episode VI - Return of the Jedi (1983)		[DONE]
 Star Wars: Episode VII - The Force Awakens (2015)		[DONE]
-Star Wars: Rogue One (2016)					[DONE]
+Rogue One: A Star Wars Story (2016)				[DONE]
 Men in Black (1997)						[DONE]
 Men in Black II (2002)						[DONE]
 Men in Black 3 (2012)						[DONE]
@@ -192,6 +192,7 @@ Identity (2003)							[DONE]
 Turist (Force Majeure) (2014)					[DONE]
 Gone Girl (2014)						[DONE]
 The Equalizer (2014)						[DONE]
 The Equalizer 2 (2018)						[DONE]
 Desert Flower (2009)						[DONE]
 I Origins (2014)						[DONE]
 Whiplash (2014)							[DONE]
@@ -246,7 +247,7 @@ Now You See Me (2013)						[DONE]
 Up (2009)							[DONE]
 The Shallows (2016)						[DONE]
 Boychoir (2014)							[DONE]
-Same Kind of Different as Me (2017)				[*]
+Same Kind of Different as Me (2017)				[DONE]
 Allied (2016)							[DONE]
 Mad Max: Fury Road (2015)					[DONE]
 Blood Father (2016)						[DONE]
@@ -287,7 +288,7 @@ Gifted (2017)							[DONE]
 Spider-Man: Homecoming (2017)					[DONE]
 Rememory (2017)							[DONE]
 Wristcutters: A Love Story (2006)				[DONE]
-Bohemian Rhapsody (2018)					[*]
+Bohemian Rhapsody (2018)					[DONE]
 Killer's Bodyguard (2017)					[*]
 Danny Collins (2015)						[DONE]
 Annabelle: Creation (2017)					[DONE]
@@ -304,3 +305,38 @@ Get Out (2017)							[DONE]
 Orphan (2009)							[DONE]
 Upgrade (2018)							[DONE]
 Dangal (2016)							[DONE]
 BlacKkKlansman (2018)						[DONE]
 The Shape of Water (2017)					[DONE]
 What Women Want (2000)						[DONE]
 What's Love Got to Do with It (1993)				[DONE]
 Spotlight (2015)						[DONE]
 First Man (2018)						[DONE]
 Halloween (2018)						[*]
 Catch-22 (1970)							[*]
 The Nun (2018)							[DONE]
 Into The White (2012)						[DONE]
 Trash (2014)							[DONE]
 First Man (2018)						[DONE]
 Full Metal Jacket (1987)					[*]
 Woman at War (2018)						[*]
 Roman J. Israel, Esq. (2017)					[DONE]
 Venom (2018)							[DONE]
 Moby Dick (1998)						[DONE]
 The Help (2011)							[DONE]
 Hidden Figures (2016)						[DONE]
 The Guilty (2018)						[DONE]
 Green Book (2018)						[DONE]
 The Road Within (2014)						[DONE]
 King of Thieves (2018)						[DONE]
 The House That Jack Built (2018)				[DONE]
 Captain Marvel (2019)						[DONE]
 Miss Sloane (2016)						[DONE]
 The Bra (2018)							[DONE]
 The Mule (2018)							[DONE]
 Escape Room (2019)						[DONE]
 Man on a Ledge (2012)						[DONE]
 The Highwaymen (2019)						[DONE]
 Cold War (2018)							[*]
 Cold Pursuit (2019)						[DONE]
 Ad Astra (2019)							[*]
 Storm Boy (2019)						[DONE]