update movie_list script and list

This commit is contained in:
2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions

View File

@@ -1,17 +1,27 @@
#!/usr/bin/env python #!/usr/bin/env python
"""
1. Import a movie_list txt file
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
"""
import os
import re
import time
import sys
import http.client
import threading
from pathlib import Path
import progressbar
from imdb import IMDb from imdb import IMDb
import os, re, threading, time, sys, subprocess
from time import gmtime, strftime
path = os.path.dirname(sys.argv[0])
ia = IMDb()
movie_list = [] class MovieList:
status_list = [] """ Class to generate a movie list HTML table """
threads = [] def __init__(self, src=None):
query = [] self.html = """<html>
html = """<html>
<head> <head>
<title>My Movie List</title> <title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css"> <link rel="stylesheet" type="text/css" href="style.css">
@@ -33,73 +43,103 @@ html = """<html>
<base target="_parent" /> <base target="_parent" />
<table id="sortable" class="sortable"> <table id="sortable" class="sortable">
<thead> <thead>
<tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr> <tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead> </thead>
<tbody>""" <tbody>"""
self.src = src
self.movie_list = []
self.status_list = []
self.threads = []
# Open the movie list & split the columns def _worker(self, arg, index):
file_ = open(path + '/movie_list', 'r')
tmp_names = tmp_status = file_.read()
file_.close()
tmp_names = re.sub(r'\).*', ')', tmp_names)
tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
movie_list = tmp_names.splitlines()
status_list = tmp_status.splitlines()
# Scan IMDb for a given movie and append it to the html # Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink # This collects rating, genres, official name and a hyperlink
def worker(arg, index): imdb = IMDb()
save_stdout = sys.stdout save_stdout = sys.stdout
sys.stdout = open('trash', 'w') sys.stdout = open('trash', 'w')
query = ia.search_movie(arg) while True:
try:
query = imdb.search_movie(arg)
break
except http.client.IncompleteRead:
pass
sys.stdout = save_stdout sys.stdout = save_stdout
movie = query[0] movie = query[0]
ia.update(movie) imdb.update(movie)
movie['ID'] = query[0].movieID movie['ID'] = query[0].movieID
if 'genres' not in movie.keys(): if 'genres' not in movie.keys():
movie['genres'] = 'N/A' movie['genres'] = 'N/A'
if 'rating' not in movie.keys(): if 'rating' not in movie.keys():
movie['rating'] = 'N/A' movie['rating'] = 'N/A'
global html
html = (html + url = f'http://www.imdb.com/title/tt{movie["ID"]}'
'\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + movie['title'] + '</p>' + title = str(movie['title'])
'<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' + year = str(movie['year'])
str(movie['title']) + rating = str(movie['rating'])
'</a></td><td>' + genres = ', '.join(movie['genres'])
str(movie['year']) + status = str(self.status_list[index])
'</td><td align="center">' + self.html += (
str(movie['rating']) + f'\n\t<tr><td>{index + 1}</td>'
'</td><td>' + f'<td><p hidden>{title}</p><a href="{url}" target="_blank">{title}</a></td>'
re.sub(r'\[|\]|\'', '', str(movie['genres'])) + f'<td>{year}</td><td align="center">{rating}</td>'
'</td><td align="center">' + f'<td>{genres}</td>'
str(status_list[index]) + f'<td align="center">{status}</td></tr>'
'</td></tr>') )
def gen(self):
""" Generate an HTML list based on input, using a threaded worker """
if not self.src:
self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
else:
self.src = Path(self.src)
# Open the movie list & split the columns
with open(self.src, 'r') as fp_handle:
tmp_names = tmp_status = fp_handle.read()
tmp_names = re.sub(r'\).*', ')', tmp_names)
tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
self.movie_list = tmp_names.splitlines()
self.status_list = tmp_status.splitlines()
# Progress bar. Enough said # Progress bar. Enough said
import progressbar pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
bar = progressbar.ProgressBar(max_value=len(movie_list))
for idx, val in enumerate(movie_list): for idx, val in enumerate(self.movie_list):
t = threading.Thread(target=worker, args=(val, idx,)) thread = threading.Thread(target=self._worker, args=(val, idx,))
threads.append(t) self.threads.append(thread)
t.start() thread.start()
bar.update(idx+1) pbar.update(idx+1)
time.sleep(0.2) time.sleep(0.2)
if len(threads)%16 == 0: if len(self.threads) % 16 == 0:
time.sleep(6) time.sleep(6)
for x in threads: for thread in self.threads:
x.join() thread.join()
def write(self, dst=None):
""" Write the HTML list to index.html """
if not dst:
dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
else:
dst = Path(dst)
# Just a fancy scrollbar for the html # Just a fancy scrollbar for the html
scroll = """ scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
<script type="text/javascript" src="scroll-indicator.js"></script> self.html += ('\n\t</tbody>\n</table>\n' +
""" '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
html += ('\n\t</tbody>\n</table>\n' +
'\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) +
' by ' + sys.argv[0] + scroll + '</body>\n</html>') ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
file_ = open(path + '/index.html', 'wb') with open(dst, 'wb') as fp_handle:
file_.write(html.encode('utf8')) fp_handle.write(self.html.encode('utf8'))
file_.close()
def main():
""" Default run """
mlist = MovieList()
mlist.gen()
mlist.write()
if __name__ == "__main__":
main()

View File

@@ -115,7 +115,7 @@ Star Wars: Episode IV - A New Hope (1977) [DONE]
Star Wars: Episode V - The Empire Strikes Back (1980) [DONE] Star Wars: Episode V - The Empire Strikes Back (1980) [DONE]
Star Wars: Episode VI - Return of the Jedi (1983) [DONE] Star Wars: Episode VI - Return of the Jedi (1983) [DONE]
Star Wars: Episode VII - The Force Awakens (2015) [DONE] Star Wars: Episode VII - The Force Awakens (2015) [DONE]
Star Wars: Rogue One (2016) [DONE] Rogue One: A Star Wars Story (2016) [DONE]
Men in Black (1997) [DONE] Men in Black (1997) [DONE]
Men in Black II (2002) [DONE] Men in Black II (2002) [DONE]
Men in Black 3 (2012) [DONE] Men in Black 3 (2012) [DONE]
@@ -192,6 +192,7 @@ Identity (2003) [DONE]
Turist (Force Majeure) (2014) [DONE] Turist (Force Majeure) (2014) [DONE]
Gone Girl (2014) [DONE] Gone Girl (2014) [DONE]
The Equalizer (2014) [DONE] The Equalizer (2014) [DONE]
The Equalizer 2 (2018) [DONE]
Desert Flower (2009) [DONE] Desert Flower (2009) [DONE]
I Origins (2014) [DONE] I Origins (2014) [DONE]
Whiplash (2014) [DONE] Whiplash (2014) [DONE]
@@ -246,7 +247,7 @@ Now You See Me (2013) [DONE]
Up (2009) [DONE] Up (2009) [DONE]
The Shallows (2016) [DONE] The Shallows (2016) [DONE]
Boychoir (2014) [DONE] Boychoir (2014) [DONE]
Same Kind of Different as Me (2017) [*] Same Kind of Different as Me (2017) [DONE]
Allied (2016) [DONE] Allied (2016) [DONE]
Mad Max: Fury Road (2015) [DONE] Mad Max: Fury Road (2015) [DONE]
Blood Father (2016) [DONE] Blood Father (2016) [DONE]
@@ -287,7 +288,7 @@ Gifted (2017) [DONE]
Spider-Man: Homecoming (2017) [DONE] Spider-Man: Homecoming (2017) [DONE]
Rememory (2017) [DONE] Rememory (2017) [DONE]
Wristcutters: A Love Story (2006) [DONE] Wristcutters: A Love Story (2006) [DONE]
Bohemian Rhapsody (2018) [*] Bohemian Rhapsody (2018) [DONE]
Killer's Bodyguard (2017) [*] Killer's Bodyguard (2017) [*]
Danny Collins (2015) [DONE] Danny Collins (2015) [DONE]
Annabelle: Creation (2017) [DONE] Annabelle: Creation (2017) [DONE]
@@ -304,3 +305,38 @@ Get Out (2017) [DONE]
Orphan (2009) [DONE] Orphan (2009) [DONE]
Upgrade (2018) [DONE] Upgrade (2018) [DONE]
Dangal (2016) [DONE] Dangal (2016) [DONE]
BlacKkKlansman (2018) [DONE]
The Shape of Water (2017) [DONE]
What Women Want (2000) [DONE]
What's Love Got to Do with It (1993) [DONE]
Spotlight (2015) [DONE]
First Man (2018) [DONE]
Halloween (2018) [*]
Catch-22 (1970) [*]
The Nun (2018) [DONE]
Into The White (2012) [DONE]
Trash (2014) [DONE]
First Man (2018) [DONE]
Full Metal Jacket (1987) [*]
Woman at War (2018) [*]
Roman J. Israel, Esq. (2017) [DONE]
Venom (2018) [DONE]
Moby Dick (1998) [DONE]
The Help (2011) [DONE]
Hidden Figures (2016) [DONE]
The Guilty (2018) [DONE]
Green Book (2018) [DONE]
The Road Within (2014) [DONE]
King of Thieves (2018) [DONE]
The House That Jack Built (2018) [DONE]
Captain Marvel (2019) [DONE]
Miss Sloane (2016) [DONE]
The Bra (2018) [DONE]
The Mule (2018) [DONE]
Escape Room (2019) [DONE]
Man on a Ledge (2012) [DONE]
The Highwaymen (2019) [DONE]
Cold War (2018) [*]
Cold Pursuit (2019) [DONE]
Ad Astra (2019) [*]
Storm Boy (2019) [DONE]