update movie_list script and list

This commit is contained in:
2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions

View File

@@ -1,18 +1,28 @@
#!/usr/bin/env python
"""
1. Import a movie_list txt file
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
"""
import os
import re
import time
import sys
import http.client
import threading
from pathlib import Path
import progressbar
from imdb import IMDb
import os, re, threading, time, sys, subprocess
from time import gmtime, strftime
path = os.path.dirname(sys.argv[0])
ia = IMDb()
movie_list = []
status_list = []
threads = []
query = []
html = """<html>
<head>
class MovieList:
""" Class to generate a movie list HTML table """
def __init__(self, src=None):
self.html = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
@@ -25,81 +35,111 @@ html = """<html>
});
});
</script>
</head>
<body>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
<tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead>
<tbody>"""
self.src = src
self.movie_list = []
self.status_list = []
self.threads = []
# Open the movie list & split the columns
file_ = open(path + '/movie_list', 'r')
tmp_names = tmp_status = file_.read()
file_.close()
tmp_names = re.sub(r'\).*', ')', tmp_names)
tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
movie_list = tmp_names.splitlines()
status_list = tmp_status.splitlines()
# Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink
def worker(arg, index):
def _worker(self, arg, index):
    """ Look up one movie on IMDb and append its row to the HTML table

    Collects the official title, year, rating, genres and a hyperlink to
    the IMDb page of the first search hit for ``arg``.

    arg   -- search string for the movie, as read from the movie list
    index -- zero-based position of the movie; selects its entry in
             self.status_list and is shown one-based in the Index column

    Used as a thread target by gen(), so the finished row is appended to
    self.html under a lock.
    """
    from html import escape  # local import; the file-level imports stay as they are

    imdb = IMDb()
    # Point stdout at a scratch file for the duration of the search
    # (presumably to hide whatever the lookup prints) and restore it after.
    # NOTE(review): sys.stdout is process-wide, so concurrent workers can
    # save and restore each other's scratch file, and the file is never
    # closed explicitly -- confirm whether the redirect is still needed.
    save_stdout = sys.stdout
    sys.stdout = open('trash', 'w')
    try:
        while True:
            try:
                query = imdb.search_movie(arg)
                break
            except http.client.IncompleteRead:
                # Truncated HTTP response: simply ask again
                pass
    finally:
        # Restore stdout even when the search fails with another error
        sys.stdout = save_stdout

    movie = query[0]
    imdb.update(movie)
    movie['ID'] = query[0].movieID
    # Fill in placeholders for fields IMDb did not provide
    if 'genres' not in movie.keys():
        movie['genres'] = 'N/A'
    if 'rating' not in movie.keys():
        movie['rating'] = 'N/A'
    if 'year' not in movie.keys():
        movie['year'] = 'N/A'

    # Only join real genre sequences: joining the 'N/A' placeholder
    # string would spell it out as 'N, /, A'.
    genres = movie['genres']
    if not isinstance(genres, str):
        genres = ', '.join(genres)

    url = f'http://www.imdb.com/title/tt{movie["ID"]}'
    # Everything below is external text going into markup, so escape it
    title = escape(str(movie['title']))
    year = escape(str(movie['year']))
    rating = escape(str(movie['rating']))
    genres = escape(genres)
    status = escape(str(self.status_list[index]))

    row = (
        f'\n\t<tr><td>{index + 1}</td>'
        f'<td><p hidden>{title}</p><a href="{url}" target="_blank">{title}</a></td>'
        f'<td>{year}</td><td align="center">{rating}</td>'
        f'<td>{genres}</td>'
        f'<td align="center">{status}</td></tr>'
    )

    # `self.html += row` is a read-modify-write; without a lock two
    # workers finishing together could drop one of the rows.
    # setdefault hands every worker of this instance the same lock.
    lock = self.__dict__.setdefault('_html_lock', threading.Lock())
    with lock:
        self.html += row
for idx, val in enumerate(movie_list):
t = threading.Thread(target=worker, args=(val, idx,))
threads.append(t)
t.start()
bar.update(idx+1)
def gen(self):
    """ Generate an HTML list based on input, using a threaded worker

    Reads the movie list from self.src (default: the file 'movie_list'
    in the directory of sys.argv[0]), splits every non-blank line into a
    name and a status, then starts one _worker thread per movie and
    waits for all of them to finish.
    """
    if not self.src:
        self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
    else:
        self.src = Path(self.src)

    # Open the movie list & split the columns
    with open(self.src, 'r') as fp_handle:
        # Blank lines hold no movie; skip them so they are not looked up
        entries = [line for line in fp_handle.read().splitlines() if line.strip()]

    # Name column: cut after the LAST ')' so that titles which contain
    # parentheses themselves, e.g. 'Turist (Force Majeure) (2014)',
    # keep their year instead of being cut at the first ')'.
    self.movie_list = [re.sub(r'\)[^)]*$', ')', entry) for entry in entries]
    # Status column: the text after the final '[', with ']' removed
    self.status_list = [re.sub(r'\]|.*\[', '', entry) for entry in entries]

    # Progress bar. Enough said
    pbar = progressbar.ProgressBar(max_value=len(self.movie_list))

    for idx, val in enumerate(self.movie_list):
        thread = threading.Thread(target=self._worker, args=(val, idx,))
        self.threads.append(thread)
        thread.start()
        pbar.update(idx + 1)
        # Stagger the thread starts, with a longer pause after every
        # 16th thread (presumably to go easy on IMDb -- confirm)
        time.sleep(0.2)
        if len(self.threads) % 16 == 0:
            time.sleep(6)

    for thread in self.threads:
        thread.join()
# Just a fancy scrollbar for the html
scroll = """
<script type="text/javascript" src="scroll-indicator.js"></script>
"""
html += ('\n\t</tbody>\n</table>\n' +
'\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) +
def write(self, dst=None):
    """ Write the HTML list to index.html

    The closing table markup, a 'Generated on' stamp (UTC) and the
    scroll-indicator script are added after the rows held in self.html.
    self.html itself is left untouched, so calling write() more than
    once never duplicates the footer.

    dst -- output path; defaults to 'index.html' in the directory of
           sys.argv[0]
    """
    if not dst:
        dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
    else:
        dst = Path(dst)

    # Just a fancy scrollbar for the html
    scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
    stamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
    footer = (
        '\n\t</tbody>\n</table>\n'
        f'\nGenerated on: {stamp} by {sys.argv[0]}{scroll}</body>\n</html>'
    )

    # Binary mode: the UTF-8 bytes are written exactly as encoded
    with open(dst, 'wb') as fp_handle:
        fp_handle.write((self.html + footer).encode('utf8'))
def main():
    """ Build the table from the default movie list and write it to the default target """
    movies = MovieList()
    movies.gen()
    movies.write()


if __name__ == "__main__":
    main()

View File

@@ -115,7 +115,7 @@ Star Wars: Episode IV - A New Hope (1977) [DONE]
Star Wars: Episode V - The Empire Strikes Back (1980) [DONE]
Star Wars: Episode VI - Return of the Jedi (1983) [DONE]
Star Wars: Episode: The Force Awakens (2015) [DONE]
Star Wars: Rogue One (2016) [DONE]
Rogue One: A Star Wars Story (2016) [DONE]
Men in Black (1997) [DONE]
Men in Black II (2002) [DONE]
Men in Black 3 (2012) [DONE]
@@ -192,6 +192,7 @@ Identity (2003) [DONE]
Turist (Force Majeure) (2014) [DONE]
Gone Girl (2014) [DONE]
The Equalizer (2014) [DONE]
The Equalizer 2 (2018) [DONE]
Desert Flower (2009) [DONE]
I Origins (2014) [DONE]
Whiplash (2014) [DONE]
@@ -246,7 +247,7 @@ Now You See Me (2013) [DONE]
Up (2009) [DONE]
The Shallows (2016) [DONE]
Boychoir (2014) [DONE]
Same Kind of Different as Me (2017) [*]
Same Kind of Different as Me (2017) [DONE]
Allied (2016) [DONE]
Mad Max: Fury Road (2015) [DONE]
Blood Father (2016) [DONE]
@@ -287,7 +288,7 @@ Gifted (2017) [DONE]
Spider-Man: Homecoming (2017) [DONE]
Rememory (2017) [DONE]
Wristcutters: A Love Story (2006) [DONE]
Bohemian Rhapsody (2018) [*]
Bohemian Rhapsody (2018) [DONE]
Killer's Bodyguard (2017) [*]
Danny Collins (2015) [DONE]
Annabelle: Creation (2017) [DONE]
@@ -304,3 +305,38 @@ Get Out (2017) [DONE]
Orphan (2009) [DONE]
Upgrade (2018) [DONE]
Dangal (2016) [DONE]
BlacKkKlansman (2018) [DONE]
The Shape of Water (2017) [DONE]
What Women Want (2000) [DONE]
What's Love Got to Do with It (1993) [DONE]
Spotlight (2015) [DONE]
First Man (2018) [DONE]
Halloween (2018) [*]
Catch-22 (1970) [*]
The Nun (2018) [DONE]
Into The White (2012) [DONE]
Trash (2014) [DONE]
First Man (2018) [DONE]
Full Metal Jacket (1987) [*]
Woman at War (2018) [*]
Roman J. Israel, Esq. (2017) [DONE]
Venom (2018) [DONE]
Moby Dick (1998) [DONE]
The Help (2011) [DONE]
Hidden Figures (2016) [DONE]
The Guilty (2018) [DONE]
Green Book (2018) [DONE]
The Road Within (2014) [DONE]
King of Thieves (2018) [DONE]
The House That Jack Built (2018) [DONE]
Captain Marvel (2019) [DONE]
Miss Sloane (2016) [DONE]
The Bra (2018) [DONE]
The Mule (2018) [DONE]
Escape Room (2019) [DONE]
Man on a Ledge (2012) [DONE]
The Highwaymen (2019) [DONE]
Cold War (2018) [*]
Cold Pursuit (2019) [DONE]
Ad Astra (2019) [*]
Storm Boy (2019) [DONE]