update movie_list script and list

This commit is contained in:
2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions

View File

@@ -1,105 +1,145 @@
#!/usr/bin/env python #!/usr/bin/env python
from imdb import IMDb
import os, re, threading, time, sys, subprocess
from time import gmtime, strftime
# Directory holding this script; the movie list is read from it and
# index.html is written to it. dirname() returns '' when the script is
# started as "python script.py", which would turn path + '/movie_list'
# into the root-level '/movie_list', so fall back to the current directory.
path = os.path.dirname(sys.argv[0]) or '.'
ia = IMDb()
movie_list = []
status_list = []
threads = []
query = []
# Opening part of the generated page, up to the start of the table body.
# worker() appends one <tr> per movie to this string.
html = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
</thead>
<tbody>"""
# Open the movie list & split the columns.
# The context manager closes the file even if reading fails.
with open(path + '/movie_list', 'r') as file_:
    tmp_names = tmp_status = file_.read()
# Names: drop everything after the first ')' on each line.
tmp_names = re.sub(r'\).*', ')', tmp_names)
# Statuses: drop everything up to the last '[' and every ']' on each line.
tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
movie_list = tmp_names.splitlines()
status_list = tmp_status.splitlines()
# Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink
def worker(arg, index):
    """Look one list entry up on IMDb and append its table row to ``html``.

    Args:
        arg: Search string for the movie, as read from the movie list.
        index: Zero-based position of the entry; shown one-based in the
            table and used to pick the matching entry of ``status_list``.

    Entries without any search result are reported on stderr and skipped.
    """
    global html
    save_stdout = sys.stdout
    # stdout is pointed at a scratch file while the search runs.
    # NOTE(review): the handle is deliberately not closed here - workers run
    # concurrently and another thread may still hold it as sys.stdout.
    sys.stdout = open('trash', 'w')
    try:
        query = ia.search_movie(arg)
    finally:
        # Restore stdout even when the search raises; otherwise every later
        # print would keep going to the scratch file.
        sys.stdout = save_stdout
    if not query:
        # An empty result list used to fail with IndexError on query[0].
        sys.stderr.write('No IMDb result for ' + repr(arg) + ', skipping\n')
        return
    movie = query[0]
    ia.update(movie)
    movie['ID'] = movie.movieID
    # Fill in placeholders for fields that are not present on every title.
    for field in ('genres', 'rating', 'year'):
        if field not in movie.keys():
            movie[field] = 'N/A'
    row = (
        '\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + movie['title'] + '</p>' +
        '<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' +
        str(movie['title']) +
        '</a></td><td>' +
        str(movie['year']) +
        '</td><td align="center">' +
        str(movie['rating']) +
        '</td><td>' +
        # str() of the genre list looks like "['A', 'B']"; strip the
        # brackets and quotes to get "A, B".
        re.sub(r'\[|\]|\'', '', str(movie['genres'])) +
        '</td><td align="center">' +
        str(status_list[index]) +
        '</td></tr>')
    html = html + row
# Progress bar. Enough said
import progressbar
bar = progressbar.ProgressBar(max_value=len(movie_list))
# Start one worker thread per list entry, pausing between starts.
for position, entry in enumerate(movie_list):
    lookup = threading.Thread(target=worker, args=(entry, position,))
    threads.append(lookup)
    lookup.start()
    bar.update(position + 1)
    time.sleep(0.2)
    # Longer pause after every 16th started thread.
    if not len(threads) % 16:
        time.sleep(6)
# Wait until every lookup has finished before the page is completed.
for lookup in threads:
    lookup.join()
# Just a fancy scrollbar for the html
scroll = """
<script type="text/javascript" src="scroll-indicator.js"></script>
""" """
html += ('\n\t</tbody>\n</table>\n' + 1. Import a movie_list txt file
'\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) + 2. Query IMDb for each entry, retrieving actual movie name, rating and genres
' by ' + sys.argv[0] + scroll + '</body>\n</html>') 3. Generate an HTML table from the IMDb data
file_ = open(path + '/index.html', 'wb') 4. Store the HTML in index.html
file_.write(html.encode('utf8')) """
file_.close()
import contextlib
import html
import http.client
import os
import re
import sys
import threading
import time
from pathlib import Path

import progressbar
from imdb import IMDb
class MovieList:
    """Generate a sortable HTML table of movies enriched with IMDb data.

    Call ``gen()`` to read the source list and look every entry up on IMDb
    (one worker thread per entry), then ``write()`` to close the document
    and save it.
    """

    # Number of times a search is sent before an entry is given up on when
    # the HTTP response keeps arriving truncated.
    MAX_SEARCH_RETRIES = 5

    def __init__(self, src=None):
        """Create an empty list.

        Args:
            src: Path of the movie list file. When falsy, ``gen()`` uses the
                file ``movie_list`` in the directory of the running script.
        """
        # Document accumulated so far: starts as the page head and open
        # table; workers append rows, write() appends the footer.
        self.html = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead>
<tbody>"""
        self.src = src
        self.movie_list = []    # search strings, one per source line
        self.status_list = []   # status texts, parallel to movie_list
        self.threads = []       # every worker thread started by gen()
        # Serialises the read-modify-write of self.html done by the workers.
        self._html_lock = threading.Lock()

    @staticmethod
    def _format_row(index, movie_id, movie, status):
        """Return the ``<tr>`` markup for one movie.

        Args:
            index: Zero-based position in the list; rendered one-based.
            movie_id: IMDb identifier without the ``tt`` prefix.
            movie: Mapping providing ``title`` and, optionally, ``year``,
                ``rating`` and ``genres``.
            status: Status text taken from the source list.
        """
        genres = movie.get('genres')
        if not genres:
            genres = 'N/A'
        elif not isinstance(genres, str):
            # Join only real collections: joining the 'N/A' placeholder
            # string itself would render as "N, /, A".
            genres = ', '.join(str(genre) for genre in genres)
        # Everything below comes from IMDb or the list file, so escape it
        # before it goes into markup (titles may contain '&', '<', quotes).
        title = html.escape(str(movie['title']))
        year = html.escape(str(movie.get('year', 'N/A')))
        rating = html.escape(str(movie.get('rating', 'N/A')))
        url = html.escape(f'http://www.imdb.com/title/tt{movie_id}')
        return (
            f'\n\t<tr><td>{index + 1}</td>'
            f'<td><p hidden>{title}</p><a href="{url}" target="_blank">{title}</a></td>'
            f'<td>{year}</td><td align="center">{rating}</td>'
            f'<td>{html.escape(genres)}</td>'
            f'<td align="center">{html.escape(str(status))}</td></tr>'
        )

    def _worker(self, arg, index):
        """Look one entry up on IMDb and append its row to ``self.html``.

        Args:
            arg: Search string for the movie.
            index: Zero-based position of the entry; also selects the
                matching entry of ``self.status_list``.

        Entries that cannot be resolved are reported on stderr and skipped.
        """
        imdb = IMDb()
        for _attempt in range(self.MAX_SEARCH_RETRIES):
            try:
                query = imdb.search_movie(arg)
            except http.client.IncompleteRead:
                # Truncated response: send the search again.
                continue
            break
        else:
            print(f'Giving up on {arg!r}: incomplete IMDb responses',
                  file=sys.stderr)
            return
        if not query:
            # query[0] on an empty result would raise IndexError.
            print(f'No IMDb result for {arg!r}, skipping', file=sys.stderr)
            return
        movie = query[0]
        imdb.update(movie)
        row = self._format_row(index, movie.movieID, movie,
                               self.status_list[index])
        # "+=" on an attribute is not atomic; without the lock two workers
        # finishing together could overwrite each other's row.
        with self._html_lock:
            self.html += row

    def gen(self):
        """ Generate an HTML list based on input, using a threaded worker """
        if not self.src:
            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
        else:
            self.src = Path(self.src)

        # Open the movie list & split the columns
        with open(self.src, 'r') as fp_handle:
            raw = fp_handle.read()
        # Names: drop everything after the first ')' on each line.
        self.movie_list = re.sub(r'\).*', ')', raw).splitlines()
        # Statuses: drop everything up to the last '[' and every ']'.
        self.status_list = re.sub(r'\]|.*\[', '', raw).splitlines()

        # Progress bar. Enough said
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))

        # Silence stdout once, around the whole run. sys.stdout is
        # process-wide, so swapping it inside each worker let concurrent
        # threads restore each other's scratch file and leave stdout
        # redirected after the run.
        with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
            for idx, val in enumerate(self.movie_list):
                thread = threading.Thread(target=self._worker, args=(val, idx,))
                self.threads.append(thread)
                thread.start()
                pbar.update(idx + 1)
                # Pause between starts, longer after every 16th thread.
                time.sleep(0.2)
                if len(self.threads) % 16 == 0:
                    time.sleep(6)
            for thread in self.threads:
                thread.join()

    def write(self, dst=None):
        """Write the HTML list to index.html.

        Args:
            dst: Output path. When falsy, ``index.html`` in the directory of
                the running script is used.

        The footer is appended to ``self.html`` before saving.
        """
        if not dst:
            dst = Path(os.path.dirname(sys.argv[0])) / 'index.html'
        else:
            dst = Path(dst)

        # Just a fancy scrollbar for the html
        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
        self.html += ('\n\t</tbody>\n</table>\n' +
                      '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
                      ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
        with open(dst, 'wb') as fp_handle:
            fp_handle.write(self.html.encode('utf8'))
def main():
    """Build the table from the default movie list and save it with the default name."""
    table = MovieList()
    table.gen()
    table.write()


if __name__ == "__main__":
    main()

View File

@@ -115,7 +115,7 @@ Star Wars: Episode IV - A New Hope (1977) [DONE]
Star Wars: Episode V - The Empire Strikes Back (1980) [DONE] Star Wars: Episode V - The Empire Strikes Back (1980) [DONE]
Star Wars: Episode VI - Return of the Jedi (1983) [DONE] Star Wars: Episode VI - Return of the Jedi (1983) [DONE]
Star Wars: Episode: The Force Awakens (2015) [DONE] Star Wars: Episode: The Force Awakens (2015) [DONE]
Star Wars: Rogue One (2016) [DONE] Rogue One: A Star Wars Story (2016) [DONE]
Men in Black (1997) [DONE] Men in Black (1997) [DONE]
Men in Black II (2002) [DONE] Men in Black II (2002) [DONE]
Men in Black 3 (2012) [DONE] Men in Black 3 (2012) [DONE]
@@ -192,6 +192,7 @@ Identity (2003) [DONE]
Turist (Force Majeure) (2014) [DONE] Turist (Force Majeure) (2014) [DONE]
Gone Girl (2014) [DONE] Gone Girl (2014) [DONE]
The Equalizer (2014) [DONE] The Equalizer (2014) [DONE]
The Equalizer 2 (2018) [DONE]
Desert Flower (2009) [DONE] Desert Flower (2009) [DONE]
I Origins (2014) [DONE] I Origins (2014) [DONE]
Whiplash (2014) [DONE] Whiplash (2014) [DONE]
@@ -246,7 +247,7 @@ Now You See Me (2013) [DONE]
Up (2009) [DONE] Up (2009) [DONE]
The Shallows (2016) [DONE] The Shallows (2016) [DONE]
Boychoir (2014) [DONE] Boychoir (2014) [DONE]
Same Kind of Different as Me (2017) [*] Same Kind of Different as Me (2017) [DONE]
Allied (2016) [DONE] Allied (2016) [DONE]
Mad Max: Fury Road (2015) [DONE] Mad Max: Fury Road (2015) [DONE]
Blood Father (2016) [DONE] Blood Father (2016) [DONE]
@@ -287,7 +288,7 @@ Gifted (2017) [DONE]
Spider-Man: Homecoming (2017) [DONE] Spider-Man: Homecoming (2017) [DONE]
Rememory (2017) [DONE] Rememory (2017) [DONE]
Wristcutters: A Love Story (2006) [DONE] Wristcutters: A Love Story (2006) [DONE]
Bohemian Rhapsody (2018) [*] Bohemian Rhapsody (2018) [DONE]
Killer's Bodyguard (2017) [*] Killer's Bodyguard (2017) [*]
Danny Collins (2015) [DONE] Danny Collins (2015) [DONE]
Annabelle: Creation (2017) [DONE] Annabelle: Creation (2017) [DONE]
@@ -304,3 +305,38 @@ Get Out (2017) [DONE]
Orphan (2009) [DONE] Orphan (2009) [DONE]
Upgrade (2018) [DONE] Upgrade (2018) [DONE]
Dangal (2016) [DONE] Dangal (2016) [DONE]
BlacKkKlansman (2018) [DONE]
The Shape of Water (2017) [DONE]
What Women Want (2000) [DONE]
What's Love Got to Do with It (1993) [DONE]
Spotlight (2015) [DONE]
First Man (2018) [DONE]
Halloween (2018) [*]
Catch-22 (1970) [*]
The Nun (2018) [DONE]
Into The White (2012) [DONE]
Trash (2014) [DONE]
First Man (2018) [DONE]
Full Metal Jacket (1987) [*]
Woman at War (2018) [*]
Roman J. Israel, Esq. (2017) [DONE]
Venom (2018) [DONE]
Moby Dick (1998) [DONE]
The Help (2011) [DONE]
Hidden Figures (2016) [DONE]
The Guilty (2018) [DONE]
Green Book (2018) [DONE]
The Road Within (2014) [DONE]
King of Thieves (2018) [DONE]
The House That Jack Built (2018) [DONE]
Captain Marvel (2019) [DONE]
Miss Sloane (2016) [DONE]
The Bra (2018) [DONE]
The Mule (2018) [DONE]
Escape Room (2019) [DONE]
Man on a Ledge (2012) [DONE]
The Highwaymen (2019) [DONE]
Cold War (2018) [*]
Cold Pursuit (2019) [DONE]
Ad Astra (2019) [*]
Storm Boy (2019) [DONE]