update movie_list script and list

This commit is contained in:
2019-09-29 14:16:45 +03:00
parent 391ae02488
commit aac410f1e1
2 changed files with 180 additions and 104 deletions

View File

@@ -1,105 +1,145 @@
#!/usr/bin/env python
from imdb import IMDb
import os, re, threading, time, sys, subprocess
from time import gmtime, strftime
# Directory part of the path this script was started with; the movie list
# is looked up relative to it.
path = os.path.dirname(sys.argv[0])
# One IMDb client for the whole script.
ia = IMDb()
movie_list = []
status_list = []
threads = []
query = []
# Page header up to the opening <tbody>; table rows are appended to this string.
html = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr><th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th></tr>
</thead>
<tbody>"""
# Open the movie list & split the columns
# os.path.join keeps the lookup relative when the script is started without a
# directory component: `python script.py` yields path == '', and plain
# concatenation would then open '/movie_list' in the filesystem root.
# The context manager closes the handle even if read() raises.
with open(os.path.join(path, 'movie_list'), 'r') as file_:
    tmp_names = tmp_status = file_.read()
# Search term: drop everything after the first ')' on each line.
tmp_names = re.sub(r'\).*', ')', tmp_names)
# Status: drop everything up to the last '[' on each line and every ']'.
# NOTE(review): presumably lines look like "Title (year) [status]" - confirm
# against the actual movie_list file.
tmp_status = re.sub(r'\]|.*\[', '', tmp_status)
# Both lists come from the same text, so they have one entry per input line.
movie_list = tmp_names.splitlines()
status_list = tmp_status.splitlines()
# sys.stdout is process-wide, so the temporary swap inside worker() is
# serialised: unsynchronised save/restore pairs in concurrent threads can
# restore the wrong stream.
_stdout_lock = threading.Lock()
# Serialises the read-modify-write of the shared ``html`` string.
_html_lock = threading.Lock()


# Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink
def worker(arg, index):
    """Look up ``arg`` on IMDb and append one table row to the global ``html``.

    arg   -- search string for one movie
    index -- zero-based position of the entry; selects the matching
             ``status_list`` item and is shown one-based in the first column

    When the search returns no hits a placeholder row is appended (the search
    string as title, 'N/A' for the IMDb columns) instead of raising
    IndexError inside the thread.
    """
    global html
    # Only the search is serialised; ia.update() below still runs concurrently.
    with _stdout_lock:
        save_stdout = sys.stdout
        # Discard whatever the search prints. os.devnull replaces the old
        # 'trash' file, which was never closed and was left in the cwd.
        with open(os.devnull, 'w') as sink:
            sys.stdout = sink
            try:
                query = ia.search_movie(arg)
            finally:
                # Always put the real stream back, even if the search raises.
                sys.stdout = save_stdout
    if not query:
        row = ('\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + arg + '</p>' +
               arg +
               '</td><td>N/A</td><td align="center">N/A</td><td>N/A</td>' +
               '<td align="center">' +
               str(status_list[index]) +
               '</td></tr>')
        with _html_lock:
            html = html + row
        return
    movie = query[0]
    # Fetch the full record for the first search hit.
    ia.update(movie)
    movie['ID'] = query[0].movieID
    # Fill in placeholders for fields IMDb did not provide.
    for key in ('genres', 'rating', 'year'):
        if key not in movie.keys():
            movie[key] = 'N/A'
    row = ('\n\t<tr><td>' + str(index+1) + '</td><td><p hidden>' + movie['title'] + '</p>' +
           '<a href="http://www.imdb.com/title/tt' + movie['ID'] + '" target="_blank">' +
           str(movie['title']) +
           '</a></td><td>' +
           str(movie['year']) +
           '</td><td align="center">' +
           str(movie['rating']) +
           '</td><td>' +
           # str() of the genre list looks like "['A', 'B']"; strip the
           # brackets and quotes to leave "A, B".
           re.sub(r'\[|\]|\'', '', str(movie['genres'])) +
           '</td><td align="center">' +
           str(status_list[index]) +
           '</td></tr>')
    with _html_lock:
        html = html + row
# Progress bar. Enough said
import progressbar
bar = progressbar.ProgressBar(max_value=len(movie_list))
# Start one lookup thread per list entry.
for idx, val in enumerate(movie_list):
    t = threading.Thread(target=worker, args=(val, idx,))
    threads.append(t)
    t.start()
    bar.update(idx+1)
    # Pace thread creation: a short gap after every thread and a longer pause
    # after every 16th (presumably to go easy on IMDb - confirm).
    time.sleep(0.2)
    if len(threads)%16 == 0:
        time.sleep(6)
# Wait for every lookup to finish before closing the table.
for x in threads:
    x.join()
# Just a fancy scrollbar for the html
scroll = """
<script type="text/javascript" src="scroll-indicator.js"></script>
"""
html += ('\n\t</tbody>\n</table>\n' +
         '\nGenerated on: ' + strftime('%Y-%m-%d %H:%M:%S', gmtime()) +
         ' by ' + sys.argv[0] + scroll + '</body>\n</html>')
# os.path.join keeps the target relative when path == '' (script started as
# `python script.py`); plain concatenation would aim at '/index.html'.
# The context manager closes the handle even if the write fails.
with open(os.path.join(path, 'index.html'), 'wb') as file_:
    file_.write(html.encode('utf8'))
"""
1. Import a movie_list txt file
2. Query IMDb for each entry, retrieving the actual movie name, rating and genres
3. Generate an HTML table from the IMDb data
4. Store the HTML in index.html
"""
import contextlib
import http.client
import os
import re
import sys
import threading
import time
from html import escape
from pathlib import Path

import progressbar
from imdb import IMDb
class MovieList:
    """Generate a movie list HTML table from a text list and IMDb data.

    Typical use is ``MovieList(src).gen()`` followed by ``write(dst)``.

    Attributes:
        html: page header plus the table rows produced by the last ``gen()``
            call, ordered by list position.  ``write()`` reads it and does
            not modify it.
        src: input list path; resolved to a ``Path`` by ``gen()``.
        movie_list: search strings, one per input line.
        status_list: status text, one per input line.
        threads: worker threads started by the last ``gen()`` call.
    """

    # Give up on a search after this many truncated HTTP responses instead of
    # retrying forever.
    MAX_SEARCH_ATTEMPTS = 5

    # Everything up to the opening <tbody>; rows and footer are appended later.
    _HEADER = """<html>
<head>
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="style.css">
<script src="../jquery-3.1.0.min.js"></script>
<script src="jquery.dataTables.min.js"></script>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"pageLength": -1,
"bPaginate": false
});
});
</script>
</head>
<body>
<header>
<div class="scroll-indicator" />
</header>
<base target="_parent" />
<table id="sortable" class="sortable">
<thead>
<tr>
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
</tr>
</thead>
<tbody>"""

    def __init__(self, src=None):
        """Create a generator for the list at ``src`` (default: see ``gen``)."""
        self.html = self._HEADER
        self.src = src
        self.movie_list = []
        self.status_list = []
        self.threads = []
        # list index -> rendered <tr>.  Workers finish in arbitrary order, so
        # rows are collected here and joined in index order by gen().
        self._rows = {}
        self._rows_lock = threading.Lock()

    @staticmethod
    def _parse(text):
        """Split the raw list text into (search strings, statuses), per line."""
        # Search string: drop everything after the first ')' on each line.
        names = re.sub(r'\).*', ')', text).splitlines()
        # Status: drop everything up to the last '[' on each line and every ']'.
        statuses = re.sub(r'\]|.*\[', '', text).splitlines()
        return names, statuses

    @staticmethod
    def _render_row(index, title, status, movie_id=None, year='N/A',
                    rating='N/A', genres=None):
        """Return the ``<tr>`` markup for one movie.

        Args:
            index: zero-based list position; shown one-based.
            title: movie title (or the raw search string when nothing matched).
            status: status text from the input list.
            movie_id: IMDb id used for the title link; ``None`` emits the
                title without a link.
            year, rating: values shown as-is after ``str()``.
            genres: iterable of genre names, a ready-made string, or
                ``None``/empty for 'N/A'.

        All text is HTML-escaped before insertion.
        """
        if not genres:
            genres_text = 'N/A'
        elif isinstance(genres, str):
            # Joining a string would interleave its characters ('N, /, A').
            genres_text = genres
        else:
            genres_text = ', '.join(str(genre) for genre in genres)

        safe_title = escape(str(title))
        if movie_id is None:
            title_cell = f'<p hidden>{safe_title}</p>{safe_title}'
        else:
            url = escape(f'http://www.imdb.com/title/tt{movie_id}')
            title_cell = (
                f'<p hidden>{safe_title}</p>'
                f'<a href="{url}" target="_blank">{safe_title}</a>'
            )
        return (
            f'\n\t<tr><td>{index + 1}</td>'
            f'<td>{title_cell}</td>'
            f'<td>{escape(str(year))}</td>'
            f'<td align="center">{escape(str(rating))}</td>'
            f'<td>{escape(genres_text)}</td>'
            f'<td align="center">{escape(str(status))}</td></tr>'
        )

    def _search(self, imdb, arg):
        """Return the first IMDb search hit for ``arg``, or ``None``.

        ``None`` means the search had no results or kept failing with
        ``http.client.IncompleteRead`` for ``MAX_SEARCH_ATTEMPTS`` attempts.
        """
        for _ in range(self.MAX_SEARCH_ATTEMPTS):
            try:
                results = imdb.search_movie(arg)
            except http.client.IncompleteRead:
                # Truncated response: wait briefly, then try again.
                time.sleep(1)
                continue
            return results[0] if results else None
        return None

    def _worker(self, arg, index):
        """Look up one list entry on IMDb and store its table row.

        Scans IMDb for the given movie and collects rating, genres, official
        name and a hyperlink.  Entries without a match still get a row, with
        the search string as title and 'N/A' in the IMDb columns.

        Args:
            arg: search string for the movie.
            index: zero-based position of the entry in ``movie_list``.
        """
        imdb = IMDb()
        movie = self._search(imdb, arg)
        status = self.status_list[index]
        if movie is None:
            row = self._render_row(index, arg, status)
        else:
            # Fetch the full record for the first search hit.
            imdb.update(movie)
            fields = movie.keys()
            row = self._render_row(
                index,
                movie['title'] if 'title' in fields else arg,
                status,
                movie_id=movie.movieID,
                year=movie['year'] if 'year' in fields else 'N/A',
                rating=movie['rating'] if 'rating' in fields else 'N/A',
                genres=movie['genres'] if 'genres' in fields else None,
            )
        with self._rows_lock:
            self._rows[index] = row

    def gen(self):
        """Generate the HTML rows for the input list, using threaded workers.

        Reads ``self.src`` (default: ``movie_list`` next to the script),
        starts one worker thread per line, waits for all of them and stores
        header plus rows in ``self.html``.  Each call starts from a clean
        state, so calling it again does not duplicate rows.
        """
        if not self.src:
            self.src = Path(sys.argv[0]).parent / 'movie_list'
        else:
            self.src = Path(self.src)

        # Open the movie list & split the columns.  Read as UTF-8 to match
        # the encoding write() uses for the output.
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            text = fp_handle.read()
        self.movie_list, self.status_list = self._parse(text)

        self.threads = []
        self._rows = {}

        # Progress bar. Enough said
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))

        # sys.stdout is process-wide, so it is silenced once here for the
        # lifetime of the workers rather than swapped inside every thread,
        # where concurrent save/restore pairs can restore the wrong stream.
        with open(os.devnull, 'w') as sink, contextlib.redirect_stdout(sink):
            for idx, val in enumerate(self.movie_list):
                thread = threading.Thread(target=self._worker, args=(val, idx))
                self.threads.append(thread)
                thread.start()
                pbar.update(idx + 1)
                # Pace thread creation: a short gap after every thread and a
                # longer pause after every 16th.
                time.sleep(0.2)
                if len(self.threads) % 16 == 0:
                    time.sleep(6)
            for thread in self.threads:
                thread.join()

        ordered_rows = (row for _, row in sorted(self._rows.items()))
        self.html = self._HEADER + ''.join(ordered_rows)

    def write(self, dst=None):
        """Write the HTML list to ``dst`` (default: index.html next to the script).

        The footer is added to the written document only, not to
        ``self.html``, so repeated calls do not stack footers.
        """
        if not dst:
            dst = Path(sys.argv[0]).parent / 'index.html'
        else:
            dst = Path(dst)

        # Just a fancy scrollbar for the html
        scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
        document = (
            self.html +
            '\n\t</tbody>\n</table>\n' +
            '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
            ' by ' + sys.argv[0] + scroll + '</body>\n</html>'
        )
        with open(dst, 'wb') as fp_handle:
            fp_handle.write(document.encode('utf8'))
def main():
    """Build the table from the default list and write it to the default file."""
    generator = MovieList()
    # Rows have to be generated before the document is written out.
    for step in (generator.gen, generator.write):
        step()


if __name__ == "__main__":
    main()