Improve movie list generator, add more movies

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
2022-12-07 23:36:36 +02:00
parent 4bfc45c100
commit 3e7dafb4b2
2 changed files with 62 additions and 36 deletions

View File

@@ -12,9 +12,9 @@ import sys
import time import time
import threading import threading
from pathlib import Path from pathlib import Path
from http.client import IncompleteRead
import progressbar import progressbar
from imdb import IMDb from imdb import IMDb
from imdb._exceptions import IMDbParserError
class MovieList: class MovieList:
@@ -59,27 +59,35 @@ class MovieList:
# Scan IMDb for a given movie and append it to the html # Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink # This collects rating, genres, official name and a hyperlink
imdb = IMDb() imdb = IMDb()
save_stdout = sys.stdout
with open(os.devnull, 'wb') as sys.stdout:
while True: while True:
try: try:
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}') query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
break break
except IncompleteRead: except IMDbParserError as exc:
pass query = []
sys.stdout = save_stdout #print(exc)
break
except Exception as exc:
#print(f'error: {exc.__class__.__name__}: {arg["title"]}')
time.sleep(10)
movie = None movie = None
for entry in query: for entry in query:
has_minimum_keys = True #print(entry)
for key in ['kind', 'year', 'title']: imdb.update(entry)
if key not in entry.keys(): # in case any of these keys is missing in the query, continue
has_minimum_keys = False if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
if not has_minimum_keys: #print(f'missing key {entry.keys()}')
continue
if arg['status'] == 'DONE' and 'rating' not in entry.keys():
continue continue
# Try to eliminate episode results # Try to eliminate episode results
if [i for i in entry.keys() if 'episode' in i.lower()] or \ # Must not have "episode" in the object keys
'episode' in entry['title'].lower(): # Must not have "episode" in the query title key,
# unless "episode" is in the query search string
if [i for i in entry.keys() if 'episode' in i.lower()] or (
'episode' in entry['title'].lower() and \
'episode' not in arg['title'].lower()):
continue continue
if entry['kind'].lower() == arg['kind'].lower(): if entry['kind'].lower() == arg['kind'].lower():
movie = entry movie = entry
@@ -91,7 +99,6 @@ class MovieList:
'year': arg['year'], 'year': arg['year'],
'dummy': None 'dummy': None
} }
if 'genres' not in movie.keys(): if 'genres' not in movie.keys():
movie['genres'] = ['N/A'] movie['genres'] = ['N/A']
if 'rating' not in movie.keys(): if 'rating' not in movie.keys():
@@ -101,7 +108,7 @@ class MovieList:
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>' f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
self.html_table[index] = ( self.html_table[index] = (
f'\n{" "*8}<tr><td>{index + 1}</td>' f'\n{" "*8}<tr><td>{index + 1}</td>'
f'<td><p hidden>{movie["title"]}</p>{html_title_td}</td>' f'<td><p hidden>{arg["title"]}</p>{html_title_td}</td>'
f'<td>{movie["year"]}</td><td align="center">{movie["rating"]}</td>' f'<td>{movie["year"]}</td><td align="center">{movie["rating"]}</td>'
f'<td>{", ".join(movie["genres"])}</td>' f'<td>{", ".join(movie["genres"])}</td>'
f'<td align="center">{arg["status"]}</td></tr>' f'<td align="center">{arg["status"]}</td></tr>'
@@ -124,7 +131,7 @@ class MovieList:
for raw_line in mlist_raw.splitlines(): for raw_line in mlist_raw.splitlines():
self.movie_list.update({ self.movie_list.update({
len(self.movie_list): { len(self.movie_list): {
'title': raw_line.split('(', 1)[0].strip(), 'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie', 'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')], 'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')], 'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
@@ -135,28 +142,46 @@ class MovieList:
# Progress bar. Enough said # Progress bar. Enough said
pbar = progressbar.ProgressBar(max_value=len(self.movie_list)) pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
for idx, movie in self.movie_list.items(): for idx, movie in self.movie_list.items():
match = [html_row for html_row in self.prev_html if movie['title'] in html_row] match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row]
if match: if match:
# Update movies as DONE in case of change # Update movies as DONE in case of change
match = match[0].replace('*', movie['status']) match = match[0].replace('*', movie['status'])
# Directly insert the current HTML line from the older output # Directly insert the current HTML line from the older output
self.html_table[idx] = \ self.html_table[idx] = \
f'\n{" "*8}<tr><td>{idx + 1}</td>{match[match.find("</td>") + 5:]}' f'\n{" "*8}<tr><td>{idx + 1}</td>{match[match.find("</td>") + 5:]}'
pbar.update(idx + 1) pbar.increment()
else: else:
thread = threading.Thread(target=self._worker, args=(movie, idx)) thread = threading.Thread(target=self._worker, args=(movie, idx))
self.threads.append(thread) self.threads.append(thread)
thread.start()
pbar.update(idx+1)
time.sleep(0.2)
if len(self.threads) % 16 == 0:
time.sleep(6)
for thread in self.threads: max_threads = 10
thread.join() while self.threads:
threads_alive = self.get_alive_threads()
threads_to_be_started = [i for i in self.threads if i not in threads_alive]
for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)):
threads_to_be_started[idx].start()
pbar.increment()
time.sleep(1)
time.sleep(1)
self.delete_finished_threads()
self.html += ''.join(self.html_table) self.html += ''.join(self.html_table)
return True return True
def delete_finished_threads(self):
    """Join every finished worker thread and drop it from self.threads.

    A thread counts as finished when it has been started and is no longer
    alive. Threads that were never started, or that are still running,
    are kept, in their original order.
    """
    remaining = []
    for thread in self.threads:
        # ``ident`` is None until start() has run; it is the public way to
        # ask "was this thread started?" without touching the private
        # ``_started`` event.
        if thread.ident is not None and not thread.is_alive():
            # The thread has already exited, so join() returns immediately.
            thread.join()
        else:
            remaining.append(thread)
    # Rebuild the list in one pass instead of writing None placeholders
    # into it while iterating and filtering them out afterwards.
    self.threads = remaining
def get_alive_threads(self):
    """Return the threads in self.threads that have already been started.

    Despite the name, a thread that has finished but is still listed in
    self.threads is returned too: the check is "has been started", not
    "is currently running". Only never-started threads are left out.

    Returns:
        A new list, in the same order as self.threads.
    """
    # A running thread has necessarily been started, so a separate
    # is_alive() test adds nothing. ``ident`` is None until start() has
    # run, which avoids reading the private ``_started`` event.
    return [thread for thread in self.threads if thread.ident is not None]
def write(self, dst=None): def write(self, dst=None):
""" Write the HTML list to index.html """ """ Write the HTML list to index.html """
out_path = dst if dst else self.dst out_path = dst if dst else self.dst

View File

@@ -18,7 +18,7 @@ Mary and Max (2009) [DONE]
Lucky Number Slevin (2006) [DONE] Lucky Number Slevin (2006) [DONE]
The Road (2009) [DONE] The Road (2009) [DONE]
Magnolia (1999) [DONE] Magnolia (1999) [DONE]
Il Mostro (1994) [DONE] The Monster (1994) [DONE]
The Tiger and the Snow (2005) [DONE] The Tiger and the Snow (2005) [DONE]
Lucy (2014) [DONE] Lucy (2014) [DONE]
End of Watch (2012) [DONE] End of Watch (2012) [DONE]
@@ -115,7 +115,7 @@ Star Wars: Episode III - Revenge of the Sith (2005) [DONE]
Star Wars: Episode IV - A New Hope (1977) [DONE] Star Wars: Episode IV - A New Hope (1977) [DONE]
Star Wars: Episode V - The Empire Strikes Back (1980) [DONE] Star Wars: Episode V - The Empire Strikes Back (1980) [DONE]
Star Wars: Episode VI - Return of the Jedi (1983) [DONE] Star Wars: Episode VI - Return of the Jedi (1983) [DONE]
Star Wars: Episode: The Force Awakens (2015) [DONE] Star Wars: Episode VII: The Force Awakens (2015) [DONE]
Rogue One: A Star Wars Story (2016) [DONE] Rogue One: A Star Wars Story (2016) [DONE]
Men in Black (1997) [DONE] Men in Black (1997) [DONE]
Men in Black II (2002) [DONE] Men in Black II (2002) [DONE]
@@ -160,7 +160,7 @@ Unforgiven (1992) [DONE]
Scarface (1983) [DONE] Scarface (1983) [DONE]
Die Hard (1988) [DONE] Die Hard (1988) [DONE]
Die Hard 2 (1990) [DONE] Die Hard 2 (1990) [DONE]
Die Hard: With a Vengeance (1995) [DONE] Die Hard with a Vengeance (1995) [DONE]
Live Free or Die Hard (2007) [DONE] Live Free or Die Hard (2007) [DONE]
A Good Day to Die Hard (2013) [DONE] A Good Day to Die Hard (2013) [DONE]
Die Another Day (2002) [DONE] Die Another Day (2002) [DONE]
@@ -184,13 +184,13 @@ Pan (2015) [DONE]
The Cobbler (2014) [DONE] The Cobbler (2014) [DONE]
The Conjuring (2013) [DONE] The Conjuring (2013) [DONE]
Starred Up (2013) [DONE] Starred Up (2013) [DONE]
Kraftidioten (2014) [DONE] In Order of Disappearance (2014) [DONE]
The Imitation Game (2014) [DONE] The Imitation Game (2014) [DONE]
Begin Again (2013) [DONE] Begin Again (2013) [DONE]
A Walk Among the Tombstones (2014) [DONE] A Walk Among the Tombstones (2014) [DONE]
Detachment (2011) [DONE] Detachment (2011) [DONE]
Identity (2003) [DONE] Identity (2003) [DONE]
Turist (Force Majeure) (2014) [DONE] Force Majeure (2014) [DONE]
Gone Girl (2014) [DONE] Gone Girl (2014) [DONE]
The Equalizer (2014) [DONE] The Equalizer (2014) [DONE]
The Equalizer 2 (2018) [DONE] The Equalizer 2 (2018) [DONE]
@@ -351,7 +351,6 @@ Togo (2019) [*]
Knives Out (2019) [DONE] Knives Out (2019) [DONE]
Children of Men (2006) [DONE] Children of Men (2006) [DONE]
Bogowie (2014) [DONE] Bogowie (2014) [DONE]
Spiral (2021) [*]
Ip Man 4 (2019) [DONE] Ip Man 4 (2019) [DONE]
The Collini Case (2019) [DONE] The Collini Case (2019) [DONE]
The Traitor (2019) [DONE] The Traitor (2019) [DONE]
@@ -397,7 +396,7 @@ Arrival (2016) [DONE]
Duel (1971) [DONE] Duel (1971) [DONE]
The Courier (2020) [DONE] The Courier (2020) [DONE]
The Bone Collector (1999) [DONE] The Bone Collector (1999) [DONE]
Oslo (2021) [DONE] Oslo <TV Movie> (2021) [DONE]
Wrath of Man (2021) [DONE] Wrath of Man (2021) [DONE]
Nobody (2021) [DONE] Nobody (2021) [DONE]
Голата истина за група Жигули (2021) [DONE] Голата истина за група Жигули (2021) [DONE]
@@ -411,3 +410,5 @@ Boss Level (2020) [DONE]
Boite Noire (2021) [DONE] Boite Noire (2021) [DONE]
Fresh (2022) [DONE] Fresh (2022) [DONE]
Death on the Nile (2022) [DONE] Death on the Nile (2022) [DONE]
All Quiet on the Western Front (2022) [DONE]
Triangle of Sadness (2022) [DONE]