Improve movie list generator, add more movies

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
2022-12-07 23:36:36 +02:00
parent 4bfc45c100
commit 3e7dafb4b2
2 changed files with 62 additions and 36 deletions

View File

@@ -12,9 +12,9 @@ import sys
import time
import threading
from pathlib import Path
from http.client import IncompleteRead
import progressbar
from imdb import IMDb
from imdb._exceptions import IMDbParserError
class MovieList:
@@ -59,27 +59,35 @@ class MovieList:
# Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink
imdb = IMDb()
save_stdout = sys.stdout
with open(os.devnull, 'wb') as sys.stdout:
while True:
try:
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
break
except IncompleteRead:
pass
sys.stdout = save_stdout
while True:
try:
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
break
except IMDbParserError as exc:
query = []
#print(exc)
break
except Exception as exc:
#print(f'error: {exc.__class__.__name__}: {arg["title"]}')
time.sleep(10)
movie = None
for entry in query:
has_minimum_keys = True
for key in ['kind', 'year', 'title']:
if key not in entry.keys():
has_minimum_keys = False
if not has_minimum_keys:
#print(entry)
imdb.update(entry)
# in case any of these keys is missing in the query, continue
if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
#print(f'missing key {entry.keys()}')
continue
if arg['status'] == 'DONE' and 'rating' not in entry.keys():
continue
# Try to eliminate episode results
if [i for i in entry.keys() if 'episode' in i.lower()] or \
'episode' in entry['title'].lower():
# Must not have "episode" in the object keys
# Must not have "episode" in the query title key,
# unless "episode" is in the query search string
if [i for i in entry.keys() if 'episode' in i.lower()] or (
'episode' in entry['title'].lower() and \
'episode' not in arg['title'].lower()):
continue
if entry['kind'].lower() == arg['kind'].lower():
movie = entry
@@ -91,7 +99,6 @@ class MovieList:
'year': arg['year'],
'dummy': None
}
if 'genres' not in movie.keys():
movie['genres'] = ['N/A']
if 'rating' not in movie.keys():
@@ -101,7 +108,7 @@ class MovieList:
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
self.html_table[index] = (
f'\n{" "*8}<tr><td>{index + 1}</td>'
f'<td><p hidden>{movie["title"]}</p>{html_title_td}</td>'
f'<td><p hidden>{arg["title"]}</p>{html_title_td}</td>'
f'<td>{movie["year"]}</td><td align="center">{movie["rating"]}</td>'
f'<td>{", ".join(movie["genres"])}</td>'
f'<td align="center">{arg["status"]}</td></tr>'
@@ -124,7 +131,7 @@ class MovieList:
for raw_line in mlist_raw.splitlines():
self.movie_list.update({
len(self.movie_list): {
'title': raw_line.split('(', 1)[0].strip(),
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
@@ -135,28 +142,46 @@ class MovieList:
# Progress bar. Enough said
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
for idx, movie in self.movie_list.items():
match = [html_row for html_row in self.prev_html if movie['title'] in html_row]
match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row]
if match:
# Update movies as DONE in case of change
match = match[0].replace('*', movie['status'])
# Directly insert the current HTML line from the older output
self.html_table[idx] = \
f'\n{" "*8}<tr><td>{idx + 1}</td>{match[match.find("</td>") + 5:]}'
pbar.update(idx + 1)
pbar.increment()
else:
thread = threading.Thread(target=self._worker, args=(movie, idx))
self.threads.append(thread)
thread.start()
pbar.update(idx+1)
time.sleep(0.2)
if len(self.threads) % 16 == 0:
time.sleep(6)
for thread in self.threads:
thread.join()
max_threads = 10
while self.threads:
threads_alive = self.get_alive_threads()
threads_to_be_started = [i for i in self.threads if i not in threads_alive]
for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)):
threads_to_be_started[idx].start()
pbar.increment()
time.sleep(1)
time.sleep(1)
self.delete_finished_threads()
self.html += ''.join(self.html_table)
return True
def delete_finished_threads(self):
    """ Join and drop every thread in self.threads that has already finished.

    A thread counts as finished when it has been started (its public
    ``ident`` is no longer None) and is no longer alive. Threads that are
    still running, or that have not been started yet, stay in
    ``self.threads`` in their original order.
    """
    remaining = []
    for thread in self.threads:
        # ``ident`` is None until the thread has been started; this is the
        # documented way to ask "was start() called?" without reaching
        # into the private ``_started`` event.
        if thread.ident is not None and not thread.is_alive():
            # The thread has already ended, so join() returns immediately.
            thread.join()
        else:
            remaining.append(thread)
    self.threads = remaining
def get_alive_threads(self):
    """ Return the threads in self.threads that have been started.

    Despite the name, the result holds every thread whose start() has been
    called, whether it is still running or has already finished. Only
    threads that were never started are left out. Order follows
    ``self.threads``.
    """
    # A live thread has necessarily been started, so a separate is_alive()
    # check adds nothing. ``ident`` is None only before start(), which
    # makes it the public replacement for the private ``_started`` event.
    return [thread for thread in self.threads if thread.ident is not None]
def write(self, dst=None):
""" Write the HTML list to index.html """
out_path = dst if dst else self.dst