From b2b80893209244e4256ff3560683e8b1d59897be Mon Sep 17 00:00:00 2001 From: Bogomil Vasilev Date: Mon, 19 Jan 2026 08:36:09 +0200 Subject: [PATCH] movielist: update to newer UI Signed-off-by: Bogomil Vasilev --- movie_list/index.py | 433 ++++++++++++++++++++++++++++++++++++------ movie_list/movie_list | 55 +++++- 2 files changed, 432 insertions(+), 56 deletions(-) diff --git a/movie_list/index.py b/movie_list/index.py index 1d26fe3..dfe1144 100755 --- a/movie_list/index.py +++ b/movie_list/index.py @@ -14,40 +14,300 @@ import threading from pathlib import Path import progressbar from imdb import IMDb -from imdb._exceptions import IMDbParserError +from imdb._exceptions import IMDbParserError, IMDbDataAccessError class MovieList: """ Class to generate a movie list HTML table """ def __init__(self, src=None, dst=None): self.prev_html = [] - self.html = """ - - My Movie List - - - - + + + - - -
-
-
+ }); + + + - - - - - - - """ +
+

🎬 My Movie Collection

+
Index Title Year IMDb Rating Genre Status
+ + + + + + + + + + + """ self.src = src self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html' self.movie_list = [] @@ -59,32 +319,41 @@ class MovieList: # Scan IMDb for a given movie and append it to the html # This collects rating, genres, official name and a hyperlink imdb = IMDb() + first_run = True while True: + if not first_run: + time.sleep(10) + else: + first_run = False try: query = imdb.search_movie(f'{arg["title"]} {arg["year"]}') break - except IMDbParserError as exc: + except IMDbDataAccessError as imdb_data_exc: + exc = str(imdb_data_exc) + if '503' in exc: + sys.stderr.write('503 - Service Unavailable, retrying...') + elif '403' in exc: + sys.stderr.write('403 - Forbidden, retrying...\n') + query = [] + time.sleep(10) + except IMDbParserError as imdb_parser_exc: query = [] - #print(exc) break except Exception as exc: - #print(f'error: {exc.__class__.__name__}: {arg["title"]}') time.sleep(10) movie = None for entry in query: - #print(entry) - imdb.update(entry) + try: + imdb.update(entry) + except Exception as e: + sys.stderr.write('update err') # in case any of these keys is missing in the query, continue if not all(key in entry.keys() for key in ['kind', 'year', 'title']): - #print(f'missing key {entry.keys()}') continue if arg['status'] == 'DONE' and 'rating' not in entry.keys(): continue # Try to eliminate episode results - # Must not have "episode" in the object keys - # Must not have "episode" in the query title key, - # unless "episode" is in the query search string if [i for i in entry.keys() if 'episode' in i.lower()] or ( 'episode' in entry['title'].lower() and \ 'episode' not in arg['title'].lower()): @@ -106,12 +375,16 @@ class MovieList: html_title_td = movie['title'] if 'dummy' in movie.keys() else \ f'{movie["title"]}' + self.html_table[index] = ( - f'\n{" "*8}' - f'' - f'' - f'' - f'' + f'\n ' + f'' + f'' + f'' + f'' + f'' + f'' + f'' ) def gen(self): @@ -129,6 +402,9 @@ class MovieList: with open(self.src, 
'r', encoding='utf-8') as fp_handle: mlist_raw = fp_handle.read() for raw_line in mlist_raw.splitlines(): + # In case the line is empty + if not raw_line: + continue self.movie_list.update({ len(self.movie_list): { 'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1], @@ -139,16 +415,25 @@ class MovieList: }) self.html_table = [None] * len(self.movie_list) - # Progress bar. Enough said + # Progress bar pbar = progressbar.ProgressBar(max_value=len(self.movie_list)) for idx, movie in self.movie_list.items(): - match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row] + # More precise matching - look for the hidden

tag with exact title + match = [html_row for html_row in self.prev_html + if f'

' in html_row + and 'N/A' not in html_row] if match: - # Update movies as DONE in case of change - match = match[0].replace('*', movie['status']) - # Directly insert the current HTML line from the older output - self.html_table[idx] = \ - f'\n{" "*8}{match[match.find("") + 5:]}' + # Update the index and status from the cached row + match_str = match[0] + # Replace the status (* -> DONE or vice versa) + match_str = match_str.replace('*', movie['status']).replace('DONE', movie['status']) + # Update the index number + if '', 1)[1] if '' in match_str else match_str + self.html_table[idx] = f'\n {after_index}' + else: + self.html_table[idx] = match_str pbar.increment() else: thread = threading.Thread(target=self._worker, args=(movie, idx)) @@ -161,11 +446,19 @@ class MovieList: for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)): threads_to_be_started[idx].start() pbar.increment() - time.sleep(1) - time.sleep(1) + time.sleep(2) + time.sleep(2) self.delete_finished_threads() self.html += ''.join(self.html_table) + + # Deduplicate entries before writing + num_entries = self.deduplicate_html() + print(f"\nDeduplicated to {num_entries} unique entries") + + self.html = self.html.split('')[0] # Remove everything after tbody if it exists + self.html += ''.join(self.html_table) + return True def delete_finished_threads(self): @@ -185,11 +478,14 @@ class MovieList: def write(self, dst=None): """ Write the HTML list to index.html """ out_path = dst if dst else self.dst - # Just a fancy scrollbar for the html - scroll = '' - self.html += ('\n\t\n
#TitleYearRatingGenreStatus
{index + 1}{html_title_td}{movie["year"]}{movie["rating"]}{", ".join(movie["genres"])}{arg["status"]}
{index + 1}{html_title_td}{movie["year"]}{movie["rating"]}{", ".join(movie["genres"])}{arg["status"]}
{idx + 1}' in match_str: + # Extract everything after the index cell + after_index = match_str.split('
{idx + 1}
\n' + - '\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) + - ' by ' + sys.argv[0] + scroll + '\n') + timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) + self.html += f''' + + + +
Generated {timestamp} UTC
+ +''' with open(out_path, 'wb') as fp_handle: fp_handle.write(self.html.encode('utf8')) @@ -199,6 +495,35 @@ class MovieList: with open(self.dst, 'rb') as fp_handle: self.prev_html = fp_handle.read().decode('utf8').split('\n') + def deduplicate_html(self): + """ Remove duplicate entries from html_table based on movie titles """ + seen_titles = set() + deduplicated = [] + + for idx, row in enumerate(self.html_table): + if row is None: + continue + + # Extract the hidden title from the row + if '' in row: + start = row.find('', start) + title = row[start:end] + + if title not in seen_titles: + seen_titles.add(title) + deduplicated.append(row) + else: + # Skip duplicate + continue + else: + # If we can't find the hidden title, keep the row anyway + deduplicated.append(row) + + # Update html_table with deduplicated content + self.html_table = deduplicated + return len(self.html_table) + def main(): """ Default run """ diff --git a/movie_list/movie_list b/movie_list/movie_list index d139ae5..8ffefe7 100644 --- a/movie_list/movie_list +++ b/movie_list/movie_list @@ -319,7 +319,6 @@ Into The White (2012) [DONE] Trash (2014) [DONE] First Man (2018) [DONE] Full Metal Jacket (1987) [*] -Woman at War (2018) [*] Roman J. Israel, Esq. (2017) [DONE] Venom (2018) [DONE] Moby Dick (1998) [DONE] @@ -381,7 +380,6 @@ Lean on Me (1989) [DONE] Let Him Go (2020) [DONE] Super Lopez (2018) [DONE] Midnight Special (2016) [DONE] -The Father (2020) [DONE] Another Round (2020) [DONE] Promising Young Woman (2020) [DONE] La Daronne (2020) [DONE] @@ -412,3 +410,56 @@ Fresh (2022) [DONE] Death on the Nile (2022) [DONE] All Quiet on the Western Front (2022) [DONE] Triangle of Sadness (2022) [DONE] +Jerry and Marge Go Large (2022) [DONE] +Fighting With My Family (2019) [DONE] +The Woman King (2022) [DONE] +Capernaum (2018) [DONE] +The Menu (2022) [DONE] +Rye Lane (2023) [DONE] +Sisu (2022) [DONE] +Zodiac (2007) [DONE] +Legend (2015) [DONE] +Luther The Fallen Sun (2023) [DONE] +Mr. 
Morgan's Last Love (2013) [DONE] +Sweet November (2001) [DONE] +The Debt (2010) [DONE] +Don Jon (2013) [DONE] +Nefarious (2023) [DONE] +Oppenheimer (2023) [DONE] +No Hard Feelings (2023) [DONE] +Powder (1995) [DONE] +The Life of David Gale (2003) [DONE] +Crazy, Stupid, Love. (2011) [DONE] +Before We Go (2014) [DONE] +The Banshees of Inisherin (2022) [DONE] +Three Thousand Years of Longing (2022) [DONE] +Hardcore Henry (2015) [DONE] +Chalga (2023) [DONE] +Dumb Money (2023) [DONE] +Society of the Snow (2023) [DONE] +The Aviator's Wife (1981) [*] +Past Lives (2023) [DONE] +Comandante (2023) [DONE] +The Lost Boys (1987) [DONE] +Blade Runner (2049) [DONE] +The Lunchbox (2013) [DONE] +Half Nelson (2006) [DONE] +The Dirt (2019) [DONE] +Manchester by the Sea (2016) [DONE] +The Perks of Being a Wallflower (2012) [DONE] +The Ministry of Ungentlemanly Warfare (2024) [DONE] +Wings of Desire (1987) [*] +Cadillac Records (2008) [DONE] +The Substance (2024) [DONE] +Don't Look Up (2021) [DONE] +La Vie en Rose (2007) [DONE] +The Peanut Butter Falcon (2019) [DONE] +One Battle After Another (2025) [DONE] +The Master and Margarita (2024) [DONE] +Woman at War (2018) [DONE] +Good Fortune (2025) [DONE] +The Age of Disclosure (2025) [*] +Evil Does Not Exist (2023) [DONE] +I.S.S. (2023) [*] +Black Cat, White Cat (1998) [DONE] +Nuremberg (2025) [DONE]