From f7f4bf66e85b52efaa30dcfb009d41a7a75d08f1 Mon Sep 17 00:00:00 2001 From: Bogomil Vasilev Date: Thu, 19 Feb 2026 11:21:41 +0200 Subject: [PATCH] movielist: fix duplication of table entries Signed-off-by: Bogomil Vasilev --- movie_list/gen_movie_list.py | 125 +++++++++++++++++++++-------------- movie_list/movie_list | 1 + 2 files changed, 76 insertions(+), 50 deletions(-) diff --git a/movie_list/gen_movie_list.py b/movie_list/gen_movie_list.py index 5808df9..966d721 100755 --- a/movie_list/gen_movie_list.py +++ b/movie_list/gen_movie_list.py @@ -55,6 +55,7 @@ class MovieList: border: 1px solid rgba(255, 255, 255, 0.1); padding: 0.75rem; height: 100%; + min-height: 0; overflow: hidden; display: flex; flex-direction: column; @@ -70,14 +71,16 @@ class MovieList: .dataTables_wrapper { flex: 1; - overflow: hidden; + overflow: auto; display: flex; flex-direction: column; + min-height: 0; } .dataTables_filter { margin-bottom: 0.5rem; text-align: right; + flex-shrink: 0; } .dataTables_filter label { @@ -113,22 +116,19 @@ class MovieList: padding: 0.5rem 0; color: rgba(255, 255, 255, 0.6); font-size: 0.85rem; - } - - .dataTables_scroll { - flex: 1; - overflow: hidden; - display: flex; - flex-direction: column; - } - - .dataTables_scrollHead { flex-shrink: 0; } + .dataTables_scroll { + display: none; + } + + .dataTables_scrollHead { + display: none; + } + .dataTables_scrollBody { - flex: 1; - overflow-y: auto !important; + display: none; } .dataTables_scrollBody::-webkit-scrollbar { @@ -144,7 +144,20 @@ class MovieList: border-radius: 3px; } - table.dataTable.stripe tbody tr.odd, + .dataTables_wrapper::-webkit-scrollbar { + width: 6px; + } + + .dataTables_wrapper::-webkit-scrollbar-track { + background: rgba(255, 255, 255, 0.05); + } + + .dataTables_wrapper::-webkit-scrollbar-thumb { + background: rgba(255, 255, 255, 0.2); + border-radius: 3px; + } + + table.dataTable.stripe tbody tr.odd, table.dataTable.display tbody tr.odd, table.dataTable tbody tr { background: transparent !important; @@ -274,21 +287,23 @@ class MovieList: @@ -375,7 +390,7 @@ class MovieList: html_title_td = movie['title'] if 'dummy' in movie.keys() else \ f'{movie["title"]}' - + self.html_table[index] = ( f'\n ' f'{index + 1}' @@ -398,29 +413,42 @@ class MovieList: return False self.movie_list = {} + seen_titles = set() # Track unique titles + # Open the movie list & split the columns with open(self.src, 'r', encoding='utf-8') as fp_handle: mlist_raw = fp_handle.read() + idx = 0 for raw_line in mlist_raw.splitlines(): # In case the line is empty if not raw_line: continue + + title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1] + + # Skip if we've already seen this title + if title in seen_titles: + continue + + seen_titles.add(title) self.movie_list.update({ - len(self.movie_list): { - 'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1], + idx: { + 'title': title, 'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie', 'year': raw_line[raw_line.find('(')+1:raw_line.find(')')], 'status': raw_line[raw_line.find('[')+1:raw_line.find(']')], } }) + idx += 1 + self.html_table = [None] * len(self.movie_list) # Progress bar pbar = progressbar.ProgressBar(max_value=len(self.movie_list)) for idx, movie in self.movie_list.items(): # More precise matching - look for the hidden

tag with exact title - match = [html_row for html_row in self.prev_html - if f'

' in html_row + match = [html_row for html_row in self.prev_html + if f'' in html_row and 'N/A' not in html_row] if match: # Update the index and status from the cached row @@ -450,15 +478,7 @@ class MovieList: time.sleep(2) self.delete_finished_threads() - self.html += ''.join(self.html_table) - - # Deduplicate entries before writing - num_entries = self.deduplicate_html() - print(f"\nDeduplicated to {num_entries} unique entries") - - self.html = self.html.split('')[0] # Remove everything after tbody if it exists - self.html += ''.join(self.html_table) - + # Don't append to self.html here - we'll do it in write() return True def delete_finished_threads(self): @@ -479,37 +499,42 @@ class MovieList: """ Write the HTML list to index.html """ out_path = dst if dst else self.dst timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) - self.html += f''' + + # Build the final HTML - don't append, rebuild from scratch + final_html = self.html.split('')[0] + '' + final_html += ''.join([row for row in self.html_table if row is not None]) + final_html += f'''
Generated {timestamp} UTC
''' + with open(out_path, 'wb') as fp_handle: - fp_handle.write(self.html.encode('utf8')) + fp_handle.write(final_html.encode('utf8')) def read_prev_output(self): """ Import a previous HTML table """ if self.dst.exists(): with open(self.dst, 'rb') as fp_handle: self.prev_html = fp_handle.read().decode('utf8').split('\n') - + def deduplicate_html(self): """ Remove duplicate entries from html_table based on movie titles """ seen_titles = set() deduplicated = [] - + for idx, row in enumerate(self.html_table): if row is None: continue - + # Extract the hidden title from the row if '' in row: start = row.find('', start) title = row[start:end] - + if title not in seen_titles: seen_titles.add(title) deduplicated.append(row) @@ -519,7 +544,7 @@ class MovieList: else: # If we can't find the hidden title, keep the row anyway deduplicated.append(row) - + # Update html_table with deduplicated content self.html_table = deduplicated return len(self.html_table) diff --git a/movie_list/movie_list b/movie_list/movie_list index 3c387c5..b52d911 100644 --- a/movie_list/movie_list +++ b/movie_list/movie_list @@ -466,3 +466,4 @@ Poor Things (2023) [*] The French Dispatch (2021) [DONE] Sentimental Value (2025) [DONE] 28 Years Later: The Bone Temple (2026) [*] +Youth (2015) [DONE]