movielist: fix duplication of table entries

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
2026-02-19 11:21:41 +02:00
parent 1ca024226e
commit f7f4bf66e8
2 changed files with 76 additions and 50 deletions

View File

@@ -55,6 +55,7 @@ class MovieList:
border: 1px solid rgba(255, 255, 255, 0.1);
padding: 0.75rem;
height: 100%;
min-height: 0;
overflow: hidden;
display: flex;
flex-direction: column;
@@ -70,14 +71,16 @@ class MovieList:
.dataTables_wrapper {
flex: 1;
overflow: hidden;
overflow: auto;
display: flex;
flex-direction: column;
min-height: 0;
}
.dataTables_filter {
margin-bottom: 0.5rem;
text-align: right;
flex-shrink: 0;
}
.dataTables_filter label {
@@ -113,22 +116,19 @@ class MovieList:
padding: 0.5rem 0;
color: rgba(255, 255, 255, 0.6);
font-size: 0.85rem;
}
.dataTables_scroll {
flex: 1;
overflow: hidden;
display: flex;
flex-direction: column;
}
.dataTables_scrollHead {
flex-shrink: 0;
}
.dataTables_scroll {
display: none;
}
.dataTables_scrollHead {
display: none;
}
.dataTables_scrollBody {
flex: 1;
overflow-y: auto !important;
display: none;
}
.dataTables_scrollBody::-webkit-scrollbar {
@@ -144,7 +144,20 @@ class MovieList:
border-radius: 3px;
}
table.dataTable.stripe tbody tr.odd,
.dataTables_wrapper::-webkit-scrollbar {
width: 6px;
}
.dataTables_wrapper::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.05);
}
.dataTables_wrapper::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.2);
border-radius: 3px;
}
table.dataTable.stripe tbody tr.odd,
table.dataTable.display tbody tr.odd,
table.dataTable tbody tr {
background: transparent !important;
@@ -274,21 +287,23 @@ class MovieList:
</style>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"paging": false,
"info": true,
"searching": true,
"ordering": true,
"order": [[0, "asc"]],
"scrollY": "calc(100vh - 200px)",
"scrollCollapse": true,
"language": {
"search": "Search:",
"info": "Showing _TOTAL_ movies",
"infoEmpty": "No movies",
"infoFiltered": "(filtered from _MAX_)"
}
});
try {
$('#sortable').DataTable({
"paging": false,
"info": true,
"searching": true,
"ordering": true,
"order": [[0, "desc"]],
"language": {
"search": "Search:",
"info": "Showing _TOTAL_ movies",
"infoEmpty": "No movies",
"infoFiltered": "(filtered from _MAX_)"
}
});
} catch(e) {
console.error("DataTables init error:", e);
}
});
</script>
</head>
@@ -375,7 +390,7 @@ class MovieList:
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
self.html_table[index] = (
f'\n <tr>'
f'<td data-label="#">{index + 1}</td>'
@@ -398,29 +413,42 @@ class MovieList:
return False
self.movie_list = {}
seen_titles = set() # Track unique titles
# Open the movie list & split the columns
with open(self.src, 'r', encoding='utf-8') as fp_handle:
mlist_raw = fp_handle.read()
idx = 0
for raw_line in mlist_raw.splitlines():
# In case the line is empty
if not raw_line:
continue
title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1]
# Skip if we've already seen this title
if title in seen_titles:
continue
seen_titles.add(title)
self.movie_list.update({
len(self.movie_list): {
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
idx: {
'title': title,
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
}
})
idx += 1
self.html_table = [None] * len(self.movie_list)
# Progress bar
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
for idx, movie in self.movie_list.items():
# More precise matching - look for the hidden <p> tag with exact title
match = [html_row for html_row in self.prev_html
if f'<p hidden>{movie["title"]}</p>' in html_row
match = [html_row for html_row in self.prev_html
if f'<p hidden>{movie["title"]}</p>' in html_row
and 'N/A' not in html_row]
if match:
# Update the index and status from the cached row
@@ -450,15 +478,7 @@ class MovieList:
time.sleep(2)
self.delete_finished_threads()
self.html += ''.join(self.html_table)
# Deduplicate entries before writing
num_entries = self.deduplicate_html()
print(f"\nDeduplicated to {num_entries} unique entries")
self.html = self.html.split('</tbody>')[0] # Remove everything after tbody if it exists
self.html += ''.join(self.html_table)
# Don't append to self.html here - we'll do it in write()
return True
def delete_finished_threads(self):
@@ -479,37 +499,42 @@ class MovieList:
""" Write the HTML list to index.html """
out_path = dst if dst else self.dst
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
self.html += f'''
# Build the final HTML - don't append, rebuild from scratch
final_html = self.html.split('<tbody>')[0] + '<tbody>'
final_html += ''.join([row for row in self.html_table if row is not None])
final_html += f'''
</tbody>
</table>
</div>
<div class="timestamp">Generated {timestamp} UTC</div>
</body>
</html>'''
with open(out_path, 'wb') as fp_handle:
fp_handle.write(self.html.encode('utf8'))
fp_handle.write(final_html.encode('utf8'))
def read_prev_output(self):
    """ Load the previously generated HTML output, if there is one.

    When ``self.dst`` exists, its bytes are decoded as UTF-8 and stored in
    ``self.prev_html`` as a list of strings split on the newline character.
    When it does not exist, ``self.prev_html`` is left untouched.
    """
    if not self.dst.exists():
        return
    with open(self.dst, 'rb') as prev_file:
        raw_bytes = prev_file.read()
    # Split on the newline character only, so a carriage return (if any)
    # stays attached to the end of its line.
    self.prev_html = raw_bytes.decode('utf8').split('\n')
def deduplicate_html(self):
""" Remove duplicate entries from html_table based on movie titles """
seen_titles = set()
deduplicated = []
for idx, row in enumerate(self.html_table):
if row is None:
continue
# Extract the hidden title from the row
if '<p hidden>' in row and '</p>' in row:
start = row.find('<p hidden>') + 10
end = row.find('</p>', start)
title = row[start:end]
if title not in seen_titles:
seen_titles.add(title)
deduplicated.append(row)
@@ -519,7 +544,7 @@ class MovieList:
else:
# If we can't find the hidden title, keep the row anyway
deduplicated.append(row)
# Update html_table with deduplicated content
self.html_table = deduplicated
return len(self.html_table)

View File

@@ -466,3 +466,4 @@ Poor Things (2023) [*]
The French Dispatch (2021) [DONE]
Sentimental Value (2025) [DONE]
28 Years Later: The Bone Temple (2026) [*]
Youth (2015) [DONE]