movielist: fix duplication of table entries
Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
@@ -55,6 +55,7 @@ class MovieList:
|
||||
border: 1px solid rgba(255, 255, 255, 0.1);
|
||||
padding: 0.75rem;
|
||||
height: 100%;
|
||||
min-height: 0;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
@@ -70,14 +71,16 @@ class MovieList:
|
||||
|
||||
.dataTables_wrapper {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
overflow: auto;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
min-height: 0;
|
||||
}
|
||||
|
||||
.dataTables_filter {
|
||||
margin-bottom: 0.5rem;
|
||||
text-align: right;
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.dataTables_filter label {
|
||||
@@ -113,22 +116,19 @@ class MovieList:
|
||||
padding: 0.5rem 0;
|
||||
color: rgba(255, 255, 255, 0.6);
|
||||
font-size: 0.85rem;
|
||||
}
|
||||
|
||||
.dataTables_scroll {
|
||||
flex: 1;
|
||||
overflow: hidden;
|
||||
display: flex;
|
||||
flex-direction: column;
|
||||
}
|
||||
|
||||
.dataTables_scrollHead {
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.dataTables_scroll {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.dataTables_scrollHead {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.dataTables_scrollBody {
|
||||
flex: 1;
|
||||
overflow-y: auto !important;
|
||||
display: none;
|
||||
}
|
||||
|
||||
.dataTables_scrollBody::-webkit-scrollbar {
|
||||
@@ -144,7 +144,20 @@ class MovieList:
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
table.dataTable.stripe tbody tr.odd,
|
||||
.dataTables_wrapper::-webkit-scrollbar {
|
||||
width: 6px;
|
||||
}
|
||||
|
||||
.dataTables_wrapper::-webkit-scrollbar-track {
|
||||
background: rgba(255, 255, 255, 0.05);
|
||||
}
|
||||
|
||||
.dataTables_wrapper::-webkit-scrollbar-thumb {
|
||||
background: rgba(255, 255, 255, 0.2);
|
||||
border-radius: 3px;
|
||||
}
|
||||
|
||||
table.dataTable.stripe tbody tr.odd,
|
||||
table.dataTable.display tbody tr.odd,
|
||||
table.dataTable tbody tr {
|
||||
background: transparent !important;
|
||||
@@ -274,21 +287,23 @@ class MovieList:
|
||||
</style>
|
||||
<script>
|
||||
$(document).ready(function(){
|
||||
$('#sortable').DataTable({
|
||||
"paging": false,
|
||||
"info": true,
|
||||
"searching": true,
|
||||
"ordering": true,
|
||||
"order": [[0, "asc"]],
|
||||
"scrollY": "calc(100vh - 200px)",
|
||||
"scrollCollapse": true,
|
||||
"language": {
|
||||
"search": "Search:",
|
||||
"info": "Showing _TOTAL_ movies",
|
||||
"infoEmpty": "No movies",
|
||||
"infoFiltered": "(filtered from _MAX_)"
|
||||
}
|
||||
});
|
||||
try {
|
||||
$('#sortable').DataTable({
|
||||
"paging": false,
|
||||
"info": true,
|
||||
"searching": true,
|
||||
"ordering": true,
|
||||
"order": [[0, "desc"]],
|
||||
"language": {
|
||||
"search": "Search:",
|
||||
"info": "Showing _TOTAL_ movies",
|
||||
"infoEmpty": "No movies",
|
||||
"infoFiltered": "(filtered from _MAX_)"
|
||||
}
|
||||
});
|
||||
} catch(e) {
|
||||
console.error("DataTables init error:", e);
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</head>
|
||||
@@ -375,7 +390,7 @@ class MovieList:
|
||||
|
||||
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
||||
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
||||
|
||||
|
||||
self.html_table[index] = (
|
||||
f'\n <tr>'
|
||||
f'<td data-label="#">{index + 1}</td>'
|
||||
@@ -398,29 +413,42 @@ class MovieList:
|
||||
return False
|
||||
|
||||
self.movie_list = {}
|
||||
seen_titles = set() # Track unique titles
|
||||
|
||||
# Open the movie list & split the columns
|
||||
with open(self.src, 'r', encoding='utf-8') as fp_handle:
|
||||
mlist_raw = fp_handle.read()
|
||||
idx = 0
|
||||
for raw_line in mlist_raw.splitlines():
|
||||
# In case the line is empty
|
||||
if not raw_line:
|
||||
continue
|
||||
|
||||
title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1]
|
||||
|
||||
# Skip if we've already seen this title
|
||||
if title in seen_titles:
|
||||
continue
|
||||
|
||||
seen_titles.add(title)
|
||||
self.movie_list.update({
|
||||
len(self.movie_list): {
|
||||
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
|
||||
idx: {
|
||||
'title': title,
|
||||
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
|
||||
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
|
||||
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
|
||||
}
|
||||
})
|
||||
idx += 1
|
||||
|
||||
self.html_table = [None] * len(self.movie_list)
|
||||
|
||||
# Progress bar
|
||||
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
||||
for idx, movie in self.movie_list.items():
|
||||
# More precise matching - look for the hidden <p> tag with exact title
|
||||
match = [html_row for html_row in self.prev_html
|
||||
if f'<p hidden>{movie["title"]}</p>' in html_row
|
||||
match = [html_row for html_row in self.prev_html
|
||||
if f'<p hidden>{movie["title"]}</p>' in html_row
|
||||
and 'N/A' not in html_row]
|
||||
if match:
|
||||
# Update the index and status from the cached row
|
||||
@@ -450,15 +478,7 @@ class MovieList:
|
||||
time.sleep(2)
|
||||
self.delete_finished_threads()
|
||||
|
||||
self.html += ''.join(self.html_table)
|
||||
|
||||
# Deduplicate entries before writing
|
||||
num_entries = self.deduplicate_html()
|
||||
print(f"\nDeduplicated to {num_entries} unique entries")
|
||||
|
||||
self.html = self.html.split('</tbody>')[0] # Remove everything after tbody if it exists
|
||||
self.html += ''.join(self.html_table)
|
||||
|
||||
# Don't append to self.html here - we'll do it in write()
|
||||
return True
|
||||
|
||||
def delete_finished_threads(self):
|
||||
@@ -479,37 +499,42 @@ class MovieList:
|
||||
""" Write the HTML list to index.html """
|
||||
out_path = dst if dst else self.dst
|
||||
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
|
||||
self.html += f'''
|
||||
|
||||
# Build the final HTML - don't append, rebuild from scratch
|
||||
final_html = self.html.split('<tbody>')[0] + '<tbody>'
|
||||
final_html += ''.join([row for row in self.html_table if row is not None])
|
||||
final_html += f'''
|
||||
</tbody>
|
||||
</table>
|
||||
</div>
|
||||
<div class="timestamp">Generated {timestamp} UTC</div>
|
||||
</body>
|
||||
</html>'''
|
||||
|
||||
with open(out_path, 'wb') as fp_handle:
|
||||
fp_handle.write(self.html.encode('utf8'))
|
||||
fp_handle.write(final_html.encode('utf8'))
|
||||
|
||||
def read_prev_output(self):
|
||||
""" Import a previous HTML table """
|
||||
if self.dst.exists():
|
||||
with open(self.dst, 'rb') as fp_handle:
|
||||
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
||||
|
||||
|
||||
def deduplicate_html(self):
|
||||
""" Remove duplicate entries from html_table based on movie titles """
|
||||
seen_titles = set()
|
||||
deduplicated = []
|
||||
|
||||
|
||||
for idx, row in enumerate(self.html_table):
|
||||
if row is None:
|
||||
continue
|
||||
|
||||
|
||||
# Extract the hidden title from the row
|
||||
if '<p hidden>' in row and '</p>' in row:
|
||||
start = row.find('<p hidden>') + 10
|
||||
end = row.find('</p>', start)
|
||||
title = row[start:end]
|
||||
|
||||
|
||||
if title not in seen_titles:
|
||||
seen_titles.add(title)
|
||||
deduplicated.append(row)
|
||||
@@ -519,7 +544,7 @@ class MovieList:
|
||||
else:
|
||||
# If we can't find the hidden title, keep the row anyway
|
||||
deduplicated.append(row)
|
||||
|
||||
|
||||
# Update html_table with deduplicated content
|
||||
self.html_table = deduplicated
|
||||
return len(self.html_table)
|
||||
|
||||
@@ -466,3 +466,4 @@ Poor Things (2023) [*]
|
||||
The French Dispatch (2021) [DONE]
|
||||
Sentimental Value (2025) [DONE]
|
||||
28 Years Later: The Bone Temple (2026) [*]
|
||||
Youth (2015) [DONE]
|
||||
|
||||
Reference in New Issue
Block a user