movielist: fix duplication of table entries
Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
@@ -55,6 +55,7 @@ class MovieList:
|
|||||||
border: 1px solid rgba(255, 255, 255, 0.1);
|
border: 1px solid rgba(255, 255, 255, 0.1);
|
||||||
padding: 0.75rem;
|
padding: 0.75rem;
|
||||||
height: 100%;
|
height: 100%;
|
||||||
|
min-height: 0;
|
||||||
overflow: hidden;
|
overflow: hidden;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
@@ -70,14 +71,16 @@ class MovieList:
|
|||||||
|
|
||||||
.dataTables_wrapper {
|
.dataTables_wrapper {
|
||||||
flex: 1;
|
flex: 1;
|
||||||
overflow: hidden;
|
overflow: auto;
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
|
min-height: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dataTables_filter {
|
.dataTables_filter {
|
||||||
margin-bottom: 0.5rem;
|
margin-bottom: 0.5rem;
|
||||||
text-align: right;
|
text-align: right;
|
||||||
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dataTables_filter label {
|
.dataTables_filter label {
|
||||||
@@ -113,22 +116,19 @@ class MovieList:
|
|||||||
padding: 0.5rem 0;
|
padding: 0.5rem 0;
|
||||||
color: rgba(255, 255, 255, 0.6);
|
color: rgba(255, 255, 255, 0.6);
|
||||||
font-size: 0.85rem;
|
font-size: 0.85rem;
|
||||||
}
|
|
||||||
|
|
||||||
.dataTables_scroll {
|
|
||||||
flex: 1;
|
|
||||||
overflow: hidden;
|
|
||||||
display: flex;
|
|
||||||
flex-direction: column;
|
|
||||||
}
|
|
||||||
|
|
||||||
.dataTables_scrollHead {
|
|
||||||
flex-shrink: 0;
|
flex-shrink: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.dataTables_scroll {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollHead {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
.dataTables_scrollBody {
|
.dataTables_scrollBody {
|
||||||
flex: 1;
|
display: none;
|
||||||
overflow-y: auto !important;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.dataTables_scrollBody::-webkit-scrollbar {
|
.dataTables_scrollBody::-webkit-scrollbar {
|
||||||
@@ -144,7 +144,20 @@ class MovieList:
|
|||||||
border-radius: 3px;
|
border-radius: 3px;
|
||||||
}
|
}
|
||||||
|
|
||||||
table.dataTable.stripe tbody tr.odd,
|
.dataTables_wrapper::-webkit-scrollbar {
|
||||||
|
width: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_wrapper::-webkit-scrollbar-track {
|
||||||
|
background: rgba(255, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_wrapper::-webkit-scrollbar-thumb {
|
||||||
|
background: rgba(255, 255, 255, 0.2);
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.dataTable.stripe tbody tr.odd,
|
||||||
table.dataTable.display tbody tr.odd,
|
table.dataTable.display tbody tr.odd,
|
||||||
table.dataTable tbody tr {
|
table.dataTable tbody tr {
|
||||||
background: transparent !important;
|
background: transparent !important;
|
||||||
@@ -274,21 +287,23 @@ class MovieList:
|
|||||||
</style>
|
</style>
|
||||||
<script>
|
<script>
|
||||||
$(document).ready(function(){
|
$(document).ready(function(){
|
||||||
$('#sortable').DataTable({
|
try {
|
||||||
"paging": false,
|
$('#sortable').DataTable({
|
||||||
"info": true,
|
"paging": false,
|
||||||
"searching": true,
|
"info": true,
|
||||||
"ordering": true,
|
"searching": true,
|
||||||
"order": [[0, "asc"]],
|
"ordering": true,
|
||||||
"scrollY": "calc(100vh - 200px)",
|
"order": [[0, "desc"]],
|
||||||
"scrollCollapse": true,
|
"language": {
|
||||||
"language": {
|
"search": "Search:",
|
||||||
"search": "Search:",
|
"info": "Showing _TOTAL_ movies",
|
||||||
"info": "Showing _TOTAL_ movies",
|
"infoEmpty": "No movies",
|
||||||
"infoEmpty": "No movies",
|
"infoFiltered": "(filtered from _MAX_)"
|
||||||
"infoFiltered": "(filtered from _MAX_)"
|
}
|
||||||
}
|
});
|
||||||
});
|
} catch(e) {
|
||||||
|
console.error("DataTables init error:", e);
|
||||||
|
}
|
||||||
});
|
});
|
||||||
</script>
|
</script>
|
||||||
</head>
|
</head>
|
||||||
@@ -375,7 +390,7 @@ class MovieList:
|
|||||||
|
|
||||||
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
||||||
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
||||||
|
|
||||||
self.html_table[index] = (
|
self.html_table[index] = (
|
||||||
f'\n <tr>'
|
f'\n <tr>'
|
||||||
f'<td data-label="#">{index + 1}</td>'
|
f'<td data-label="#">{index + 1}</td>'
|
||||||
@@ -398,29 +413,42 @@ class MovieList:
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
self.movie_list = {}
|
self.movie_list = {}
|
||||||
|
seen_titles = set() # Track unique titles
|
||||||
|
|
||||||
# Open the movie list & split the columns
|
# Open the movie list & split the columns
|
||||||
with open(self.src, 'r', encoding='utf-8') as fp_handle:
|
with open(self.src, 'r', encoding='utf-8') as fp_handle:
|
||||||
mlist_raw = fp_handle.read()
|
mlist_raw = fp_handle.read()
|
||||||
|
idx = 0
|
||||||
for raw_line in mlist_raw.splitlines():
|
for raw_line in mlist_raw.splitlines():
|
||||||
# In case the line is empty
|
# In case the line is empty
|
||||||
if not raw_line:
|
if not raw_line:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1]
|
||||||
|
|
||||||
|
# Skip if we've already seen this title
|
||||||
|
if title in seen_titles:
|
||||||
|
continue
|
||||||
|
|
||||||
|
seen_titles.add(title)
|
||||||
self.movie_list.update({
|
self.movie_list.update({
|
||||||
len(self.movie_list): {
|
idx: {
|
||||||
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
|
'title': title,
|
||||||
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
|
'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
|
||||||
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
|
'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
|
||||||
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
|
'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
idx += 1
|
||||||
|
|
||||||
self.html_table = [None] * len(self.movie_list)
|
self.html_table = [None] * len(self.movie_list)
|
||||||
|
|
||||||
# Progress bar
|
# Progress bar
|
||||||
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
||||||
for idx, movie in self.movie_list.items():
|
for idx, movie in self.movie_list.items():
|
||||||
# More precise matching - look for the hidden <p> tag with exact title
|
# More precise matching - look for the hidden <p> tag with exact title
|
||||||
match = [html_row for html_row in self.prev_html
|
match = [html_row for html_row in self.prev_html
|
||||||
if f'<p hidden>{movie["title"]}</p>' in html_row
|
if f'<p hidden>{movie["title"]}</p>' in html_row
|
||||||
and 'N/A' not in html_row]
|
and 'N/A' not in html_row]
|
||||||
if match:
|
if match:
|
||||||
# Update the index and status from the cached row
|
# Update the index and status from the cached row
|
||||||
@@ -450,15 +478,7 @@ class MovieList:
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
self.delete_finished_threads()
|
self.delete_finished_threads()
|
||||||
|
|
||||||
self.html += ''.join(self.html_table)
|
# Don't append to self.html here - we'll do it in write()
|
||||||
|
|
||||||
# Deduplicate entries before writing
|
|
||||||
num_entries = self.deduplicate_html()
|
|
||||||
print(f"\nDeduplicated to {num_entries} unique entries")
|
|
||||||
|
|
||||||
self.html = self.html.split('</tbody>')[0] # Remove everything after tbody if it exists
|
|
||||||
self.html += ''.join(self.html_table)
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def delete_finished_threads(self):
|
def delete_finished_threads(self):
|
||||||
@@ -479,37 +499,42 @@ class MovieList:
|
|||||||
""" Write the HTML list to index.html """
|
""" Write the HTML list to index.html """
|
||||||
out_path = dst if dst else self.dst
|
out_path = dst if dst else self.dst
|
||||||
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
|
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
|
||||||
self.html += f'''
|
|
||||||
|
# Build the final HTML - don't append, rebuild from scratch
|
||||||
|
final_html = self.html.split('<tbody>')[0] + '<tbody>'
|
||||||
|
final_html += ''.join([row for row in self.html_table if row is not None])
|
||||||
|
final_html += f'''
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
</div>
|
</div>
|
||||||
<div class="timestamp">Generated {timestamp} UTC</div>
|
<div class="timestamp">Generated {timestamp} UTC</div>
|
||||||
</body>
|
</body>
|
||||||
</html>'''
|
</html>'''
|
||||||
|
|
||||||
with open(out_path, 'wb') as fp_handle:
|
with open(out_path, 'wb') as fp_handle:
|
||||||
fp_handle.write(self.html.encode('utf8'))
|
fp_handle.write(final_html.encode('utf8'))
|
||||||
|
|
||||||
def read_prev_output(self):
|
def read_prev_output(self):
|
||||||
""" Import a previous HTML table """
|
""" Import a previous HTML table """
|
||||||
if self.dst.exists():
|
if self.dst.exists():
|
||||||
with open(self.dst, 'rb') as fp_handle:
|
with open(self.dst, 'rb') as fp_handle:
|
||||||
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
||||||
|
|
||||||
def deduplicate_html(self):
|
def deduplicate_html(self):
|
||||||
""" Remove duplicate entries from html_table based on movie titles """
|
""" Remove duplicate entries from html_table based on movie titles """
|
||||||
seen_titles = set()
|
seen_titles = set()
|
||||||
deduplicated = []
|
deduplicated = []
|
||||||
|
|
||||||
for idx, row in enumerate(self.html_table):
|
for idx, row in enumerate(self.html_table):
|
||||||
if row is None:
|
if row is None:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Extract the hidden title from the row
|
# Extract the hidden title from the row
|
||||||
if '<p hidden>' in row and '</p>' in row:
|
if '<p hidden>' in row and '</p>' in row:
|
||||||
start = row.find('<p hidden>') + 10
|
start = row.find('<p hidden>') + 10
|
||||||
end = row.find('</p>', start)
|
end = row.find('</p>', start)
|
||||||
title = row[start:end]
|
title = row[start:end]
|
||||||
|
|
||||||
if title not in seen_titles:
|
if title not in seen_titles:
|
||||||
seen_titles.add(title)
|
seen_titles.add(title)
|
||||||
deduplicated.append(row)
|
deduplicated.append(row)
|
||||||
@@ -519,7 +544,7 @@ class MovieList:
|
|||||||
else:
|
else:
|
||||||
# If we can't find the hidden title, keep the row anyway
|
# If we can't find the hidden title, keep the row anyway
|
||||||
deduplicated.append(row)
|
deduplicated.append(row)
|
||||||
|
|
||||||
# Update html_table with deduplicated content
|
# Update html_table with deduplicated content
|
||||||
self.html_table = deduplicated
|
self.html_table = deduplicated
|
||||||
return len(self.html_table)
|
return len(self.html_table)
|
||||||
|
|||||||
@@ -466,3 +466,4 @@ Poor Things (2023) [*]
|
|||||||
The French Dispatch (2021) [DONE]
|
The French Dispatch (2021) [DONE]
|
||||||
Sentimental Value (2025) [DONE]
|
Sentimental Value (2025) [DONE]
|
||||||
28 Years Later: The Bone Temple (2026) [*]
|
28 Years Later: The Bone Temple (2026) [*]
|
||||||
|
Youth (2015) [DONE]
|
||||||
|
|||||||
Reference in New Issue
Block a user