|
|
|
|
@@ -28,7 +28,7 @@ class MovieList:
|
|
|
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
|
|
|
<title>My Movie List</title>
|
|
|
|
|
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.24/css/jquery.dataTables.min.css">
|
|
|
|
|
<script src="../jquery-3.1.0.min.js"></script>
|
|
|
|
|
<script src="../jquery-3.7.1.min.js"></script>
|
|
|
|
|
<script src="https://cdn.datatables.net/1.10.24/js/jquery.dataTables.min.js"></script>
|
|
|
|
|
<style>
|
|
|
|
|
* {
|
|
|
|
|
@@ -144,7 +144,7 @@ class MovieList:
|
|
|
|
|
border-radius: 3px;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
table.dataTable.stripe tbody tr.odd,
|
|
|
|
|
table.dataTable.stripe tbody tr.odd,
|
|
|
|
|
table.dataTable.display tbody tr.odd,
|
|
|
|
|
table.dataTable tbody tr {
|
|
|
|
|
background: transparent !important;
|
|
|
|
|
@@ -375,7 +375,7 @@ class MovieList:
|
|
|
|
|
|
|
|
|
|
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
|
|
|
|
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.html_table[index] = (
|
|
|
|
|
f'\n <tr>'
|
|
|
|
|
f'<td data-label="#">{index + 1}</td>'
|
|
|
|
|
@@ -419,8 +419,8 @@ class MovieList:
|
|
|
|
|
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
|
|
|
|
for idx, movie in self.movie_list.items():
|
|
|
|
|
# More precise matching - look for the hidden <p> tag with exact title
|
|
|
|
|
match = [html_row for html_row in self.prev_html
|
|
|
|
|
if f'<p hidden>{movie["title"]}</p>' in html_row
|
|
|
|
|
match = [html_row for html_row in self.prev_html
|
|
|
|
|
if f'<p hidden>{movie["title"]}</p>' in html_row
|
|
|
|
|
and 'N/A' not in html_row]
|
|
|
|
|
if match:
|
|
|
|
|
# Update the index and status from the cached row
|
|
|
|
|
@@ -451,14 +451,14 @@ class MovieList:
|
|
|
|
|
self.delete_finished_threads()
|
|
|
|
|
|
|
|
|
|
self.html += ''.join(self.html_table)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Deduplicate entries before writing
|
|
|
|
|
num_entries = self.deduplicate_html()
|
|
|
|
|
print(f"\nDeduplicated to {num_entries} unique entries")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.html = self.html.split('</tbody>')[0] # Remove everything after tbody if it exists
|
|
|
|
|
self.html += ''.join(self.html_table)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return True
|
|
|
|
|
|
|
|
|
|
def delete_finished_threads(self):
|
|
|
|
|
@@ -494,22 +494,22 @@ class MovieList:
|
|
|
|
|
if self.dst.exists():
|
|
|
|
|
with open(self.dst, 'rb') as fp_handle:
|
|
|
|
|
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def deduplicate_html(self):
|
|
|
|
|
""" Remove duplicate entries from html_table based on movie titles """
|
|
|
|
|
seen_titles = set()
|
|
|
|
|
deduplicated = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for idx, row in enumerate(self.html_table):
|
|
|
|
|
if row is None:
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Extract the hidden title from the row
|
|
|
|
|
if '<p hidden>' in row and '</p>' in row:
|
|
|
|
|
start = row.find('<p hidden>') + 10
|
|
|
|
|
end = row.find('</p>', start)
|
|
|
|
|
title = row[start:end]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if title not in seen_titles:
|
|
|
|
|
seen_titles.add(title)
|
|
|
|
|
deduplicated.append(row)
|
|
|
|
|
@@ -519,7 +519,7 @@ class MovieList:
|
|
|
|
|
else:
|
|
|
|
|
# If we can't find the hidden title, keep the row anyway
|
|
|
|
|
deduplicated.append(row)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Update html_table with deduplicated content
|
|
|
|
|
self.html_table = deduplicated
|
|
|
|
|
return len(self.html_table)
|