From f7f4bf66e85b52efaa30dcfb009d41a7a75d08f1 Mon Sep 17 00:00:00 2001
From: Bogomil Vasilev <smirky@smirky.net>
Date: Thu, 19 Feb 2026 11:21:41 +0200
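Subject: [PATCH] movielist: fix duplication of table entries

Rows were joined into self.html twice at the end of generation, so every
table entry was emitted more than once. Titles repeated in the source
list are now skipped while parsing, and write() rebuilds the <tbody>
from html_table instead of appending to self.html.

The DataTables scrollY option is dropped in favour of scrolling the
wrapper element, and the default sort order changes to descending.

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>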
---
 movie_list/gen_movie_list.py | 125 +++++++++++++++++++++--------------
 movie_list/movie_list        |   1 +
 2 files changed, 76 insertions(+), 50 deletions(-)
diff --git a/movie_list/gen_movie_list.py b/movie_list/gen_movie_list.py
index 5808df9..966d721 100755
--- a/movie_list/gen_movie_list.py
+++ b/movie_list/gen_movie_list.py
@@ -55,6 +55,7 @@ class MovieList:
             border: 1px solid rgba(255, 255, 255, 0.1);
             padding: 0.75rem;
             height: 100%;
+            min-height: 0;
             overflow: hidden;
             display: flex;
             flex-direction: column;
@@ -70,14 +71,16 @@ class MovieList:
 
         .dataTables_wrapper {
             flex: 1;
-            overflow: hidden;
+            overflow: auto;
             display: flex;
             flex-direction: column;
+            min-height: 0;
         }
 
         .dataTables_filter {
             margin-bottom: 0.5rem;
             text-align: right;
+            flex-shrink: 0;
         }
 
         .dataTables_filter label {
@@ -113,22 +116,19 @@ class MovieList:
             padding: 0.5rem 0;
             color: rgba(255, 255, 255, 0.6);
             font-size: 0.85rem;
-        }
-
-        .dataTables_scroll {
-            flex: 1;
-            overflow: hidden;
-            display: flex;
-            flex-direction: column;
-        }
-
-        .dataTables_scrollHead {
             flex-shrink: 0;
         }
 
+        .dataTables_scroll {
+            display: none;
+        }
+
+        .dataTables_scrollHead {
+            display: none;
+        }
+
         .dataTables_scrollBody {
-            flex: 1;
-            overflow-y: auto !important;
+            display: none;
         }
 
         .dataTables_scrollBody::-webkit-scrollbar {
@@ -144,7 +144,20 @@ class MovieList:
             border-radius: 3px;
         }
 
-        table.dataTable.stripe tbody tr.odd, 
+        .dataTables_wrapper::-webkit-scrollbar {
+            width: 6px;
+        }
+
+        .dataTables_wrapper::-webkit-scrollbar-track {
+            background: rgba(255, 255, 255, 0.05);
+        }
+
+        .dataTables_wrapper::-webkit-scrollbar-thumb {
+            background: rgba(255, 255, 255, 0.2);
+            border-radius: 3px;
+        }
+
+        table.dataTable.stripe tbody tr.odd,
         table.dataTable.display tbody tr.odd,
         table.dataTable tbody tr {
             background: transparent !important;
@@ -274,21 +287,23 @@ class MovieList:
     </style>
     <script>
     $(document).ready(function(){
-        $('#sortable').DataTable({
-            "paging": false,
-            "info": true,
-            "searching": true,
-            "ordering": true,
-            "order": [[0, "asc"]],
-            "scrollY": "calc(100vh - 200px)",
-            "scrollCollapse": true,
-            "language": {
-                "search": "Search:",
-                "info": "Showing _TOTAL_ movies",
-                "infoEmpty": "No movies",
-                "infoFiltered": "(filtered from _MAX_)"
-            }
-        });
+        try {
+            $('#sortable').DataTable({
+                "paging": false,
+                "info": true,
+                "searching": true,
+                "ordering": true,
+                "order": [[0, "desc"]],
+                "language": {
+                    "search": "Search:",
+                    "info": "Showing _TOTAL_ movies",
+                    "infoEmpty": "No movies",
+                    "infoFiltered": "(filtered from _MAX_)"
+                }
+            });
+        } catch(e) {
+            console.error("DataTables init error:", e);
+        }
     });
     </script>
 </head>
@@ -375,7 +390,7 @@ class MovieList:
 
         html_title_td = movie['title'] if 'dummy' in movie.keys() else \
                 f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
-        
+
         self.html_table[index] = (
             f'\n                <tr>'
             f'<td data-label="#">{index + 1}</td>'
@@ -398,29 +413,42 @@ class MovieList:
                 return False
 
         self.movie_list = {}
+        seen_titles = set()  # Track unique titles
+
         # Open the movie list & split the columns
         with open(self.src, 'r', encoding='utf-8') as fp_handle:
             mlist_raw = fp_handle.read()
+            idx = 0
             for raw_line in mlist_raw.splitlines():
                 # In case the line is empty
                 if not raw_line:
                     continue
+
+                title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1]
+
+                # Skip if we've already seen this title
+                if title in seen_titles:
+                    continue
+
+                seen_titles.add(title)
                 self.movie_list.update({
-                    len(self.movie_list): {
-                        'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
+                    idx: {
+                        'title': title,
                         'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
                         'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
                         'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
                     }
                 })
+                idx += 1
+
             self.html_table = [None] * len(self.movie_list)
 
         # Progress bar
         pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
         for idx, movie in self.movie_list.items():
             # More precise matching - look for the hidden <p> tag with exact title
-            match = [html_row for html_row in self.prev_html 
-                     if f'<p hidden>{movie["title"]}</p>' in html_row 
+            match = [html_row for html_row in self.prev_html
+                     if f'<p hidden>{movie["title"]}</p>' in html_row
                      and 'N/A' not in html_row]
             if match:
                 # Update the index and status from the cached row
@@ -450,15 +478,7 @@ class MovieList:
             time.sleep(2)
             self.delete_finished_threads()
 
-        self.html += ''.join(self.html_table)
-        
-        # Deduplicate entries before writing
-        num_entries = self.deduplicate_html()
-        print(f"\nDeduplicated to {num_entries} unique entries")
-        
-        self.html = self.html.split('</tbody>')[0]  # Remove everything after tbody if it exists
-        self.html += ''.join(self.html_table)
-        
+        # Don't append to self.html here - we'll do it in write()
         return True
 
     def delete_finished_threads(self):
@@ -479,37 +499,42 @@ class MovieList:
         """ Write the HTML list to index.html """
         out_path = dst if dst else self.dst
         timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
-        self.html += f'''
+
+        # Build the final HTML - don't append, rebuild from scratch
+        final_html = self.html.split('<tbody>')[0] + '<tbody>'
+        final_html += ''.join([row for row in self.html_table if row is not None])
+        final_html += f'''
             </tbody>
         </table>
     </div>
     <div class="timestamp">Generated {timestamp} UTC</div>
 </body>
 </html>'''
+
         with open(out_path, 'wb') as fp_handle:
-            fp_handle.write(self.html.encode('utf8'))
+            fp_handle.write(final_html.encode('utf8'))
 
     def read_prev_output(self):
         """ Import a previous HTML table """
         if self.dst.exists():
             with open(self.dst, 'rb') as fp_handle:
                 self.prev_html = fp_handle.read().decode('utf8').split('\n')
-    
+
     def deduplicate_html(self):
         """ Remove duplicate entries from html_table based on movie titles """
         seen_titles = set()
         deduplicated = []
-        
+
         for idx, row in enumerate(self.html_table):
             if row is None:
                 continue
-            
+
             # Extract the hidden title from the row
             if '<p hidden>' in row and '</p>' in row:
                 start = row.find('<p hidden>') + 10
                 end = row.find('</p>', start)
                 title = row[start:end]
-                
+
                 if title not in seen_titles:
                     seen_titles.add(title)
                     deduplicated.append(row)
@@ -519,7 +544,7 @@ class MovieList:
             else:
                 # If we can't find the hidden title, keep the row anyway
                 deduplicated.append(row)
-        
+
         # Update html_table with deduplicated content
         self.html_table = deduplicated
         return len(self.html_table)
diff --git a/movie_list/movie_list b/movie_list/movie_list
index 3c387c5..b52d911 100644
--- a/movie_list/movie_list
+++ b/movie_list/movie_list
@@ -466,3 +466,4 @@ Poor Things (2023)						[*]
 The French Dispatch (2021)					[DONE]
 Sentimental Value (2025)					[DONE]
 28 Years Later: The Bone Temple (2026)				[*]
+Youth (2015)							[DONE]