movielist: fix duplication of table entries

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
2026-02-19 11:21:41 +02:00
parent 1ca024226e
commit f7f4bf66e8
2 changed files with 76 additions and 50 deletions
@@ -55,6 +55,7 @@ class MovieList:
            border: 1px solid rgba(255, 255, 255, 0.1);
            padding: 0.75rem;
            height: 100%;
+            min-height: 0;
            overflow: hidden;
            display: flex;
            flex-direction: column;
@@ -70,14 +71,16 @@ class MovieList:

        .dataTables_wrapper {
            flex: 1;
-            overflow: hidden;
+            overflow: auto;
            display: flex;
            flex-direction: column;
+            min-height: 0;
        }

        .dataTables_filter {
            margin-bottom: 0.5rem;
            text-align: right;
+            flex-shrink: 0;
        }

        .dataTables_filter label {
@@ -113,22 +116,19 @@ class MovieList:
            padding: 0.5rem 0;
            color: rgba(255, 255, 255, 0.6);
            font-size: 0.85rem;
-        }
-
-        .dataTables_scroll {
-            flex: 1;
-            overflow: hidden;
-            display: flex;
-            flex-direction: column;
-        }
-
-        .dataTables_scrollHead {
            flex-shrink: 0;
        }

+        .dataTables_scroll {
+            display: none;
+        }
+
+        .dataTables_scrollHead {
+            display: none;
+        }
+
        .dataTables_scrollBody {
-            flex: 1;
-            overflow-y: auto !important;
+            display: none;
        }

        .dataTables_scrollBody::-webkit-scrollbar {
@@ -144,7 +144,20 @@ class MovieList:
            border-radius: 3px;
        }

-        table.dataTable.stripe tbody tr.odd, 
+        .dataTables_wrapper::-webkit-scrollbar {
+            width: 6px;
+        }
+
+        .dataTables_wrapper::-webkit-scrollbar-track {
+            background: rgba(255, 255, 255, 0.05);
+        }
+
+        .dataTables_wrapper::-webkit-scrollbar-thumb {
+            background: rgba(255, 255, 255, 0.2);
+            border-radius: 3px;
+        }
+
+        table.dataTable.stripe tbody tr.odd,
        table.dataTable.display tbody tr.odd,
        table.dataTable tbody tr {
            background: transparent !important;
@@ -274,21 +287,23 @@ class MovieList:
    </style>
    <script>
    $(document).ready(function(){
-        $('#sortable').DataTable({
-            "paging": false,
-            "info": true,
-            "searching": true,
-            "ordering": true,
-            "order": [[0, "asc"]],
-            "scrollY": "calc(100vh - 200px)",
-            "scrollCollapse": true,
-            "language": {
-                "search": "Search:",
-                "info": "Showing _TOTAL_ movies",
-                "infoEmpty": "No movies",
-                "infoFiltered": "(filtered from _MAX_)"
-            }
-        });
+        try {
+            $('#sortable').DataTable({
+                "paging": false,
+                "info": true,
+                "searching": true,
+                "ordering": true,
+                "order": [[0, "desc"]],
+                "language": {
+                    "search": "Search:",
+                    "info": "Showing _TOTAL_ movies",
+                    "infoEmpty": "No movies",
+                    "infoFiltered": "(filtered from _MAX_)"
+                }
+            });
+        } catch(e) {
+            console.error("DataTables init error:", e);
+        }
    });
    </script>
 </head>
@@ -375,7 +390,7 @@ class MovieList:

        html_title_td = movie['title'] if 'dummy' in movie.keys() else \
                f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
-        
+
        self.html_table[index] = (
            f'\n                <tr>'
            f'<td data-label="#">{index + 1}</td>'
@@ -398,29 +413,42 @@ class MovieList:
                return False

        self.movie_list = {}
+        seen_titles = set()  # Track unique titles
+
        # Open the movie list & split the columns
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            mlist_raw = fp_handle.read()
+            idx = 0
            for raw_line in mlist_raw.splitlines():
                # In case the line is empty
                if not raw_line:
                    continue
+
+                title = raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1]
+
+                # Skip if we've already seen this title
+                if title in seen_titles:
+                    continue
+
+                seen_titles.add(title)
                self.movie_list.update({
-                    len(self.movie_list): {
-                        'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
+                    idx: {
+                        'title': title,
                        'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
                        'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
                        'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
                    }
                })
+                idx += 1
+
            self.html_table = [None] * len(self.movie_list)

        # Progress bar
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        for idx, movie in self.movie_list.items():
            # More precise matching - look for the hidden <p> tag with exact title
-            match = [html_row for html_row in self.prev_html 
-                     if f'<p hidden>{movie["title"]}</p>' in html_row 
+            match = [html_row for html_row in self.prev_html
+                     if f'<p hidden>{movie["title"]}</p>' in html_row
                     and 'N/A' not in html_row]
            if match:
                # Update the index and status from the cached row
@@ -450,15 +478,7 @@ class MovieList:
            time.sleep(2)
            self.delete_finished_threads()

-        self.html += ''.join(self.html_table)
-        
-        # Deduplicate entries before writing
-        num_entries = self.deduplicate_html()
-        print(f"\nDeduplicated to {num_entries} unique entries")
-        
-        self.html = self.html.split('</tbody>')[0]  # Remove everything after tbody if it exists
-        self.html += ''.join(self.html_table)
-        
+        # Don't append to self.html here - we'll do it in write()
        return True

    def delete_finished_threads(self):
@@ -479,37 +499,42 @@ class MovieList:
        """ Write the HTML list to index.html """
        out_path = dst if dst else self.dst
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
-        self.html += f'''
+
+        # Build the final HTML - don't append, rebuild from scratch
+        final_html = self.html.split('<tbody>')[0] + '<tbody>'
+        final_html += ''.join([row for row in self.html_table if row is not None])
+        final_html += f'''
            </tbody>
        </table>
    </div>
    <div class="timestamp">Generated {timestamp} UTC</div>
 </body>
 </html>'''
+
        with open(out_path, 'wb') as fp_handle:
-            fp_handle.write(self.html.encode('utf8'))
+            fp_handle.write(final_html.encode('utf8'))

    def read_prev_output(self):
        """ Load the lines of a previously written HTML output, if any.

        When ``self.dst`` exists, the file is read as raw bytes, decoded as
        UTF-8 and split on newline characters; the resulting list of lines
        is stored in ``self.prev_html``. When the file does not exist,
        nothing is read and ``self.prev_html`` is left untouched.
        """
        # Nothing to import on a first run (no earlier output on disk).
        if not self.dst.exists():
            return

        # Binary mode: no newline translation happens before the split.
        with open(self.dst, 'rb') as previous_file:
            raw_bytes = previous_file.read()

        self.prev_html = raw_bytes.decode('utf8').split('\n')
-    
+
    def deduplicate_html(self):
        """ Remove duplicate entries from html_table based on movie titles """
        seen_titles = set()
        deduplicated = []
-        
+
        for idx, row in enumerate(self.html_table):
            if row is None:
                continue
-            
+
            # Extract the hidden title from the row
            if '<p hidden>' in row and '</p>' in row:
                start = row.find('<p hidden>') + 10
                end = row.find('</p>', start)
                title = row[start:end]
-                
+
                if title not in seen_titles:
                    seen_titles.add(title)
                    deduplicated.append(row)
@@ -519,7 +544,7 @@ class MovieList:
            else:
                # If we can't find the hidden title, keep the row anyway
                deduplicated.append(row)
-        
+
        # Update html_table with deduplicated content
        self.html_table = deduplicated
        return len(self.html_table)
@@ -466,3 +466,4 @@ Poor Things (2023)						[*]
 The French Dispatch (2021)					[DONE]
 Sentimental Value (2025)					[DONE]
 28 Years Later: The Bone Temple (2026)				[*]
+Youth (2015)							[DONE]