movielist: update to newer UI
Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
@@ -14,40 +14,300 @@ import threading
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import progressbar
|
import progressbar
|
||||||
from imdb import IMDb
|
from imdb import IMDb
|
||||||
from imdb._exceptions import IMDbParserError
|
from imdb._exceptions import IMDbParserError, IMDbDataAccessError
|
||||||
|
|
||||||
|
|
||||||
class MovieList:
|
class MovieList:
|
||||||
""" Class to generate a movie list HTML table """
|
""" Class to generate a movie list HTML table """
|
||||||
def __init__(self, src=None, dst=None):
|
def __init__(self, src=None, dst=None):
|
||||||
self.prev_html = []
|
self.prev_html = []
|
||||||
self.html = """<html>
|
self.html = """<!DOCTYPE html>
|
||||||
<head>
|
<html lang="en">
|
||||||
<title>My Movie List</title>
|
<head>
|
||||||
<link rel="stylesheet" type="text/css" href="style.css">
|
<meta charset="UTF-8">
|
||||||
<script src="../jquery-3.1.0.min.js"></script>
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<script src="jquery.dataTables.min.js"></script>
|
<title>My Movie List</title>
|
||||||
<script>
|
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.24/css/jquery.dataTables.min.css">
|
||||||
$(document).ready(function(){
|
<script src="../jquery-3.1.0.min.js"></script>
|
||||||
$('#sortable').DataTable({
|
<script src="https://cdn.datatables.net/1.10.24/js/jquery.dataTables.min.js"></script>
|
||||||
"pageLength": -1,
|
<style>
|
||||||
"bPaginate": false
|
* {
|
||||||
});
|
margin: 0;
|
||||||
|
padding: 0;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
|
|
||||||
|
body {
|
||||||
|
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
|
||||||
|
background: transparent;
|
||||||
|
height: 100vh;
|
||||||
|
padding: 0.5rem;
|
||||||
|
color: #fff;
|
||||||
|
overflow: hidden;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.container {
|
||||||
|
background: rgba(0, 0, 0, 0.6);
|
||||||
|
backdrop-filter: blur(10px);
|
||||||
|
border-radius: 8px;
|
||||||
|
border: 1px solid rgba(255, 255, 255, 0.1);
|
||||||
|
padding: 0.75rem;
|
||||||
|
height: 100%;
|
||||||
|
overflow: hidden;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
h1 {
|
||||||
|
color: #fff;
|
||||||
|
margin-bottom: 0.75rem;
|
||||||
|
font-size: 1.5rem;
|
||||||
|
font-weight: 600;
|
||||||
|
text-align: center;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_wrapper {
|
||||||
|
flex: 1;
|
||||||
|
overflow: hidden;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_filter {
|
||||||
|
margin-bottom: 0.5rem;
|
||||||
|
text-align: right;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_filter label {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: flex-end;
|
||||||
|
gap: 0.5rem;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: rgba(255, 255, 255, 0.9);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_filter input {
|
||||||
|
padding: 0.4rem 0.75rem;
|
||||||
|
border: 1px solid rgba(255, 255, 255, 0.2);
|
||||||
|
border-radius: 6px;
|
||||||
|
background: rgba(0, 0, 0, 0.3);
|
||||||
|
color: #fff;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
width: 250px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_filter input::placeholder {
|
||||||
|
color: rgba(255, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_filter input:focus {
|
||||||
|
outline: none;
|
||||||
|
border-color: rgba(255, 255, 255, 0.4);
|
||||||
|
background: rgba(0, 0, 0, 0.4);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_info {
|
||||||
|
padding: 0.5rem 0;
|
||||||
|
color: rgba(255, 255, 255, 0.6);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scroll {
|
||||||
|
flex: 1;
|
||||||
|
overflow: hidden;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollHead {
|
||||||
|
flex-shrink: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollBody {
|
||||||
|
flex: 1;
|
||||||
|
overflow-y: auto !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollBody::-webkit-scrollbar {
|
||||||
|
width: 6px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollBody::-webkit-scrollbar-track {
|
||||||
|
background: rgba(255, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
.dataTables_scrollBody::-webkit-scrollbar-thumb {
|
||||||
|
background: rgba(255, 255, 255, 0.2);
|
||||||
|
border-radius: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.dataTable.stripe tbody tr.odd,
|
||||||
|
table.dataTable.display tbody tr.odd,
|
||||||
|
table.dataTable tbody tr {
|
||||||
|
background: transparent !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.dataTable.hover tbody tr:hover,
|
||||||
|
table.dataTable.display tbody tr:hover {
|
||||||
|
background: rgba(255, 255, 255, 0.1) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
table.dataTable tbody td {
|
||||||
|
background: transparent !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable {
|
||||||
|
width: 100%;
|
||||||
|
border-collapse: collapse;
|
||||||
|
background: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead {
|
||||||
|
background: rgba(0, 0, 0, 0.4);
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th {
|
||||||
|
padding: 0.5rem 0.75rem;
|
||||||
|
text-align: left;
|
||||||
|
font-weight: 600;
|
||||||
|
font-size: 0.85rem;
|
||||||
|
color: rgba(255, 255, 255, 0.9);
|
||||||
|
cursor: pointer;
|
||||||
|
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
|
||||||
|
white-space: nowrap;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th:hover {
|
||||||
|
background: rgba(255, 255, 255, 0.05);
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th.sorting,
|
||||||
|
#sortable thead th.sorting_asc,
|
||||||
|
#sortable thead th.sorting_desc {
|
||||||
|
padding-right: 1.5rem;
|
||||||
|
position: relative;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th.sorting:after {
|
||||||
|
content: '⇅';
|
||||||
|
position: absolute;
|
||||||
|
right: 0.5rem;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: rgba(255, 255, 255, 0.5);
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th.sorting_asc:after {
|
||||||
|
content: '↑';
|
||||||
|
position: absolute;
|
||||||
|
right: 0.5rem;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: rgba(255, 255, 255, 0.9);
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable thead th.sorting_desc:after {
|
||||||
|
content: '↓';
|
||||||
|
position: absolute;
|
||||||
|
right: 0.5rem;
|
||||||
|
top: 50%;
|
||||||
|
transform: translateY(-50%);
|
||||||
|
font-size: 0.75rem;
|
||||||
|
color: rgba(255, 255, 255, 0.9);
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody tr {
|
||||||
|
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
|
||||||
|
transition: background 0.15s ease;
|
||||||
|
background: transparent !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody tr:hover {
|
||||||
|
background: rgba(255, 255, 255, 0.1) !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td {
|
||||||
|
padding: 0.5rem 0.75rem;
|
||||||
|
font-size: 0.9rem;
|
||||||
|
color: rgba(255, 255, 255, 0.9);
|
||||||
|
background: transparent !important;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td:first-child {
|
||||||
|
color: rgba(255, 255, 255, 0.5);
|
||||||
|
font-size: 0.85rem;
|
||||||
|
width: 50px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td a {
|
||||||
|
color: rgba(135, 206, 250, 0.9);
|
||||||
|
text-decoration: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td a:hover {
|
||||||
|
color: rgba(135, 206, 250, 1);
|
||||||
|
text-decoration: underline;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td p[hidden] {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
#sortable tbody td:nth-child(4) {
|
||||||
|
color: rgba(255, 193, 7, 0.9);
|
||||||
|
font-weight: 500;
|
||||||
|
}
|
||||||
|
|
||||||
|
.timestamp {
|
||||||
|
text-align: center;
|
||||||
|
margin-top: 0.5rem;
|
||||||
|
padding-top: 0.5rem;
|
||||||
|
border-top: 1px solid rgba(255, 255, 255, 0.1);
|
||||||
|
color: rgba(255, 255, 255, 0.5);
|
||||||
|
font-size: 0.8rem;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
<script>
|
||||||
|
$(document).ready(function(){
|
||||||
|
$('#sortable').DataTable({
|
||||||
|
"paging": false,
|
||||||
|
"info": true,
|
||||||
|
"searching": true,
|
||||||
|
"ordering": true,
|
||||||
|
"order": [[0, "asc"]],
|
||||||
|
"scrollY": "calc(100vh - 200px)",
|
||||||
|
"scrollCollapse": true,
|
||||||
|
"language": {
|
||||||
|
"search": "Search:",
|
||||||
|
"info": "Showing _TOTAL_ movies",
|
||||||
|
"infoEmpty": "No movies",
|
||||||
|
"infoFiltered": "(filtered from _MAX_)"
|
||||||
|
}
|
||||||
});
|
});
|
||||||
</script>
|
});
|
||||||
</head>
|
</script>
|
||||||
<body>
|
</head>
|
||||||
<header>
|
<body>
|
||||||
<div class="scroll-indicator" />
|
|
||||||
</header>
|
|
||||||
<base target="_parent" />
|
<base target="_parent" />
|
||||||
<table id="sortable" class="sortable">
|
<div class="container">
|
||||||
<thead>
|
<h1>🎬 My Movie Collection</h1>
|
||||||
<tr>
|
<table id="sortable" class="sortable">
|
||||||
<th> Index </th><th> Title </th><th> Year </th><th> IMDb Rating </th><th> Genre </th><th> Status </th>
|
<thead>
|
||||||
</tr>
|
<tr>
|
||||||
</thead>
|
<th>#</th>
|
||||||
<tbody>"""
|
<th>Title</th>
|
||||||
|
<th>Year</th>
|
||||||
|
<th>Rating</th>
|
||||||
|
<th>Genre</th>
|
||||||
|
<th>Status</th>
|
||||||
|
</tr>
|
||||||
|
</thead>
|
||||||
|
<tbody>"""
|
||||||
self.src = src
|
self.src = src
|
||||||
self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html'
|
self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html'
|
||||||
self.movie_list = []
|
self.movie_list = []
|
||||||
@@ -59,32 +319,41 @@ class MovieList:
|
|||||||
# Scan IMDb for a given movie and append it to the html
|
# Scan IMDb for a given movie and append it to the html
|
||||||
# This collects rating, genres, official name and a hyperlink
|
# This collects rating, genres, official name and a hyperlink
|
||||||
imdb = IMDb()
|
imdb = IMDb()
|
||||||
|
first_run = True
|
||||||
while True:
|
while True:
|
||||||
|
if not first_run:
|
||||||
|
time.sleep(10)
|
||||||
|
else:
|
||||||
|
first_run = False
|
||||||
try:
|
try:
|
||||||
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
|
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
|
||||||
break
|
break
|
||||||
except IMDbParserError as exc:
|
except IMDbDataAccessError as imdb_data_exc:
|
||||||
|
exc = str(imdb_data_exc)
|
||||||
|
if '503' in exc:
|
||||||
|
sys.stderr.write('503 - Service Unavailable, retrying...')
|
||||||
|
elif '403' in exc:
|
||||||
|
sys.stderr.write('403 - Forbidden, retrying...\n')
|
||||||
|
query = []
|
||||||
|
time.sleep(10)
|
||||||
|
except IMDbParserError as imdb_parser_exc:
|
||||||
query = []
|
query = []
|
||||||
#print(exc)
|
|
||||||
break
|
break
|
||||||
except Exception as exc:
|
except Exception as exc:
|
||||||
#print(f'error: {exc.__class__.__name__}: {arg["title"]}')
|
|
||||||
time.sleep(10)
|
time.sleep(10)
|
||||||
|
|
||||||
movie = None
|
movie = None
|
||||||
for entry in query:
|
for entry in query:
|
||||||
#print(entry)
|
try:
|
||||||
imdb.update(entry)
|
imdb.update(entry)
|
||||||
|
except Exception as e:
|
||||||
|
sys.stderr.write('update err')
|
||||||
# in case any of these keys is missing in the query, continue
|
# in case any of these keys is missing in the query, continue
|
||||||
if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
|
if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
|
||||||
#print(f'missing key {entry.keys()}')
|
|
||||||
continue
|
continue
|
||||||
if arg['status'] == 'DONE' and 'rating' not in entry.keys():
|
if arg['status'] == 'DONE' and 'rating' not in entry.keys():
|
||||||
continue
|
continue
|
||||||
# Try to eliminate episode results
|
# Try to eliminate episode results
|
||||||
# Must not have "episode" in the object keys
|
|
||||||
# Must not have "episode" in the query title key,
|
|
||||||
# unless "episode" is in the query search string
|
|
||||||
if [i for i in entry.keys() if 'episode' in i.lower()] or (
|
if [i for i in entry.keys() if 'episode' in i.lower()] or (
|
||||||
'episode' in entry['title'].lower() and \
|
'episode' in entry['title'].lower() and \
|
||||||
'episode' not in arg['title'].lower()):
|
'episode' not in arg['title'].lower()):
|
||||||
@@ -106,12 +375,16 @@ class MovieList:
|
|||||||
|
|
||||||
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
|
||||||
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
|
||||||
|
|
||||||
self.html_table[index] = (
|
self.html_table[index] = (
|
||||||
f'\n{" "*8}<tr><td>{index + 1}</td>'
|
f'\n <tr>'
|
||||||
f'<td><p hidden>{arg["title"]}</p>{html_title_td}</td>'
|
f'<td data-label="#">{index + 1}</td>'
|
||||||
f'<td>{movie["year"]}</td><td align="center">{movie["rating"]}</td>'
|
f'<td data-label="Title"><p hidden>{arg["title"]}</p>{html_title_td}</td>'
|
||||||
f'<td>{", ".join(movie["genres"])}</td>'
|
f'<td data-label="Year">{movie["year"]}</td>'
|
||||||
f'<td align="center">{arg["status"]}</td></tr>'
|
f'<td data-label="Rating" align="center">{movie["rating"]}</td>'
|
||||||
|
f'<td data-label="Genre">{", ".join(movie["genres"])}</td>'
|
||||||
|
f'<td data-label="Status" align="center">{arg["status"]}</td>'
|
||||||
|
f'</tr>'
|
||||||
)
|
)
|
||||||
|
|
||||||
def gen(self):
|
def gen(self):
|
||||||
@@ -129,6 +402,9 @@ class MovieList:
|
|||||||
with open(self.src, 'r', encoding='utf-8') as fp_handle:
|
with open(self.src, 'r', encoding='utf-8') as fp_handle:
|
||||||
mlist_raw = fp_handle.read()
|
mlist_raw = fp_handle.read()
|
||||||
for raw_line in mlist_raw.splitlines():
|
for raw_line in mlist_raw.splitlines():
|
||||||
|
# In case the line is empty
|
||||||
|
if not raw_line:
|
||||||
|
continue
|
||||||
self.movie_list.update({
|
self.movie_list.update({
|
||||||
len(self.movie_list): {
|
len(self.movie_list): {
|
||||||
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
|
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
|
||||||
@@ -139,16 +415,25 @@ class MovieList:
|
|||||||
})
|
})
|
||||||
self.html_table = [None] * len(self.movie_list)
|
self.html_table = [None] * len(self.movie_list)
|
||||||
|
|
||||||
# Progress bar. Enough said
|
# Progress bar
|
||||||
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
|
||||||
for idx, movie in self.movie_list.items():
|
for idx, movie in self.movie_list.items():
|
||||||
match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row]
|
# More precise matching - look for the hidden <p> tag with exact title
|
||||||
|
match = [html_row for html_row in self.prev_html
|
||||||
|
if f'<p hidden>{movie["title"]}</p>' in html_row
|
||||||
|
and 'N/A' not in html_row]
|
||||||
if match:
|
if match:
|
||||||
# Update movies as DONE in case of change
|
# Update the index and status from the cached row
|
||||||
match = match[0].replace('*', movie['status'])
|
match_str = match[0]
|
||||||
# Directly insert the current HTML line from the older output
|
# Replace the status (* -> DONE or vice versa)
|
||||||
self.html_table[idx] = \
|
match_str = match_str.replace('*', movie['status']).replace('DONE', movie['status'])
|
||||||
f'\n{" "*8}<tr><td>{idx + 1}</td>{match[match.find("</td>") + 5:]}'
|
# Update the index number
|
||||||
|
if '<td data-label="#">' in match_str:
|
||||||
|
# Extract everything after the index cell
|
||||||
|
after_index = match_str.split('</td>', 1)[1] if '</td>' in match_str else match_str
|
||||||
|
self.html_table[idx] = f'\n <tr><td data-label="#">{idx + 1}</td>{after_index}'
|
||||||
|
else:
|
||||||
|
self.html_table[idx] = match_str
|
||||||
pbar.increment()
|
pbar.increment()
|
||||||
else:
|
else:
|
||||||
thread = threading.Thread(target=self._worker, args=(movie, idx))
|
thread = threading.Thread(target=self._worker, args=(movie, idx))
|
||||||
@@ -161,11 +446,19 @@ class MovieList:
|
|||||||
for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)):
|
for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)):
|
||||||
threads_to_be_started[idx].start()
|
threads_to_be_started[idx].start()
|
||||||
pbar.increment()
|
pbar.increment()
|
||||||
time.sleep(1)
|
time.sleep(2)
|
||||||
time.sleep(1)
|
time.sleep(2)
|
||||||
self.delete_finished_threads()
|
self.delete_finished_threads()
|
||||||
|
|
||||||
self.html += ''.join(self.html_table)
|
self.html += ''.join(self.html_table)
|
||||||
|
|
||||||
|
# Deduplicate entries before writing
|
||||||
|
num_entries = self.deduplicate_html()
|
||||||
|
print(f"\nDeduplicated to {num_entries} unique entries")
|
||||||
|
|
||||||
|
self.html = self.html.split('</tbody>')[0] # Remove everything after tbody if it exists
|
||||||
|
self.html += ''.join(self.html_table)
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def delete_finished_threads(self):
|
def delete_finished_threads(self):
|
||||||
@@ -185,11 +478,14 @@ class MovieList:
|
|||||||
def write(self, dst=None):
|
def write(self, dst=None):
|
||||||
""" Write the HTML list to index.html """
|
""" Write the HTML list to index.html """
|
||||||
out_path = dst if dst else self.dst
|
out_path = dst if dst else self.dst
|
||||||
# Just a fancy scrollbar for the html
|
timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
|
||||||
scroll = '<script type="text/javascript" src="scroll-indicator.js"></script>'
|
self.html += f'''
|
||||||
self.html += ('\n\t</tbody>\n</table>\n' +
|
</tbody>
|
||||||
'\nGenerated on: ' + time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime()) +
|
</table>
|
||||||
' by ' + sys.argv[0] + scroll + '</body>\n</html>')
|
</div>
|
||||||
|
<div class="timestamp">Generated {timestamp} UTC</div>
|
||||||
|
</body>
|
||||||
|
</html>'''
|
||||||
with open(out_path, 'wb') as fp_handle:
|
with open(out_path, 'wb') as fp_handle:
|
||||||
fp_handle.write(self.html.encode('utf8'))
|
fp_handle.write(self.html.encode('utf8'))
|
||||||
|
|
||||||
@@ -199,6 +495,35 @@ class MovieList:
|
|||||||
with open(self.dst, 'rb') as fp_handle:
|
with open(self.dst, 'rb') as fp_handle:
|
||||||
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
self.prev_html = fp_handle.read().decode('utf8').split('\n')
|
||||||
|
|
||||||
|
def deduplicate_html(self):
    """ Remove duplicate entries from html_table based on movie titles

    Each generated row carries the title from the source list inside a
    ``<p hidden>...</p>`` element. The first row seen for a given hidden
    title is kept and later rows with the same hidden title are dropped.
    ``None`` slots in ``self.html_table`` are discarded. Rows without a
    parseable hidden title are always kept, since there is nothing to
    compare them by.

    Replaces ``self.html_table`` with the filtered list (original order
    preserved) and returns the number of rows that remain.
    """
    open_tag = '<p hidden>'
    close_tag = '</p>'
    seen_titles = set()
    deduplicated = []

    for row in self.html_table:
        if row is None:
            continue

        tag_pos = row.find(open_tag)
        title_start = tag_pos + len(open_tag)
        # Only look for the closing tag *after* the opening one; a stray
        # '</p>' earlier in the row must not be mistaken for the title end.
        title_end = row.find(close_tag, title_start) if tag_pos != -1 else -1

        if title_end == -1:
            # If we can't find the hidden title, keep the row anyway
            deduplicated.append(row)
            continue

        title = row[title_start:title_end]
        if title in seen_titles:
            # Skip duplicate
            continue
        seen_titles.add(title)
        deduplicated.append(row)

    # Update html_table with deduplicated content
    self.html_table = deduplicated
    return len(self.html_table)
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
""" Default run """
|
""" Default run """
|
||||||
|
|||||||
@@ -319,7 +319,6 @@ Into The White (2012) [DONE]
|
|||||||
Trash (2014) [DONE]
|
Trash (2014) [DONE]
|
||||||
First Man (2018) [DONE]
|
First Man (2018) [DONE]
|
||||||
Full Metal Jacket (1987) [*]
|
Full Metal Jacket (1987) [*]
|
||||||
Woman at War (2018) [*]
|
|
||||||
Roman J. Israel, Esq. (2017) [DONE]
|
Roman J. Israel, Esq. (2017) [DONE]
|
||||||
Venom (2018) [DONE]
|
Venom (2018) [DONE]
|
||||||
Moby Dick (1998) <TV Mini Series> [DONE]
|
Moby Dick (1998) <TV Mini Series> [DONE]
|
||||||
@@ -381,7 +380,6 @@ Lean on Me (1989) [DONE]
|
|||||||
Let Him Go (2020) [DONE]
|
Let Him Go (2020) [DONE]
|
||||||
Super Lopez (2018) [DONE]
|
Super Lopez (2018) [DONE]
|
||||||
Midnight Special (2016) [DONE]
|
Midnight Special (2016) [DONE]
|
||||||
The Father (2020) [DONE]
|
|
||||||
Another Round (2020) [DONE]
|
Another Round (2020) [DONE]
|
||||||
Promising Young Woman (2020) [DONE]
|
Promising Young Woman (2020) [DONE]
|
||||||
La Daronne (2020) [DONE]
|
La Daronne (2020) [DONE]
|
||||||
@@ -412,3 +410,56 @@ Fresh (2022) [DONE]
|
|||||||
Death on the Nile (2022) [DONE]
|
Death on the Nile (2022) [DONE]
|
||||||
All Quiet on the Western Front (2022) [DONE]
|
All Quiet on the Western Front (2022) [DONE]
|
||||||
Triangle of Sadness (2022) [DONE]
|
Triangle of Sadness (2022) [DONE]
|
||||||
|
Jerry and Marge Go Large (2022) [DONE]
|
||||||
|
Fighting With My Family (2019) [DONE]
|
||||||
|
The Woman King (2022) [DONE]
|
||||||
|
Capernaum (2018) [DONE]
|
||||||
|
The Menu (2022) [DONE]
|
||||||
|
Rye Lane (2023) [DONE]
|
||||||
|
Sisu (2022) [DONE]
|
||||||
|
Zodiac (2007) [DONE]
|
||||||
|
Legend (2015) [DONE]
|
||||||
|
Luther The Fallen Sun (2023) [DONE]
|
||||||
|
Mr. Morgan's Last Love (2013) [DONE]
|
||||||
|
Sweet November (2001) [DONE]
|
||||||
|
The Debt (2010) [DONE]
|
||||||
|
Don Jon (2013) [DONE]
|
||||||
|
Nefarious (2023) [DONE]
|
||||||
|
Oppenheimer (2023) [DONE]
|
||||||
|
No Hard Feelings (2023) [DONE]
|
||||||
|
Powder (1995) [DONE]
|
||||||
|
The Life of David Gale (2003) [DONE]
|
||||||
|
Crazy, Stupid, Love. (2011) [DONE]
|
||||||
|
Before We Go (2014) [DONE]
|
||||||
|
The Banshees of Inisherin (2022) [DONE]
|
||||||
|
Three Thousand Years of Longing (2022) [DONE]
|
||||||
|
Hardcore Henry (2015) [DONE]
|
||||||
|
Chalga (2023) [DONE]
|
||||||
|
Dumb Money (2023) [DONE]
|
||||||
|
Society of the Snow (2023) [DONE]
|
||||||
|
The Aviator's Wife (1981) [*]
|
||||||
|
Past Lives (2023) [DONE]
|
||||||
|
Comandante (2023) [DONE]
|
||||||
|
The Lost Boys (1987) [DONE]
|
||||||
|
Blade Runner 2049 (2017) [DONE]
|
||||||
|
The Lunchbox (2013) [DONE]
|
||||||
|
Half Nelson (2006) [DONE]
|
||||||
|
The Dirt (2019) [DONE]
|
||||||
|
Manchester by the Sea (2016) [DONE]
|
||||||
|
The Perks of Being a Wallflower (2012) [DONE]
|
||||||
|
The Ministry of Ungentlemanly Warfare (2024) [DONE]
|
||||||
|
Wings of Desire (1987) [*]
|
||||||
|
Cadillac Records (2008) [DONE]
|
||||||
|
The Substance (2024) [DONE]
|
||||||
|
Don't Look Up (2021) [DONE]
|
||||||
|
La Vie en Rose (2007) [DONE]
|
||||||
|
The Peanut Butter Falcon (2019) [DONE]
|
||||||
|
One Battle After Another (2025) [DONE]
|
||||||
|
The Master and Margarita (2024) [DONE]
|
||||||
|
Woman at War (2018) [DONE]
|
||||||
|
Good Fortune (2025) [DONE]
|
||||||
|
The Age of Disclosure (2025) [*]
|
||||||
|
Evil Does Not Exist (2023) [DONE]
|
||||||
|
I.S.S. (2023) [*]
|
||||||
|
Black Cat, White Cat (1998) [DONE]
|
||||||
|
Nuremberg (2025) [DONE]
|
||||||
|
|||||||
Reference in New Issue
Block a user