add movies and update the script
Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
This commit is contained in:
546
movie_list/gen_movie_list.py
Executable file
546
movie_list/gen_movie_list.py
Executable file
@@ -0,0 +1,546 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
1. Import a movie_list txt file
|
||||
2. Query IMDb for each entry, retrieving actual movie name, rating and genres
|
||||
3. Generate an HTML table from the IMDb data
|
||||
4. Store the HTML in index.html
|
||||
"""
|
||||
|
||||
import html
import os
import re
import sys
import time
import threading
from pathlib import Path

import progressbar
from imdb import IMDb
from imdb._exceptions import IMDbParserError, IMDbDataAccessError
|
||||
|
||||
|
||||
class MovieList:
    """Generate a sortable HTML table of movies enriched with IMDb data.

    Typical use is ``MovieList(src, dst).gen()`` followed by ``write()``.
    Rows found in a previously generated output file are reused, so only new
    entries (and entries that previously resolved to ``N/A``) hit IMDb.

    NOTE(review): cached rows and de-duplication are keyed on the title alone,
    so two input entries sharing a title (e.g. a film and its remake) collapse
    into one row - confirm this is intended.
    """

    # Upper bound on IMDb worker threads running at the same time.
    MAX_THREADS = 10
    # Seconds to pause between thread starts / polling rounds (request throttle).
    START_DELAY = 2
    # Seconds to wait before repeating a failed IMDb search.
    RETRY_DELAY = 10
    # Search attempts per movie before giving up and emitting an N/A row.
    MAX_ATTEMPTS = 6

    # Matches the last cell of a row (the status cell) in both the current
    # ``data-label`` markup and older rows that lack those attributes.
    _STATUS_CELL = re.compile(r'(<td\b[^>]*>)[^<]*(</td>\s*</tr>)')
    _INDEX_CELL = '<td data-label="#">'
    _TITLE_OPEN = '<p hidden>'
    _TITLE_CLOSE = '</p>'

    # Static page header, up to and including the opening <tbody> tag.
    HTML_HEAD = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>My Movie List</title>
<link rel="stylesheet" type="text/css" href="https://cdn.datatables.net/1.10.24/css/jquery.dataTables.min.css">
<script src="../jquery-3.7.1.min.js"></script>
<script src="https://cdn.datatables.net/1.10.24/js/jquery.dataTables.min.js"></script>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}

body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, sans-serif;
background: transparent;
height: 100vh;
padding: 0.5rem;
color: #fff;
overflow: hidden;
display: flex;
flex-direction: column;
}

.container {
background: rgba(0, 0, 0, 0.6);
backdrop-filter: blur(10px);
border-radius: 8px;
border: 1px solid rgba(255, 255, 255, 0.1);
padding: 0.75rem;
height: 100%;
overflow: hidden;
display: flex;
flex-direction: column;
}

h1 {
color: #fff;
margin-bottom: 0.75rem;
font-size: 1.5rem;
font-weight: 600;
text-align: center;
}

.dataTables_wrapper {
flex: 1;
overflow: hidden;
display: flex;
flex-direction: column;
}

.dataTables_filter {
margin-bottom: 0.5rem;
text-align: right;
}

.dataTables_filter label {
display: flex;
align-items: center;
justify-content: flex-end;
gap: 0.5rem;
font-size: 0.9rem;
color: rgba(255, 255, 255, 0.9);
}

.dataTables_filter input {
padding: 0.4rem 0.75rem;
border: 1px solid rgba(255, 255, 255, 0.2);
border-radius: 6px;
background: rgba(0, 0, 0, 0.3);
color: #fff;
font-size: 0.9rem;
width: 250px;
}

.dataTables_filter input::placeholder {
color: rgba(255, 255, 255, 0.5);
}

.dataTables_filter input:focus {
outline: none;
border-color: rgba(255, 255, 255, 0.4);
background: rgba(0, 0, 0, 0.4);
}

.dataTables_info {
padding: 0.5rem 0;
color: rgba(255, 255, 255, 0.6);
font-size: 0.85rem;
}

.dataTables_scroll {
flex: 1;
overflow: hidden;
display: flex;
flex-direction: column;
}

.dataTables_scrollHead {
flex-shrink: 0;
}

.dataTables_scrollBody {
flex: 1;
overflow-y: auto !important;
}

.dataTables_scrollBody::-webkit-scrollbar {
width: 6px;
}

.dataTables_scrollBody::-webkit-scrollbar-track {
background: rgba(255, 255, 255, 0.05);
}

.dataTables_scrollBody::-webkit-scrollbar-thumb {
background: rgba(255, 255, 255, 0.2);
border-radius: 3px;
}

table.dataTable.stripe tbody tr.odd,
table.dataTable.display tbody tr.odd,
table.dataTable tbody tr {
background: transparent !important;
}

table.dataTable.hover tbody tr:hover,
table.dataTable.display tbody tr:hover {
background: rgba(255, 255, 255, 0.1) !important;
}

table.dataTable tbody td {
background: transparent !important;
}

#sortable {
width: 100%;
border-collapse: collapse;
background: transparent;
}

#sortable thead {
background: rgba(0, 0, 0, 0.4);
}

#sortable thead th {
padding: 0.5rem 0.75rem;
text-align: left;
font-weight: 600;
font-size: 0.85rem;
color: rgba(255, 255, 255, 0.9);
cursor: pointer;
border-bottom: 1px solid rgba(255, 255, 255, 0.1);
white-space: nowrap;
}

#sortable thead th:hover {
background: rgba(255, 255, 255, 0.05);
}

#sortable thead th.sorting,
#sortable thead th.sorting_asc,
#sortable thead th.sorting_desc {
padding-right: 1.5rem;
position: relative;
}

#sortable thead th.sorting:after {
content: '⇅';
position: absolute;
right: 0.5rem;
top: 50%;
transform: translateY(-50%);
font-size: 0.75rem;
color: rgba(255, 255, 255, 0.5);
}

#sortable thead th.sorting_asc:after {
content: '↑';
position: absolute;
right: 0.5rem;
top: 50%;
transform: translateY(-50%);
font-size: 0.75rem;
color: rgba(255, 255, 255, 0.9);
}

#sortable thead th.sorting_desc:after {
content: '↓';
position: absolute;
right: 0.5rem;
top: 50%;
transform: translateY(-50%);
font-size: 0.75rem;
color: rgba(255, 255, 255, 0.9);
}

#sortable tbody tr {
border-bottom: 1px solid rgba(255, 255, 255, 0.05);
transition: background 0.15s ease;
background: transparent !important;
}

#sortable tbody tr:hover {
background: rgba(255, 255, 255, 0.1) !important;
}

#sortable tbody td {
padding: 0.5rem 0.75rem;
font-size: 0.9rem;
color: rgba(255, 255, 255, 0.9);
background: transparent !important;
}

#sortable tbody td:first-child {
color: rgba(255, 255, 255, 0.5);
font-size: 0.85rem;
width: 50px;
}

#sortable tbody td a {
color: rgba(135, 206, 250, 0.9);
text-decoration: none;
}

#sortable tbody td a:hover {
color: rgba(135, 206, 250, 1);
text-decoration: underline;
}

#sortable tbody td p[hidden] {
display: none;
}

#sortable tbody td:nth-child(4) {
color: rgba(255, 193, 7, 0.9);
font-weight: 500;
}

.timestamp {
text-align: center;
margin-top: 0.5rem;
padding-top: 0.5rem;
border-top: 1px solid rgba(255, 255, 255, 0.1);
color: rgba(255, 255, 255, 0.5);
font-size: 0.8rem;
}
</style>
<script>
$(document).ready(function(){
$('#sortable').DataTable({
"paging": false,
"info": true,
"searching": true,
"ordering": true,
"order": [[0, "asc"]],
"scrollY": "calc(100vh - 200px)",
"scrollCollapse": true,
"language": {
"search": "Search:",
"info": "Showing _TOTAL_ movies",
"infoEmpty": "No movies",
"infoFiltered": "(filtered from _MAX_)"
}
});
});
</script>
</head>
<body>
<base target="_parent" />
<div class="container">
<h1>🎬 My Movie Collection</h1>
<table id="sortable" class="sortable">
<thead>
<tr>
<th>#</th>
<th>Title</th>
<th>Year</th>
<th>Rating</th>
<th>Genre</th>
<th>Status</th>
</tr>
</thead>
<tbody>"""

    def __init__(self, src=None, dst=None):
        """Set up the generator and load any previously generated output.

        Args:
            src: Path of the plain-text movie list. When falsy, ``gen()``
                falls back to ``movie_list`` next to the running script.
            dst: Path of the HTML file to produce. When falsy, defaults to
                ``index.html`` next to the running script.
        """
        self.prev_html = []
        self.html = self.HTML_HEAD
        self.src = src
        self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html'
        # Maps row position -> parsed entry; filled by gen().
        self.movie_list = {}
        self.threads = []
        self.html_table = None
        self.read_prev_output()

    # ------------------------------------------------------------------ #
    # Small pure helpers
    # ------------------------------------------------------------------ #
    @staticmethod
    def _esc(value):
        """Return *value* as text that is safe inside an HTML element."""
        # quote=False keeps apostrophes/quotes literal so titles such as
        # "Schindler's List" still match rows cached by earlier runs.
        return html.escape(str(value), quote=False)

    @classmethod
    def _hidden_title(cls, title):
        """Return the hidden marker element used to identify a row by title."""
        return f'{cls._TITLE_OPEN}{cls._esc(title)}{cls._TITLE_CLOSE}'

    @classmethod
    def _extract_hidden_title(cls, row):
        """Return the text of the hidden title marker in *row*, or None."""
        start = row.find(cls._TITLE_OPEN)
        if start == -1:
            return None
        start += len(cls._TITLE_OPEN)
        end = row.find(cls._TITLE_CLOSE, start)
        if end == -1:
            return None
        return row[start:end]

    @staticmethod
    def _parse_line(raw_line):
        """Split one input line into title, kind, year and status.

        The expected layout is ``Title <kind> (year) [status]`` where
        ``<kind>`` is optional and defaults to ``movie``. Missing parts
        yield empty strings instead of raising or returning garbage.
        """
        def between(opener, closer, last=False):
            start = raw_line.rfind(opener) if last else raw_line.find(opener)
            if start == -1:
                return ''
            end = raw_line.find(closer, start + 1)
            if end == -1:
                return ''
            return raw_line[start + 1:end].strip()

        # The title ends at the first kind/year delimiter; without one it
        # ends at the trailing status bracket, or spans the whole line.
        cut = next((pos for pos, char in enumerate(raw_line) if char in '<('), None)
        if cut is None:
            bracket = raw_line.rfind('[')
            cut = bracket if bracket > 0 else len(raw_line)

        return {
            'title': raw_line[:cut].strip(),
            'kind': between('<', '>') or 'movie',
            'year': between('(', ')'),
            # Status is the last bracket pair so titles like "[REC]" survive.
            'status': between('[', ']', last=True),
        }

    @classmethod
    def _refresh_cached_row(cls, row, index, status):
        """Return a cached *row* renumbered to *index* and set to *status*.

        Only the status cell and the index cell are rewritten; the title and
        every other cell are left untouched.
        """
        status_html = cls._esc(status)
        row = cls._STATUS_CELL.sub(
            lambda found: found.group(1) + status_html + found.group(2),
            row,
            count=1,
        )
        if cls._INDEX_CELL in row:
            _, _, rest = row.partition('</td>')
            return f'\n <tr>{cls._INDEX_CELL}{index + 1}</td>{rest}'
        # Row without an index cell: keep it, but on its own line so the next
        # run can still split the output into individual rows.
        return '\n' + row

    @classmethod
    def _render_row(cls, index, arg, movie):
        """Build the ``<tr>`` markup for one entry.

        Args:
            index: Zero-based row position (displayed one-based).
            arg: Parsed input entry as returned by ``_parse_line``.
            movie: Matching IMDb result, or None when nothing matched, in
                which case the input values and ``N/A`` placeholders are used.
        """
        if movie is None:
            title_cell = cls._esc(arg['title'])
            year, rating, genres = arg['year'], 'N/A', ['N/A']
        else:
            known = movie.keys()
            shown_title = cls._esc(movie['title'])
            title_cell = (
                f'<a href="https://www.imdb.com/title/tt{movie.movieID}" '
                f'target="_blank">{shown_title}</a>'
            )
            year = movie['year']
            rating = movie['rating'] if 'rating' in known else 'N/A'
            genres = movie['genres'] if 'genres' in known else ['N/A']

        genre_text = cls._esc(', '.join(genres))
        return (
            f'\n <tr>'
            f'<td data-label="#">{index + 1}</td>'
            f'<td data-label="Title">{cls._hidden_title(arg["title"])}{title_cell}</td>'
            f'<td data-label="Year">{cls._esc(year)}</td>'
            f'<td data-label="Rating" align="center">{cls._esc(rating)}</td>'
            f'<td data-label="Genre">{genre_text}</td>'
            f'<td data-label="Status" align="center">{cls._esc(arg["status"])}</td>'
            f'</tr>'
        )

    # ------------------------------------------------------------------ #
    # IMDb lookup
    # ------------------------------------------------------------------ #
    def _search(self, imdb, arg):
        """Search IMDb for *arg*, retrying failures a bounded number of times.

        Returns the list of search results, or an empty list when the
        response cannot be parsed or every attempt failed.
        """
        needle = f'{arg["title"]} {arg["year"]}'
        for attempt in range(self.MAX_ATTEMPTS):
            if attempt:
                time.sleep(self.RETRY_DELAY)
            try:
                return imdb.search_movie(needle)
            except IMDbDataAccessError as imdb_data_exc:
                reason = str(imdb_data_exc)
                if '503' in reason:
                    sys.stderr.write('503 - Service Unavailable, retrying...\n')
                elif '403' in reason:
                    sys.stderr.write('403 - Forbidden, retrying...\n')
                else:
                    sys.stderr.write(f'IMDb access error for "{needle}", retrying...\n')
            except IMDbParserError:
                # A response that cannot be parsed will not improve on retry.
                return []
            except Exception as exc:  # keep the worker alive on unexpected errors
                sys.stderr.write(f'unexpected error for "{needle}": {exc}, retrying...\n')
        sys.stderr.write(f'giving up on "{needle}" after {self.MAX_ATTEMPTS} attempts\n')
        return []

    @staticmethod
    def _pick_match(imdb, candidates, arg):
        """Return the first search result that fits *arg*, or None."""
        for entry in candidates:
            try:
                imdb.update(entry)
            except Exception as exc:  # best effort: judge the entry as-is
                sys.stderr.write(f'update error for "{arg["title"]}": {exc}\n')
            known = entry.keys()
            # Skip results lacking the fields the row needs.
            if not all(key in known for key in ('kind', 'year', 'title')):
                continue
            # A watched entry must have a rating to be useful.
            if arg['status'] == 'DONE' and 'rating' not in known:
                continue
            # Try to eliminate episode results.
            looks_like_episode = any('episode' in key.lower() for key in known) or (
                'episode' in entry['title'].lower()
                and 'episode' not in arg['title'].lower()
            )
            if looks_like_episode:
                continue
            if entry['kind'].lower() == arg['kind'].lower():
                return entry
        return None

    def _worker(self, arg, index):
        """Look up one entry on IMDb and store its row at ``html_table[index]``.

        Collects rating, genres, official name and a hyperlink; entries that
        cannot be resolved get a row built from the input with ``N/A`` values.
        """
        imdb = IMDb()
        movie = self._pick_match(imdb, self._search(imdb, arg), arg)
        self.html_table[index] = self._render_row(index, arg, movie)

    # ------------------------------------------------------------------ #
    # Generation
    # ------------------------------------------------------------------ #
    def _find_cached_row(self, title):
        """Return the previous-output row for *title*, or None.

        Rows containing ``N/A`` are ignored so incomplete entries are
        looked up again.
        """
        needle = self._hidden_title(title)
        for row in self.prev_html:
            if needle in row and 'N/A' not in row:
                return row
        return None

    def _run_workers(self, pbar):
        """Start the queued worker threads, throttled, and wait for them all."""
        while self.threads:
            running = sum(1 for thread in self.threads if thread.is_alive())
            pending = [thread for thread in self.threads if thread.ident is None]
            free_slots = max(self.MAX_THREADS - running, 0)
            for thread in pending[:free_slots]:
                thread.start()
                pbar.increment()
                time.sleep(self.START_DELAY)
            time.sleep(self.START_DELAY)
            self.delete_finished_threads()

    def gen(self):
        """Generate the HTML rows from the input list, using threaded workers.

        Returns:
            True on success, False when the input file does not exist.
        """
        if self.src:
            self.src = Path(self.src)
        else:
            self.src = Path(os.path.dirname(sys.argv[0])) / 'movie_list'
        if not self.src.exists():
            sys.stderr.write(f'error: input does not exist - {self.src}\n')
            return False

        # Open the movie list & split the columns, ignoring blank lines.
        with open(self.src, 'r', encoding='utf-8') as fp_handle:
            entries = [line for line in fp_handle.read().splitlines() if line.strip()]
        self.movie_list = {
            position: self._parse_line(line) for position, line in enumerate(entries)
        }
        self.html_table = [None] * len(self.movie_list)

        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        for idx, movie in self.movie_list.items():
            cached = self._find_cached_row(movie['title'])
            if cached is not None:
                self.html_table[idx] = self._refresh_cached_row(cached, idx, movie['status'])
                pbar.increment()
            else:
                thread = threading.Thread(target=self._worker, args=(movie, idx))
                self.threads.append(thread)

        self._run_workers(pbar)

        # Deduplicate first: this also drops slots a failed worker left empty,
        # so the join below cannot hit None.
        num_entries = self.deduplicate_html()
        print(f"\nDeduplicated to {num_entries} unique entries")

        # Rebuild from the page header so the rows are emitted exactly once,
        # even when gen() runs again on the same instance.
        head, tbody_tag, _ = self.html.partition('<tbody>')
        self.html = head + tbody_tag + ''.join(self.html_table)

        return True

    def delete_finished_threads(self):
        """Join and drop every thread that was started and has finished."""
        remaining = []
        for thread in self.threads:
            # ident stays None until start(), so this skips queued threads.
            if thread.ident is not None and not thread.is_alive():
                thread.join()
            else:
                remaining.append(thread)
        self.threads = remaining

    def get_alive_threads(self):
        """Return the tracked threads that have been started."""
        return [
            thread for thread in self.threads
            if thread.is_alive() or thread.ident is not None
        ]

    def write(self, dst=None):
        """Write the HTML list to *dst*, or to ``self.dst`` when *dst* is falsy."""
        out_path = dst if dst else self.dst
        timestamp = time.strftime('%Y-%m-%d %H:%M:%S', time.gmtime())
        footer = f'''
</tbody>
</table>
</div>
<div class="timestamp">Generated {timestamp} UTC</div>
</body>
</html>'''
        # Drop a footer left by an earlier write() so it is never doubled.
        body, _, _ = self.html.partition('\n</tbody>')
        self.html = body + footer
        with open(out_path, 'wb') as fp_handle:
            fp_handle.write(self.html.encode('utf8'))

    def read_prev_output(self):
        """Load the lines of a previously generated output file, if present."""
        if self.dst.is_file():
            with open(self.dst, 'rb') as fp_handle:
                self.prev_html = fp_handle.read().decode('utf8').split('\n')

    def deduplicate_html(self):
        """Remove duplicate and empty entries from ``html_table``.

        Rows are keyed on their hidden title marker and the first occurrence
        wins; rows without a marker are always kept.

        Returns:
            The number of rows left in ``html_table``.
        """
        seen_titles = set()
        unique_rows = []
        for row in self.html_table or []:
            if row is None:
                continue
            title = self._extract_hidden_title(row)
            if title is not None:
                if title in seen_titles:
                    continue
                seen_titles.add(title)
            unique_rows.append(row)
        self.html_table = unique_rows
        return len(unique_rows)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: ``gen_movie_list.py [SRC [DST]]``.

    SRC is the movie list to read and DST the HTML file to write; both are
    optional. More than two arguments is an error and exits with status 1.
    The output is written only when generation reports success.
    """
    cli_args = sys.argv[1:]
    if len(cli_args) > 2:
        sys.stderr.write(f'error: max 2 variables, {len(cli_args)} given!\n')
        sys.exit(1)

    src = cli_args[0] if cli_args else None
    dst = cli_args[1] if len(cli_args) == 2 else None

    generator = MovieList(src=src, dst=dst)
    if not generator.gen():
        return
    generator.write(dst=dst)
|
||||
|
||||
|
||||
# Run main() only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user