diff --git a/movie_list/index.py b/movie_list/index.py
index 1d26fe3..dfe1144 100755
--- a/movie_list/index.py
+++ b/movie_list/index.py
@@ -14,40 +14,300 @@ import threading
from pathlib import Path
import progressbar
from imdb import IMDb
-from imdb._exceptions import IMDbParserError
+from imdb._exceptions import IMDbParserError, IMDbDataAccessError
class MovieList:
""" Class to generate a movie list HTML table """
def __init__(self, src=None, dst=None):
self.prev_html = []
- self.html = """
-
- My Movie List
-
-
-
-
+
+
+
-
-
-
-
-
+ });
+
+
+
-
-
-
-
Index
Title
Year
IMDb Rating
Genre
Status
-
-
- """
+
+
🎬 My Movie Collection
+
+
+
+
#
+
Title
+
Year
+
Rating
+
Genre
+
Status
+
+
+ """
self.src = src
self.dst = Path(dst) if dst else Path(os.path.dirname(sys.argv[0])) / 'index.html'
self.movie_list = []
@@ -59,32 +319,41 @@ class MovieList:
# Scan IMDb for a given movie and append it to the html
# This collects rating, genres, official name and a hyperlink
imdb = IMDb()
+ first_run = True
while True:
+ if not first_run:
+ time.sleep(10)
+ else:
+ first_run = False
try:
query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
break
- except IMDbParserError as exc:
+ except IMDbDataAccessError as imdb_data_exc:
+ exc = str(imdb_data_exc)
+ if '503' in exc:
+ sys.stderr.write('503 - Service Unavailable, retrying...')
+ elif '403' in exc:
+ sys.stderr.write('403 - Forbidden, retrying...\n')
+ query = []
+ time.sleep(10)
+ except IMDbParserError as imdb_parser_exc:
query = []
- #print(exc)
break
except Exception as exc:
- #print(f'error: {exc.__class__.__name__}: {arg["title"]}')
time.sleep(10)
movie = None
for entry in query:
- #print(entry)
- imdb.update(entry)
+ try:
+ imdb.update(entry)
+ except Exception as e:
+ sys.stderr.write('update err')
# in case any of these keys is missing in the query, continue
if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
- #print(f'missing key {entry.keys()}')
continue
if arg['status'] == 'DONE' and 'rating' not in entry.keys():
continue
# Try to eliminate episode results
- # Must not have "episode" in the object keys
- # Must not have "episode" in the query title key,
- # unless "episode" is in the query search string
if [i for i in entry.keys() if 'episode' in i.lower()] or (
'episode' in entry['title'].lower() and \
'episode' not in arg['title'].lower()):
@@ -106,12 +375,16 @@ class MovieList:
html_title_td = movie['title'] if 'dummy' in movie.keys() else \
f'{movie["title"]}'
+
self.html_table[index] = (
- f'\n{" "*8}
{index + 1}
'
- f'
{arg["title"]}
{html_title_td}
'
- f'
{movie["year"]}
{movie["rating"]}
'
- f'
{", ".join(movie["genres"])}
'
- f'
{arg["status"]}
'
+ f'\n
'
+ f'
{index + 1}
'
+ f'
{arg["title"]}
{html_title_td}
'
+ f'
{movie["year"]}
'
+ f'
{movie["rating"]}
'
+ f'
{", ".join(movie["genres"])}
'
+ f'
{arg["status"]}
'
+ f'
'
)
def gen(self):
@@ -129,6 +402,9 @@ class MovieList:
with open(self.src, 'r', encoding='utf-8') as fp_handle:
mlist_raw = fp_handle.read()
for raw_line in mlist_raw.splitlines():
+ # Skip empty lines: they carry no title to parse
+ if not raw_line:
+ continue
self.movie_list.update({
len(self.movie_list): {
'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
@@ -139,16 +415,25 @@ class MovieList:
})
self.html_table = [None] * len(self.movie_list)
- # Progress bar. Enough said
+ # Progress bar
pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
for idx, movie in self.movie_list.items():
- match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row]
+ # More precise matching - look for the hidden tag holding the exact title
+ match = [html_row for html_row in self.prev_html
+ if f'
{movie["title"]}
' in html_row
+ and 'N/A' not in html_row]
if match:
- # Update movies as DONE in case of change
- match = match[0].replace('*', movie['status'])
- # Directly insert the current HTML line from the older output
- self.html_table[idx] = \
- f'\n{" "*8}
{idx + 1}
{match[match.find("") + 5:]}'
+ # Update the index and status from the cached row
+ match_str = match[0]
+ # Replace the status (* -> DONE or vice versa)
+ match_str = match_str.replace('*', movie['status']).replace('DONE', movie['status'])
+ # Update the index number
+ if '
' in match_str:
+ # Extract everything after the index cell
+ after_index = match_str.split('
', 1)[1] if '' in match_str else match_str
+ self.html_table[idx] = f'\n
')[0] # Remove everything after tbody if it exists
+ self.html += ''.join(self.html_table)
+
return True
def delete_finished_threads(self):
@@ -185,11 +478,14 @@ class MovieList:
def write(self, dst=None):
""" Write the HTML list to index.html """
out_path = dst if dst else self.dst
- # Just a fancy scrollbar for the html
- scroll = ''
- self.html += ('\n\t\n