Improve movie list generator, add more movies

Signed-off-by: Bogomil Vasilev <smirky@smirky.net>
2022-12-07 23:36:36 +02:00
parent 4bfc45c100
commit 3e7dafb4b2
2 changed files with 62 additions and 36 deletions
@@ -12,9 +12,9 @@ import sys
 import time
 import threading
 from pathlib import Path
 from http.client import IncompleteRead
 import progressbar
 from imdb import IMDb
 from imdb._exceptions import IMDbParserError
 class MovieList:
@@ -59,27 +59,35 @@ class MovieList:
        # Scan IMDb for a given movie and append it to the html
        # This collects rating, genres, official name and a hyperlink
        imdb = IMDb()
        save_stdout = sys.stdout
        with open(os.devnull, 'wb') as sys.stdout:
        while True:
            try:
                query = imdb.search_movie(f'{arg["title"]} {arg["year"]}')
                break
-                except IncompleteRead:
+            except IMDbParserError as exc:
-                    pass
+                query = []
-        sys.stdout = save_stdout
+                #print(exc)
                break
            except Exception as exc:
                #print(f'error: {exc.__class__.__name__}: {arg["title"]}')
                time.sleep(10)
        movie = None
        for entry in query:
-            has_minimum_keys = True
+            #print(entry)
-            for key in ['kind', 'year', 'title']:
+            imdb.update(entry)
-                if key not in entry.keys():
+            # in case any of these keys is missing in the query, continue
-                    has_minimum_keys = False
+            if not all(key in entry.keys() for key in ['kind', 'year', 'title']):
-            if not has_minimum_keys:
+                #print(f'missing key {entry.keys()}')
                continue
            if arg['status'] == 'DONE' and 'rating' not in entry.keys():
                continue
            # Try to eliminate episode results
-            if [i for i in entry.keys() if 'episode' in i.lower()] or \
+            # Must not have "episode" in the object keys
-                    'episode' in entry['title'].lower():
+            # Must not have "episode" in the query title key,
            # unless "episode" is in the query search string
            if [i for i in entry.keys() if 'episode' in i.lower()] or (
                    'episode' in entry['title'].lower() and \
                    'episode' not in arg['title'].lower()):
                continue
            if entry['kind'].lower() == arg['kind'].lower():
                movie = entry
@@ -91,7 +99,6 @@ class MovieList:
                'year': arg['year'],
                'dummy': None
            }
        if 'genres' not in movie.keys():
            movie['genres'] = ['N/A']
        if 'rating' not in movie.keys():
@@ -101,7 +108,7 @@ class MovieList:
                f'<a href="https://www.imdb.com/title/tt{movie.movieID}" target="_blank">{movie["title"]}</a>'
        self.html_table[index] = (
            f'\n{" "*8}<tr><td>{index + 1}</td>'
-            f'<td><p hidden>{movie["title"]}</p>{html_title_td}</td>'
+            f'<td><p hidden>{arg["title"]}</p>{html_title_td}</td>'
            f'<td>{movie["year"]}</td><td align="center">{movie["rating"]}</td>'
            f'<td>{", ".join(movie["genres"])}</td>'
            f'<td align="center">{arg["status"]}</td></tr>'
@@ -124,7 +131,7 @@ class MovieList:
            for raw_line in mlist_raw.splitlines():
                self.movie_list.update({
                    len(self.movie_list): {
-                        'title': raw_line.split('(', 1)[0].strip(),
+                        'title': raw_line[0:next((i for i, ch in enumerate(raw_line) if ch in {'<', '('}), None) - 1],
                        'kind': raw_line[raw_line.find('<')+1:raw_line.rfind('>')+1].strip('<>') or 'movie',
                        'year': raw_line[raw_line.find('(')+1:raw_line.find(')')],
                        'status': raw_line[raw_line.find('[')+1:raw_line.find(']')],
@@ -135,28 +142,46 @@ class MovieList:
        # Progress bar. Enough said
        pbar = progressbar.ProgressBar(max_value=len(self.movie_list))
        for idx, movie in self.movie_list.items():
-            match = [html_row for html_row in self.prev_html if movie['title'] in html_row]
+            match = [html_row for html_row in self.prev_html if movie['title'] in html_row and 'N/A' not in html_row]
            if match:
                # Update movies as DONE in case of change
                match = match[0].replace('*', movie['status'])
                # Directly insert the current HTML line from the older output
                self.html_table[idx] = \
                        f'\n{" "*8}<tr><td>{idx + 1}</td>{match[match.find("</td>") + 5:]}'
-                pbar.update(idx + 1)
+                pbar.increment()
            else:
                thread = threading.Thread(target=self._worker, args=(movie, idx))
                self.threads.append(thread)
                thread.start()
                pbar.update(idx+1)
                time.sleep(0.2)
                if len(self.threads) % 16 == 0:
                    time.sleep(6)
-        for thread in self.threads:
+        max_threads = 10
-            thread.join()
+        while self.threads:
            threads_alive = self.get_alive_threads()
            threads_to_be_started = [i for i in self.threads if i not in threads_alive]
            for idx in range(max_threads if max_threads < len(threads_to_be_started) else len(threads_to_be_started)):
                threads_to_be_started[idx].start()
                pbar.increment()
                time.sleep(1)
            time.sleep(1)
            self.delete_finished_threads()
        self.html += ''.join(self.html_table)
        return True
    def delete_finished_threads(self):
        for idx, thread in enumerate(self.threads):
            if not thread.is_alive() and thread._started.is_set():
                thread.join()
                self.threads[idx] = None
        self.threads = list(filter(lambda a: a is not None, self.threads))
    def get_alive_threads(self):
        threads = []
        for thread in self.threads:
            if thread.is_alive() or thread._started.is_set():
                threads.append(thread)
        return threads
    def write(self, dst=None):
        """ Write the HTML list to index.html """
        out_path = dst if dst else self.dst
@@ -18,7 +18,7 @@ Mary and Max (2009)						[DONE]
 Lucky Number Slevin (2006)					[DONE]
 The Road (2009)							[DONE]
 Magnolia (1999)							[DONE]
-Il Mostro (1994)						[DONE]
+The Monster (1994)						[DONE]
 The Tiger and the Snow (2005)					[DONE]
 Lucy (2014)							[DONE]
 End of Watch (2012)						[DONE]
@@ -115,7 +115,7 @@ Star Wars: Episode III - Revenge of the Sith (2005)		[DONE]
 Star Wars: Episode IV - A New Hope (1977)			[DONE]
 Star Wars: Episode V - The Empire Strikes Back (1980)		[DONE]
 Star Wars: Episode VI - Return of the Jedi (1983)		[DONE]
-Star Wars: Episode: The Force Awakens (2015)			[DONE]
+Star Wars: Episode VII: The Force Awakens (2015)		[DONE]
 Rogue One: A Star Wars Story (2016)				[DONE]
 Men in Black (1997)						[DONE]
 Men in Black II (2002)						[DONE]
@@ -160,7 +160,7 @@ Unforgiven (1992)						[DONE]
 Scarface (1983)							[DONE]
 Die Hard (1988)							[DONE]
 Die Hard 2 (1990)						[DONE]
-Die Hard: With a Vengeance (1995)				[DONE]
+Die Hard with a Vengeance (1995)				[DONE]
 Live Free or Die Hard (2007)					[DONE]
 A Good Day to Die Hard (2013)					[DONE]
 Die Another Day (2002)						[DONE]
@@ -184,13 +184,13 @@ Pan (2015)							[DONE]
 The Cobbler (2014)						[DONE]
 The Conjuring (2013)						[DONE]
 Starred Up (2013)						[DONE]
-Kraftidioten (2014)						[DONE]
+In Order of Disappearance (2014)				[DONE]
 The Imitation Game (2014)					[DONE]
 Begin Again (2013)						[DONE]
 A Walk Among the Tombstones (2014)				[DONE]
 Detachment (2011)						[DONE]
 Identity (2003)							[DONE]
-Turist (Force Majeure) (2014)					[DONE]
+Force Majeure (2014)						[DONE]
 Gone Girl (2014)						[DONE]
 The Equalizer (2014)						[DONE]
 The Equalizer 2 (2018)						[DONE]
@@ -351,7 +351,6 @@ Togo (2019)							[*]
 Knives Out (2019)						[DONE]
 Children of Men (2006)						[DONE]
 Bogowie (2014)							[DONE]
 Spiral (2021)							[*]
 Ip Man 4 (2019)							[DONE]
 The Collini Case (2019)						[DONE]
 The Traitor (2019)						[DONE]
@@ -397,7 +396,7 @@ Arrival (2016)							[DONE]
 Duel (1971)							[DONE]
 The Courier (2020)						[DONE]
 The Bone Collector (1999)					[DONE]
-Oslo (2021)							[DONE]
+Oslo <TV Movie> (2021)						[DONE]
 Wrath of Man (2021)						[DONE]
 Nobody (2021)							[DONE]
 Голата истина за група Жигули (2021)				[DONE]
@@ -411,3 +410,5 @@ Boss Level (2020)						[DONE]
 Boite Noire (2021)						[DONE]
 Fresh (2022)							[DONE]
 Death on the Nile (2022)					[DONE]
 All Quiet on the Western Front (2022)				[DONE]
 Triangle of Sadness (2022)					[DONE]