#!/usr/bin/env python3
# coding: utf-8
"""Command line helpers around movie metadata.

Looks up movies on TMDB, IMDB and OMDB, caches the answers on disk and
offers sub-commands (``do_*`` functions) to print data, rename files and
generate a static HTML index.
"""

# Python 3 only - fail early with a readable message on anything else.
import sys
assert sys.version.startswith("3."), "Requires python3"

import os
import re
import json
import time
import argparse
import locale
import math
import shutil
import logging
import hashlib
from io import BytesIO
from multiprocessing.dummy import Pool
from urllib.parse import quote as urlencode

from PIL import Image, ImageFilter
import tmdbsimple as tmdb
import requests
import guessit


def read_key():
    """Return the TMDB API key.

    Lookup order: the ``TMDB_KEY`` environment variable, the file
    ``$XDG_CONFIG_HOME/tmdbkey`` (``~/.config/tmdbkey`` when the variable
    is unset), then ``~/.tmdbkey``.

    Raises:
        Exception: if none of the sources provides a key.
    """
    if "TMDB_KEY" in os.environ:
        return os.environ["TMDB_KEY"]

    home = os.path.expanduser("~")
    cfg_home = os.environ.get("XDG_CONFIG_HOME", os.path.join(home, ".config"))
    candidates = [
        os.path.join(cfg_home, "tmdbkey"),
        os.path.join(home, ".tmdbkey"),
    ]
    for candidate in candidates:
        if os.path.exists(candidate):
            with open(candidate, "r") as fh:
                return fh.read().strip()

    # Name the actual key file, not just the configuration directory.
    raise Exception("No TMDB Key defined. " +
                    "Set TMDB_KEY=.. or {} or ~/.tmdbkey file".format(candidates[0]))


class TMDBCache(object):
    """File based cache in front of the TMDB, IMDB and OMDB web services.

    Every movie gets its own directory below ``$XDG_CACHE_HOME/movies``
    (``~/.cache/movies`` when the variable is unset); each cached answer is
    one file inside that directory.
    """

    def __init__(self):
        logger_name = self.__class__.__module__ + "." + self.__class__.__name__
        self.logger = logging.getLogger(logger_name)

    def _get_cachedir(self, movie_id):
        """Return the cache directory of ``movie_id`` (a str), creating it if needed."""
        base = os.environ.get("XDG_CACHE_HOME",
                              os.path.join(os.path.expanduser("~"), ".cache"))
        cachedir = os.path.join(base, "movies", movie_id)
        # exist_ok avoids the check-then-create race when several worker
        # threads (see do_index) create cache directories at the same time.
        os.makedirs(cachedir, exist_ok=True)
        return cachedir

    def _cache(self, movie_id, key, callable_func):
        """Return the cached bytes for ``key`` or produce and store them.

        ``callable_func`` takes no arguments and must return ``bytes``. It
        is only called on a cache miss.
        """
        cachefile = os.path.join(self._get_cachedir(movie_id), key)
        if os.path.exists(cachefile):
            with open(cachefile, "rb") as fh:
                return fh.read()

        # Produce the data BEFORE touching the cache file: a failing
        # producer must not leave an empty entry behind that would be
        # served as a valid cache hit on every later call.
        data = callable_func()
        try:
            with open(cachefile, "wb") as fh:
                fh.write(data)
        except Exception:
            # Do not keep a truncated/empty entry if writing failed.
            if os.path.exists(cachefile):
                os.unlink(cachefile)
            raise
        return data

    def _cache_json(self, movie_id, key, callable_func):
        """Like ``_cache`` but for JSON-serializable values instead of bytes."""
        def json_wrapper():
            return json.dumps(callable_func()).encode('utf-8')
        return json.loads(self._cache(movie_id, key, json_wrapper).decode('utf-8'))

    def prune(self, movie_id):
        """Remove the cached TMDB info and alternative titles of ``movie_id``."""
        self.logger.debug("prune {}".format(movie_id))
        keys = [
            "tmdb_info.json",
            "tmdb_alternative_title.json",
            # alternative_title() stores its data under a key that is
            # prefixed with the movie id, so that file must go as well.
            movie_id + "tmdb_alternative_title.json",
        ]
        cachedir = self._get_cachedir(movie_id)
        for key in keys:
            cachefile = os.path.join(cachedir, key)
            if os.path.exists(cachefile):
                self.logger.warning("Remove {}".format(cachefile))
                os.unlink(cachefile)
            else:
                self.logger.debug("Key not in db {}".format(cachefile))

    def infos(self, movie_id):
        """Return the (cached) ``tmdb.Movies(movie_id).info()`` dictionary.

        Raises:
            Exception: wrapping any error of the lookup.
        """
        try:
            self.logger.debug("movie %s", movie_id)
            return self._cache_json(movie_id, "tmdb_info.json",
                                    tmdb.Movies(movie_id).info)
        except Exception as e:
            raise Exception("Failed to query movie-id {id}: {reason}".format(
                id=movie_id, reason=str(e))) from e

    def tmdb_search(self, query):
        """Search TMDB for ``query``; return the info dict of every hit as a list."""
        results = tmdb.Search().movie(query=query)["results"]
        return [self.infos(str(hit['id'])) for hit in results]

    def alternative_title(self, movie_id, locale):
        """Return the title for country code ``locale``.

        Falls back to the movie's ``title`` and then its ``original_title``
        when no alternative title is listed for that country.

        Raises:
            Exception: wrapping any error of the lookup.
        """
        try:
            key = movie_id + "tmdb_alternative_title.json"
            search = tmdb.Movies(movie_id).alternative_titles
            titles = self._cache_json(movie_id, key, search)
            matching = [entry for entry in titles["titles"]
                        if entry["iso_3166_1"] == locale]
            if matching:
                return matching[0]["title"]
            infos = self.infos(movie_id)
            return infos["title"] or infos["original_title"]
        except Exception as e:
            raise Exception("Failed to query movie-id {id}: {reason}".format(
                id=movie_id, reason=str(e))) from e

    def poster(self, movie_id, f="w154"):
        """Return the (cached) poster image bytes in TMDB size ``f``."""
        poster_path = self.infos(movie_id)['poster_path']
        assert poster_path, "No poster_path defined for {}".format(movie_id)
        url = "http://image.tmdb.org/t/p/{}/{}".format(f, poster_path)
        self.logger.debug("poster %s", poster_path)

        def download():
            r = requests.get(url)
            try:
                # Refuse error responses so that an error page is never
                # cached and later handed to the image decoder.
                r.raise_for_status()
                return r.content
            finally:
                r.close()

        return self._cache(movie_id, "tmdb_poster_{}.jpg".format(f), download)

    def poster_low(self, movie_id, format="w92"):
        """Return a blurred, heavily compressed JPEG of the poster, or None."""
        data = self.poster(movie_id, format)
        if not data:
            return None
        image = Image.open(BytesIO(data))
        image = image.filter(ImageFilter.GaussianBlur(radius=1))
        buf = BytesIO()
        image.save(buf, "JPEG", quality=18, optimize=True)
        return buf.getvalue()

    def _imdb_request(self, path, query):
        """GET ``path`` on app.imdb.com with ``query`` plus the fixed API parameters."""
        # see also https://github.com/richardasaurus/imdb-pie
        # nice library but a bit strange API,
        # so we chose to reimplement stuff here
        BASE_URI = 'app.imdb.com'
        API_KEY = '2wex6aeu6a8q9e49k7sfvufd6rhh0n'
        SHA1_KEY = hashlib.sha1(API_KEY.encode('utf8')).hexdigest()
        HEADERS = {
            'user-agent': 'AppleWebKit/534.46 (KHTML, like Gecko) ' +
                          'Version/5.1 Mobile/9A405',
        }
        PARAMS = {
            "api": "v1",
            "appid": "iphone1_1",
            "apiPolicy": "app1_1",
            "apiKey": SHA1_KEY,
            "locale": "en_US",
            "timestamp": "{:.0f}".format(time.time())
        }
        q = query.copy()
        # The fixed parameters win over same-named entries of ``query``.
        q.update(PARAMS)
        return requests.get("https://{}{}".format(BASE_URI, path),
                            params=q, headers=HEADERS)

    def imdb_movie(self, movie_id):
        """Return the (cached) ``data`` section of IMDB's main details answer."""
        def do_request():
            r = self._imdb_request("/title/maindetails", {'tconst': movie_id})
            try:
                assert r.status_code == 200, "Request must return status-code 200"
                data = json.loads(r.text)
            finally:
                # Release the connection even when a check above fails.
                r.close()
            assert data is not None and data['data'], "Data must not be empty"
            assert 'rating' in data['data']
            return data['data']
        return self._cache_json(movie_id, "imdb_maindetails.json", do_request)

    def omdb_movie(self, movie_id):
        """Return the (cached) OMDB answer for ``movie_id``."""
        def do_request():
            BASE_URI = "http://www.omdbapi.com/"
            params = {'i': movie_id, 'plot': 'short', 'r': 'json', 'tomatoes': 'true'}
            r = requests.get(BASE_URI, params=params)
            try:
                assert r.status_code == 200, "Request must return status-code 200"
                data = json.loads(r.text)
            finally:
                # Release the connection even when a check above fails.
                r.close()
            assert data is not None and data['Response'] == 'True', "Data must not be empty"
            return data
        return self._cache_json(movie_id, "omdb.json", do_request)


def weight_rating(infos):
    """Add a ``rating`` entry to every info dict and return them as a list.

    The rating is ``vote_average`` shifted by ``2 * d * f`` where ``d`` is
    the movie's share of the highest vote count minus 0.5 and ``f`` is
    ``sin(pi * vote_average / 10)``.

    ``infos`` may be any iterable (it is consumed); an empty input yields
    an empty list. When no movie has any votes the share counts as 0.
    """
    infos = list(infos)
    if not infos:
        return infos
    maxvotes = max(info["vote_count"] for info in infos)
    for info in infos:
        f = math.sin(math.pi * (info["vote_average"] / 10.0))
        # Guard against a division by zero when nobody voted at all.
        share = float(info["vote_count"]) / maxvotes if maxvotes else 0.0
        d = share - 0.5
        info['rating'] = info["vote_average"] + 2 * d * f
    return infos


def filter_filename(filename):
    """Remove possibly everything except the movie name from ``filename``."""
    # NOTE(review): guess_movie_info looks like the API of older guessit
    # releases - confirm the installed version still provides it.
    info = guessit.guess_movie_info(filename).to_dict()
    print("Parsed: {}".format(
        ", ".join("{}={}".format(key, val) for key, val in info.items())))
    if "title" in info:
        return info["title"]
    return os.path.basename(filename).replace(".", " ")


class Protector(object):
    """
    The Protector saves the caller from exception.

    All callable attributes of child are dynamically replaced by function
    returning 'None' in case of an exception - except for KeyboardInterrupt
    exceptions.

    >>> class Thrower(object):
    ...     def some_func(self):
    ...         raise Exception("I'm evil")
    >>> t = Thrower()
    >>> t.some_func() == None
    Traceback (most recent call last):
    Exception: I'm evil
    >>> p = Protector(t)
    >>> p.some_func() == None
    True
    """

    def __init__(self, child):
        self.child = child

    def __getattr__(self, name):
        attr = getattr(self.child, name)
        if not callable(attr):
            return attr

        def protected(*a, **kw):
            try:
                return attr(*a, **kw)
            except Exception as e:
                # KeyboardInterrupt is not an Exception subclass, so it
                # still propagates to the caller.
                logging.error("Error calling %s: '%s': '%s' with args '(%s, %s)'",
                              name, type(e), e, a, kw)
                return None

        return protected


def do_aka(args, imdb_ids):
    """Print the title of every movie for the country ``args.lang``."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print(tmdbcache.alternative_title(imdb_id, locale=args.lang))


def do_data(args, imdb_ids):
    """Dump the TMDB, IMDB and OMDB data of every movie to stdout."""
    def print_data(data, out, indent=0):
        # Recursively write nested dicts/lists; None values are skipped.
        if isinstance(data, dict):
            for (key, val) in data.items():
                if val is None:
                    continue
                out.write("\n{}{}: ".format(indent * " ", key))
                print_data(val, out, indent + 1)
        elif isinstance(data, list) and len(data) > 0:
            for val in data:
                print_data(val, out, indent + 2)
        elif isinstance(data, (bool, str, int, float)):
            out.write(str(data))

    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print_data({"TMDB": tmdbcache.infos(imdb_id),
                    "IMDB": tmdbcache.imdb_movie(imdb_id),
                    "OMDB": tmdbcache.omdb_movie(imdb_id)}, sys.stdout)


def do_year(args, imdb_ids):
    """Print the release year of every movie."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print(tmdbcache.infos(imdb_id)["release_date"].split("-")[0])


def do_prune(args, imdb_ids):
    """Drop the cached TMDB data of every movie."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        tmdbcache.prune(imdb_id)


def do_rating(args, imdb_ids):
    """Calculates a rating based on vote_average and vote_count"""
    tmdbcache = TMDBCache()
    candidates = (tmdbcache.infos(fid[1]) for fid in imdb_ids)
    # weight_rating() consumes its input, so keep the list it returns
    # instead of iterating the exhausted iterator again.
    infos = weight_rating(info for info in candidates
                          if "vote_average" in info and "vote_count" in info)
    # Build the complete template first: ``.format`` binds tighter than
    # ``+`` and would otherwise only fill in the second half.
    template = ("{rating:.02f} {imdb_id} {title:30s} avg=" +
                "{vote_average:.1f} count={vote_count:.0f}")
    for info in infos:
        print(template.format(**info))


def do_index(args, imdb_ids):
    """Create the index website in the current directory.

    Copies the ``html`` template folder that sits next to this script to
    ``movies.html``, ``movies-grid.html`` and ``movies-files`` and writes
    the movie data to ``movies-files/data.js``. ``imdb_ids`` is an iterable
    of ``(path, imdb_id)`` pairs.
    """
    assert not os.path.exists("movies.html") and not os.path.exists("movies-files"), \
        "movies.html or folder movies-files already exists"

    video_suffixes = ("mkv", "avi", "mp4")
    tag_keys = ("format", "videoCodec", "releaseGroup", "container")

    def write_data(callbackName):
        # Write ``callbackName(<json list>)`` to movies-files/data.js.
        filename = "movies-files/data.js"
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        # Fetch everything first so the file handle is only held while
        # writing; the pool is shut down as soon as the map is done.
        with Pool(processes=12) as pool:  # parallel fetchers
            infos = pool.map(getInfo, imdb_ids)
        with open(filename, "w") as out:
            out.write(callbackName)
            out.write("(")
            json.dump([info for info in infos if info], out)
            out.write(")")
        return filename

    def getInfo(a):
        # Collect one movie's entry for data.js; None when anything fails.
        def get_tags(path):
            def find():
                for root, dirs, files in os.walk(path):
                    for curfile in files:
                        if curfile.lower()[-3:] not in video_suffixes:
                            continue
                        info = guessit.guess_movie_info(
                            os.path.join(root, curfile)).to_dict()
                        for key, val in info.items():
                            if key not in tag_keys:
                                continue
                            values = val if isinstance(val, list) else [val]
                            for lval in values:
                                yield "{}:{}".format(key, lval)
            return sorted(set(find()))

        def save_poster(imdb_id, suffix, **poster_args):
            # Store the low quality poster below movies-files/poster.
            data = db.poster_low(imdb_id, **poster_args)
            if not data:
                return None
            out = os.path.join("movies-files/poster", imdb_id + suffix)
            with open(out, "wb") as fh:
                fh.write(data)
            return out

        def poster(imdb_id):
            return save_poster(imdb_id, ".jpg")

        def poster_grid(imdb_id):
            return save_poster(imdb_id, "-grid.jpg", format="w154")

        def compile_summary(imdb, omdb):
            buf = []
            if "directors_summary" in imdb:
                buf.append("By " + ", ".join(
                    i["name"]["name"] for i in imdb["directors_summary"]))
            if "cast_summary" in imdb:
                buf.append("Starring: " + ", ".join(
                    i["name"]["name"] for i in imdb["cast_summary"]))
            if omdb["Rated"] != "N/A":
                buf.append("Rated: " + omdb["Rated"])
            return ". ".join(buf)

        def number_or_none(value):
            # Plain conditional instead of ``cond and float(x) or None``,
            # which turned a legitimate score of 0 into None.
            return float(value) if value != 'N/A' else None

        path, imdb_id = a
        db = Protector(TMDBCache())
        imdb = db.imdb_movie(imdb_id)
        omdb = db.omdb_movie(imdb_id)
        tmdb_info = db.infos(imdb_id)
        if not imdb or not omdb or not tmdb_info:
            print("Error in {} {}".format(path, imdb_id))
            return None

        try:
            website = imdb.get('homepage')
            if not website and omdb['Website'] != 'N/A':
                website = omdb['Website']
            consensus = omdb['tomatoConsensus']
            return {
                'id': imdb_id,
                'title': tmdb_info['title'],
                'summary': compile_summary(imdb, omdb),
                'poster': poster(imdb_id),
                'posterGrid': poster_grid(imdb_id),
                'tagline': imdb.get('tagline') or None,
                'plot': (imdb['plot']['outline'] or None) if 'plot' in imdb else None,
                'website': website or None,
                'release': tmdb_info.get('release_date') or None,
                # '/' suffix required here for some reason
                'path': {'label': path, 'path': urlencode(path) + "/"},
                'tags': get_tags(path),
                'imdbRating': imdb['rating'],
                'imdbVotes': imdb['num_votes'],
                'omdbTomatoConsensus': (consensus or None) if consensus != 'N/A' else None,
                'omdbTomatoMeter': number_or_none(omdb['tomatoMeter']),
                'omdbTomatoRating': number_or_none(omdb['tomatoRating']),
                'tmdbId': tmdb_info['id'],
            }
        except Exception as e:
            print("Error in {} {}: {}".format(path, imdb_id, str(e)))
            return None

    datadir = os.path.join(os.path.dirname(__file__), "html")
    index_files = os.path.join(datadir, "movies-files")
    index_html = os.path.join(datadir, "movies.html")
    index_html_grid = os.path.join(datadir, "movies-grid.html")

    shutil.copytree(index_files, "movies-files", copy_function=shutil.copyfile)
    shutil.copyfile(index_html, "movies.html")
    shutil.copyfile(index_html_grid, "movies-grid.html")
    os.mkdir("movies-files/poster")

    write_data("dataCb")


class HelpAction(argparse._HelpAction):
    """Help action that also prints usage and arguments of every sub-command.

    Relies on private ``argparse`` attributes of the parser and formatter.
    """

    def __call__(self, parser, namespace, values, option_string=None):
        formatter = parser._get_formatter()
        formatter.add_usage(parser.usage,
                            parser._actions,
                            parser._mutually_exclusive_groups)

        # Options of the main parser.
        formatter.start_section(parser._optionals.title)
        formatter.add_text(parser._optionals.description)
        formatter.add_arguments(parser._optionals._group_actions)
        formatter.end_section()

        subparsers_actions = [
            action for action in parser._actions
            if isinstance(action, argparse._SubParsersAction)]

        for subparsers_action in subparsers_actions:
            # get all subparsers and print help
            subparsers = subparsers_action.choices
            for subaction in subparsers_action._get_subactions():
                subparser = subparsers[subaction.dest]
                usage = formatter._format_actions_usage(subparser._actions, [])
                formatter.start_section("{} {} {}".format(formatter._prog,
                                                          subaction.dest,
                                                          usage))
                formatter.add_text(subaction.help)
                formatter.add_arguments(subparser._positionals._group_actions)
                formatter.add_arguments(subparser._optionals._group_actions)
                formatter.end_section()

        print(formatter.format_help())
        parser.exit(0)


def do_rename(args):
    """Interactively move every file of ``args.file`` into a named folder.

    For each file the user confirms a search term and picks one of the TMDB
    hits; a directory named after title, year and IMDB id is created and
    the file (when it exists) is moved into it. Any non-numeric answer to
    the "Choose" prompt skips the file.
    """
    def substitute_filename(filename):
        """remove potentially harmful characters from filename"""
        # Raw string: "\-" is an invalid escape in a normal string literal.
        return re.sub(r"[^a-zA-Z0-9()\- _#]", "_", filename)

    def make_filename(result):
        main_title = result["alternative_title"]
        orig_title = result["original_title"]
        filename = "{} ({})".format(main_title, result["year"])
        if main_title != orig_title:
            filename += " #{}".format(orig_title)
        filename += " #{}".format(result["imdb_id"])
        return filename

    # getdefaultlocale() may return (None, None), e.g. with LANG=C.
    countrycode = (locale.getdefaultlocale()[0] or "")[3:5]

    for input_file in args.file:
        db = Protector(TMDBCache())
        search_term = filter_filename(input_file)
        search_term = input("Search [{}]: ".format(search_term)) or search_term
        results = db.tmdb_search(search_term)
        if results is None:
            # The Protector already logged the failed search.
            print("Skip {}".format(input_file))
            continue
        results.sort(key=lambda i: i["popularity"], reverse=True)
        results = [r for r in results if r["imdb_id"] and r["release_date"]]

        select = None
        while not select:
            for index, result in enumerate(results, 1):
                result["i"] = index
                result["year"] = result["release_date"].split("-")[0]
                print("{i} {title} ({year})"
                      " (orig.: {original_title}) - check: "
                      "http://www.imdb.com/title/{imdb_id}".format(**result))
                # A failed lookup yields None; fall back to the plain title.
                alt_title = db.alternative_title(result["imdb_id"],
                                                 locale=countrycode)
                result["alternative_title"] = alt_title or result["title"]
                if result["alternative_title"] != result["title"]:
                    print(" a.k.a.: {}".format(result["alternative_title"]))

            answer = input("Choose: ")
            if not re.match("^[0-9]+$", answer):
                select = None
                break
            select = int(answer)
            if select > len(results):
                # Ask again instead of failing with an IndexError below.
                print("Invalid choice {}".format(select))
                select = None

        if not select:
            print("Skip {}".format(input_file))
            continue

        filename = make_filename(results[select - 1])
        filename = substitute_filename(filename)

        if os.path.exists(input_file):
            print("Move '{}' -> '{}'".format(input_file, filename))
            os.mkdir(filename)
            shutil.move(input_file, filename)
        else:
            print("mkdir '{}'".format(filename))
            os.mkdir(filename)


def do_test(args):
    """Run the doctests of this module."""
    import doctest
    doctest.testmod()