diff options
Diffstat (limited to 'movietool/__init__.py')
-rwxr-xr-x | movietool/__init__.py | 481 |
1 files changed, 481 insertions, 0 deletions
#!/usr/bin/env python3
# coding: utf-8
# Source: movietool/__init__.py (new file, mode 100755)
# Python 3 only (enforced by the assert below).
import sys
assert sys.version.startswith("3."), "Requires python3"
import os
import re
import json
import time
import argparse
import locale
import math
import shutil
import logging
import hashlib
from io import BytesIO
from multiprocessing.dummy import Pool
from urllib.parse import quote as urlencode
from PIL import Image, ImageFilter
import tmdbsimple as tmdb
import requests
import guessit


def read_key():
    """Return the TMDB API key.

    Lookup order: the ``TMDB_KEY`` environment variable, then the file
    ``tmdbkey`` in ``$XDG_CONFIG_HOME`` (``~/.config`` when unset), then
    ``~/.tmdbkey``.  File contents are stripped of surrounding whitespace.

    Raises:
        Exception: if none of the three sources exists.
    """
    if "TMDB_KEY" in os.environ:
        return os.environ["TMDB_KEY"]

    home = os.path.expanduser("~")
    cfg_home = os.environ.get("XDG_CONFIG_HOME", os.path.join(home, ".config"))

    for candidate in (os.path.join(cfg_home, "tmdbkey"),
                      os.path.join(home, ".tmdbkey")):
        if os.path.exists(candidate):
            # 'with' closes the handle; the original leaked it.
            with open(candidate, "r") as fh:
                return fh.read().strip()

    raise Exception("No TMDB Key defined. " +
                    "Set TMDB_KEY=.. or {} or ~/.tmdbkey file".format(cfg_home))


class TMDBCache(object):
    """File-backed cache for TMDB, IMDB and OMDB lookups.

    Every entry is stored as a file named after its key below
    ``<cache home>/movies/<movie_id>/``.
    """

    def __init__(self):
        logger_name = self.__class__.__module__ + "." + self.__class__.__name__
        self.logger = logging.getLogger(logger_name)

    def _get_cachedir(self, movie_id):
        """Return (creating it if needed) the cache directory of *movie_id*.

        The cache home is ``$XDG_CACHE_HOME`` or ``~/.cache`` when unset.
        """
        cache_home = os.environ.get(
            "XDG_CACHE_HOME", os.path.join(os.path.expanduser("~"), ".cache"))
        cachedir = os.path.join(cache_home, "movies", movie_id)
        # exist_ok avoids the exists()/makedirs() race when several worker
        # threads (see do_index) hit the same directory at once.
        os.makedirs(cachedir, exist_ok=True)
        return cachedir

    def _cache(self, movie_id, key, callable_func):
        """Return the cached bytes for *key*, producing them on a miss.

        Args:
            movie_id: id used as the cache sub-directory name.
            key: file name of the entry inside that directory.
            callable_func: zero-argument callable returning ``bytes``; only
                called when no usable cache file exists.

        Returns:
            The cached (or freshly produced) bytes.
        """
        cachefile = os.path.join(self._get_cachedir(movie_id), key)
        # An empty file is treated as a miss: the previous implementation
        # created the file *before* running the producer, so a failed request
        # left an empty entry behind that poisoned all later lookups.
        if os.path.exists(cachefile) and os.path.getsize(cachefile) > 0:
            with open(cachefile, "rb") as fh:
                return fh.read()

        # Produce first, write second: a raising producer must not leave a
        # cache file behind.
        data = callable_func()
        with open(cachefile, "wb") as fh:
            fh.write(data)
        return data

    def _cache_json(self, movie_id, key, callable_func):
        """Like :meth:`_cache`, but stores the producer's result as UTF-8 JSON
        and returns the decoded object."""
        def json_wrapper():
            return json.dumps(callable_func()).encode('utf-8')
        return json.loads(self._cache(movie_id, key, json_wrapper).decode('utf-8'))

    def prune(self, movie_id):
        """Delete the cached TMDB info and alternative-title entries of *movie_id*."""
        self.logger.debug("prune {}".format(movie_id))
        keys = [
            "tmdb_info.json",
            "tmdb_alternative_title.json",
            # alternative_title() prefixes its cache key with the movie id, so
            # this is the file name that actually exists on disk.
            movie_id + "tmdb_alternative_title.json",
        ]
        cachedir = self._get_cachedir(movie_id)
        for key in keys:
            cachefile = os.path.join(cachedir, key)
            if os.path.exists(cachefile):
                self.logger.warning("Remove {}".format(cachefile))
                os.unlink(cachefile)
            else:
                # NOTE(review): this branch runs when the file does NOT exist;
                # the message text looks inverted - confirm before changing.
                self.logger.debug("Key in db {}".format(cachefile))

    def infos(self, movie_id):
        """Return the (cached) TMDB movie info dict for *movie_id*.

        Raises:
            Exception: wrapping any failure of the lookup.
        """
        try:
            self.logger.debug("movie %s", movie_id)
            return self._cache_json(movie_id, "tmdb_info.json",
                                    tmdb.Movies(movie_id).info)
        except Exception as e:
            raise Exception("Failed to query movie-id {id}: {reason}".format(
                id=movie_id, reason=str(e))) from e

    def tmdb_search(self, query):
        """Search TMDB for *query* and return the full info dict of each hit."""
        results = tmdb.Search().movie(query=query)["results"]
        return [self.infos(str(hit['id'])) for hit in results]

    def alternative_title(self, movie_id, locale):
        """Returns the title in selected locale or original title otherwise"""
        try:
            key = movie_id + "tmdb_alternative_title.json"
            search = tmdb.Movies(movie_id).alternative_titles
            titles = self._cache_json(movie_id, key, search)
            matches = [entry for entry in titles["titles"]
                       if entry["iso_3166_1"] == locale]
            if matches:
                return matches[0]["title"]
            infos = self.infos(movie_id)
            return infos["title"] or infos["original_title"]
        except Exception as e:
            raise Exception("Failed to query movie-id {id}: {reason}".format(
                id=movie_id, reason=str(e))) from e

    def poster(self, movie_id, f="w154"):
        """Return the (cached) poster image bytes of *movie_id*.

        Args:
            movie_id: movie id understood by :meth:`infos`.
            f: size segment of the image URL (e.g. ``"w154"``).

        Raises:
            AssertionError: if the movie info has no ``poster_path``.
        """
        poster_path = self.infos(movie_id)['poster_path']
        if not poster_path:
            # Explicit raise instead of 'assert' so the check survives -O.
            raise AssertionError(
                "No poster_path defined for {}".format(movie_id))

        url = "http://image.tmdb.org/t/p/{}/{}".format(f, poster_path)
        self.logger.debug("poster %s", poster_path)

        def download():
            r = requests.get(url)  # r.headers['content-type']
            try:
                # Do not cache an HTTP error page as if it were a JPEG.
                r.raise_for_status()
                return r.content
            finally:
                r.close()

        return self._cache(movie_id, "tmdb_poster_{}.jpg".format(f), download)

    def poster_low(self, movie_id, format="w92"):
        """Return a blurred, low-quality JPEG of the poster, or None.

        The poster of size *format* is blurred (Gaussian, radius 1) and
        re-encoded as JPEG with quality 18.  Returns ``None`` when the poster
        data is empty.
        """
        data = self.poster(movie_id, format)
        if not data:
            return None
        image = Image.open(BytesIO(data))
        image = image.filter(ImageFilter.GaussianBlur(radius=1))
        buf = BytesIO()
        image.save(buf, "JPEG", quality=18, optimize=True)
        return buf.getvalue()

    def _imdb_request(self, path, query):
        """Send a GET request for *path* to app.imdb.com.

        *query* is copied and extended with the fixed app parameters below;
        the ``requests`` response object is returned unmodified.
        """
        # see also https://github.com/richardasaurus/imdb-pie
        # nice library but a bit strange API,
        # so we chose to reimplement stuff here
        BASE_URI = 'app.imdb.com'
        API_KEY = '2wex6aeu6a8q9e49k7sfvufd6rhh0n'
        SHA1_KEY = hashlib.sha1(API_KEY.encode('utf8')).hexdigest()
        HEADERS = {
            'user-agent': 'AppleWebKit/534.46 (KHTML, like Gecko) ' +
                          'Version/5.1 Mobile/9A405',
        }
        PARAMS = {
            "api": "v1",
            "appid": "iphone1_1",
            "apiPolicy": "app1_1",
            "apiKey": SHA1_KEY,
            "locale": "en_US",
            "timestamp": "{:.0f}".format(time.time())
        }
        q = query.copy()
        q.update(PARAMS)
        return requests.get("https://{}{}".format(BASE_URI, path),
                            params=q, headers=HEADERS)

    def imdb_movie(self, movie_id):
        """Return the (cached) IMDB "maindetails" data dict for *movie_id*.

        Raises:
            AssertionError: on a non-200 response, empty data, or data
                without a ``rating`` entry.
        """
        def do_request():
            r = self._imdb_request("/title/maindetails", {'tconst': movie_id})
            try:
                if r.status_code != 200:
                    raise AssertionError("Request must return status-code 200")
                data = json.loads(r.text)
            finally:
                r.close()
            if data is None or not data['data']:
                raise AssertionError("Data must not be empty")
            if 'rating' not in data['data']:
                raise AssertionError("Data must contain a rating")
            return data['data']
        return self._cache_json(movie_id, "imdb_maindetails.json", do_request)

    def omdb_movie(self, movie_id):
        """Return the (cached) OMDB record for *movie_id*.

        Raises:
            AssertionError: on a non-200 response or when the record's
                ``Response`` field is not ``'True'``.
        """
        def do_request():
            BASE_URI = "http://www.omdbapi.com/"
            params = {'i': movie_id, 'plot': 'short', 'r': 'json', 'tomatoes': 'true'}
            r = requests.get(BASE_URI, params=params)
            try:
                if r.status_code != 200:
                    raise AssertionError("Request must return status-code 200")
                data = json.loads(r.text)
            finally:
                r.close()
            if data is None or data['Response'] != 'True':
                raise AssertionError("Data must not be empty")
            return data
        return self._cache_json(movie_id, "omdb.json", do_request)


def weight_rating(infos):
    """Add a 'rating' entry to every info dict and return them as a list.

    The rating is ``vote_average`` shifted by up to +/-1 depending on the
    entry's ``vote_count`` relative to the largest vote count in *infos*;
    the shift is scaled by ``sin(pi * vote_average / 10)``, so it vanishes
    at averages 0 and 10.

    Args:
        infos: iterable of dicts with ``vote_average`` and ``vote_count``.

    Returns:
        list of the same dicts (mutated in place); empty for empty input.
    """
    infos = list(infos)
    if not infos:
        return infos
    maxvotes = max(info["vote_count"] for info in infos)
    for info in infos:
        f = math.sin(math.pi * (info["vote_average"] / 10.0))
        if maxvotes:
            d = (float(info["vote_count"]) / maxvotes) - 0.5
        else:
            # Nobody has any votes: no relative weighting is possible, so the
            # rating stays at the plain average instead of dividing by zero.
            d = 0.0
        info['rating'] = info["vote_average"] + 2 * d * f
    return infos


def filter_filename(filename):
    """remove possibly everything except the movie name"""
    info = guessit.guess_movie_info(filename).to_dict()
    print("Parsed: {}".format(", ".join("{}={}".format(*kv) for kv in info.items())))
    if "title" in info:
        return info["title"]
    # guessit found no title: fall back to the base name with dots as spaces.
    return os.path.basename(filename).replace(".", " ")


class Protector(object):
    """ The Protector saves the caller from exception.
    All callable attributes of child are dynamically
    replaced by function returning 'None' in case of an
    exception - except for KeyboardInterrupt exceptions.
    >>> class Thrower(object):
    ...     def some_func(self):
    ...         raise Exception("I'm evil")
    >>> t = Thrower()
    >>> t.some_func() == None
    Traceback (most recent call last):
    Exception: I'm evil
    >>> p = Protector(t)
    >>> p.some_func() == None
    True
    """
    def __init__(self, child):
        self.child = child

    def __getattr__(self, name):
        attr = getattr(self.child, name)

        def protected(*a, **kw):
            try:
                return attr(*a, **kw)
            except KeyboardInterrupt:
                raise
            except Exception as e:
                logging.error("Error calling %s: '%s': '%s' with args '(%s, %s)'", name, type(e), e, a, kw)
                return None

        # Only callables are wrapped; plain attributes pass through as-is.
        if callable(attr):
            return protected
        return attr


def do_aka(args, imdb_ids):
    """Print the title of each movie in the country given by ``args.lang``."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print(tmdbcache.alternative_title(imdb_id, locale=args.lang))


def do_data(args, imdb_ids):
    """Dump the TMDB, IMDB and OMDB data of each movie to stdout."""
    def print_data(data, io, indent=0):
        # Recursively writes nested dicts/lists; None values are skipped and
        # values of other types are ignored.
        if isinstance(data, dict):
            for (key, val) in data.items():
                if val is None:
                    continue
                io.write("\n{}{}: ".format(indent * " ", key))
                print_data(val, io, indent + 1)
        elif isinstance(data, list):
            for val in data:
                print_data(val, io, indent + 2)
        elif type(data) in (bool, str, int, float):
            io.write(str(data))

    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print_data({"TMDB": tmdbcache.infos(imdb_id),
                    "IMDB": tmdbcache.imdb_movie(imdb_id),
                    "OMDB": tmdbcache.omdb_movie(imdb_id)}, sys.stdout)


def do_year(args, imdb_ids):
    """Print the release year (first part of ``release_date``) of each movie."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        print(tmdbcache.infos(imdb_id)["release_date"].split("-")[0])


def do_prune(args, imdb_ids):
    """Remove the cached TMDB entries of each movie."""
    tmdbcache = TMDBCache()
    for (filename, imdb_id) in imdb_ids:
        tmdbcache.prune(imdb_id)


def do_rating(args, imdb_ids):
    """Calculates a rating based on vote_average and vote_count"""
    tmdbcache = TMDBCache()
    candidates = (tmdbcache.infos(fid[1]) for fid in imdb_ids)
    # weight_rating() consumes its argument and returns the list; the result
    # must be kept, otherwise the loop below iterates an exhausted iterator.
    infos = weight_rating(info for info in candidates
                          if "vote_average" in info and "vote_count" in info)
    for info in infos:
        # Parentheses matter: .format() must apply to the whole template,
        # not only to the second string literal.
        print(("{rating:.02f} {imdb_id} {title:30s} avg=" +
               "{vote_average:.1f} count={vote_count:.0f}").format(**info))


def do_index(args, imdb_ids):
    """creates a index website

    Copies the bundled ``html/movies-files`` directory and ``html/movies.html``
    into the current directory, then writes ``movies-files/data.js`` (a
    ``dataCb(<json>)`` call) and one low-quality poster per movie.

    Raises:
        AssertionError: if ``movies.html`` or ``movies-files`` already exists.
    """
    if os.path.exists("movies.html") or os.path.exists("movies-files"):
        raise AssertionError("movies.html or folder movies-files already exists")

    def write_data(callbackName):
        filename = "movies-files/data.js"
        os.makedirs(os.path.dirname(filename), exist_ok=True)

        pool = Pool(processes=12)  # parallel fetchers
        try:
            infos = pool.map(getInfo, imdb_ids)
        finally:
            pool.close()
            pool.join()

        with open(filename, "w") as out:
            out.write(callbackName)
            out.write("(")
            # Movies whose lookup failed yield None and are dropped.
            json.dump(list(filter(bool, infos)), out)
            out.write(")")
        return filename

    def getInfo(a):
        def get_tags(path):
            tag_keys = ("format", "videoCodec", "releaseGroup", "container")

            def find():
                for root, dirs, files in os.walk(path):
                    for curfile in files:
                        if not curfile.lower().endswith(("mkv", "avi", "mp4")):
                            continue
                        info = guessit.guess_movie_info(os.path.join(root, curfile)).to_dict()
                        for key, val in info.items():
                            if key not in tag_keys:
                                continue
                            if not isinstance(val, list):
                                val = [val]
                            for lval in val:
                                yield "{}:{}".format(key, lval)

            return sorted(set(find()))

        def poster(imdb_id):
            data = db.poster_low(imdb_id)
            if not data:
                return None
            out = os.path.join("movies-files/poster", imdb_id + ".jpg")
            with open(out, "wb") as fh:
                fh.write(data)
            return out

        def compile_summary(imdb, omdb, tmdb):
            buf = []
            if "directors_summary" in imdb:
                buf += ["By " + ", ".join(i["name"]["name"] for i in imdb["directors_summary"])]
            if "cast_summary" in imdb:
                buf += ["Starring: " + ", ".join(i["name"]["name"] for i in imdb["cast_summary"])]
            if omdb["Rated"] != "N/A":
                buf += ["Rated: " + omdb["Rated"]]
            return ". ".join(buf)

        def omdb_field(omdb, key, convert=None):
            # OMDB marks missing values with the string 'N/A'.
            value = omdb[key]
            if value == 'N/A':
                return None
            if convert is not None:
                # A converted 0.0 is a real value and must not become None
                # (the old "x and float(x) or None" idiom dropped it).
                return convert(value)
            return value or None

        path, imdb_id = a
        db = Protector(TMDBCache())
        imdb = db.imdb_movie(imdb_id)
        omdb = db.omdb_movie(imdb_id)
        tmdb_info = db.infos(imdb_id)
        if not imdb or not omdb or not tmdb_info:
            print("Error in {} {}".format(path, imdb_id))
            return None

        try:
            return {
                'id': imdb_id,
                'title': tmdb_info['title'],
                'summary': compile_summary(imdb, omdb, tmdb_info),
                'poster': poster(imdb_id),
                'tagline': imdb.get('tagline') or None,
                'plot': (imdb['plot']['outline'] or None) if 'plot' in imdb else None,
                'website': imdb.get('homepage') or omdb_field(omdb, 'Website'),
                'release': tmdb_info.get('release_date') or None,
                'path': {'label': path, 'path': urlencode(path) + "/"},  # '/' suffix required here for some reason
                'tags': get_tags(path),
                'imdbRating': imdb['rating'],
                'imdbVotes': imdb['num_votes'],
                'omdbTomatoConsensus': omdb_field(omdb, 'tomatoConsensus'),
                'omdbTomatoMeter': omdb_field(omdb, 'tomatoMeter', float),
                'omdbTomatoRating': omdb_field(omdb, 'tomatoRating', float),
                'tmdbId': tmdb_info['id'],
            }
        except Exception as e:
            print("Error in {} {}: {}".format(path, imdb_id, str(e)))
            return None

    datadir = os.path.join(os.path.dirname(__file__), "html")
    index_files = os.path.join(datadir, "movies-files")
    index_html = os.path.join(datadir, "movies.html")
    shutil.copytree(index_files, "movies-files", copy_function=shutil.copyfile)
    # copyfile() needs a complete target file name; "." (a directory) fails.
    shutil.copyfile(index_html, "movies.html")

    os.mkdir("movies-files/poster")
    write_data("dataCb")


class HelpAction(argparse._HelpAction):
    """Help action that also prints the arguments of every sub-command.

    NOTE: relies on private ``argparse`` attributes (``_get_formatter``,
    ``_actions``, ``_format_actions_usage`` ...).
    """

    def __call__(self, parser, namespace, values, option_string=None):
        formatter = parser._get_formatter()
        formatter.add_usage(parser.usage,
                            parser._actions,
                            parser._mutually_exclusive_groups)

        formatter.start_section(parser._optionals.title)
        formatter.add_text(parser._optionals.description)
        formatter.add_arguments(parser._optionals._group_actions)
        formatter.end_section()

        subparsers_actions = [
            action for action in parser._actions
            if isinstance(action, argparse._SubParsersAction)]

        for subparsers_action in subparsers_actions:
            # get all subparsers and print help
            subparsers = subparsers_action.choices
            for subaction in subparsers_action._get_subactions():
                subparser = subparsers[subaction.dest]
                usage = formatter._format_actions_usage(subparser._actions, [])
                formatter.start_section("{} {} {}".format(formatter._prog,
                                                          subaction.dest,
                                                          usage))
                formatter.add_text(subaction.help)
                formatter.add_arguments(subparser._positionals._group_actions)
                formatter.add_arguments(subparser._optionals._group_actions)
                formatter.end_section()

        print(formatter.format_help())
        parser.exit(0)


def do_rename(args):
    """Interactively pick a TMDB match for each entry of ``args.file``.

    For every input the user confirms a search term and chooses one of the
    results.  A directory named
    ``<alternative title> (<year>) #<imdb id>`` (sanitized) is then created;
    an existing input file is moved into it after confirmation.
    """
    def substitute_filename(filename):
        """remove potentially harmful characters from filename"""
        return re.sub(r"[^a-zA-Z0-9()\- _#]", "_", filename)

    # getdefaultlocale() may return (None, None); fall back to no country
    # code, in which case the regular title is used.
    countrycode = (locale.getdefaultlocale()[0] or "")[3:5]

    for input_file in args.file:
        db = Protector(TMDBCache())
        search_term = filter_filename(input_file)
        search_term = input("Search [{}]: ".format(search_term)) or search_term
        # The Protector turns a failed search into None.
        found = db.tmdb_search(search_term) or []
        results = [r for r in found if r["imdb_id"] and r["release_date"]]
        results.sort(key=lambda r: r["popularity"], reverse=True)
        if not results:
            # Nothing to choose from; the selection loop below could never
            # produce a valid index.
            print("No results for '{}'".format(search_term))
            continue

        select = None
        while select is None:
            for (i, result) in enumerate(results, start=1):
                result["i"] = i
                result["year"] = result["release_date"].split("-")[0]
                print("{i} {title} ({year}) - check: http://www.imdb.com/title/{imdb_id}".format(**result))
                # A failed lookup yields None; keep the regular title then.
                result["alternative_title"] = (
                    db.alternative_title(result["imdb_id"], locale=countrycode)
                    or result["title"])
                if result["alternative_title"] != result["title"]:
                    print(" a.k.a.: {}".format(result["alternative_title"]))
            try:
                choice = int(input("Choose: "))
            except ValueError:
                continue
            # Only accept an index that actually refers to a listed result.
            if 1 <= choice <= len(results):
                select = choice

        filename = "{alternative_title} ({year}) #{imdb_id}".format(**results[select - 1])
        filename = substitute_filename(filename)

        if os.path.exists(input_file):
            choice = input("Move '{}' -> '{}' ? [y/N]: ".format(input_file, filename)).lower()
            if choice == "y":
                os.mkdir(filename)
                shutil.move(input_file, filename)
        else:
            print("mkdir '{}'".format(filename))
            os.mkdir(filename)


def do_test(args):
    """Run the doctests via ``doctest.testmod()``."""
    import doctest
    doctest.testmod()