simplified caching

author: Yves Fischer <yvesf-git@xapek.org> 2014-12-21 22:47:26 +0100
committer: Yves Fischer <yvesf-git@xapek.org> 2014-12-21 22:47:26 +0100
commit: 2d0e49183a5c49badb96a0168dab575f7db70b21 (patch)
tree: 521662aaeca0357aa19b39076f7383ccc8839221 /imdb-lookup/imdbinfo.py
parent: 3008170f2bd120c43a876b1f1adba885db594618 (diff)
download: scripts-2d0e49183a5c49badb96a0168dab575f7db70b21.tar.gz
scripts-2d0e49183a5c49badb96a0168dab575f7db70b21.zip
1 files changed, 101 insertions, 92 deletions
diff --git a/imdb-lookup/imdbinfo.py b/imdb-lookup/imdbinfo.py
index e1fe5be..7419103 100755
--- a/imdb-lookup/imdbinfo.py
+++ b/imdb-lookup/imdbinfo.py
@@ -5,7 +5,6 @@ import sys
 assert sys.version.startswith("3."), "Requires python3"
 import os
 import re
-import dbm
 import json
 import time
 import argparse
@@ -42,36 +41,62 @@ def read_key():
 
 
 class TMDBCache(object):
-    def __enter__(self):
+    def __init__(self):
         logger_name = self.__class__.__module__ + "." + self.__class__.__name__
         self.logger = logging.getLogger(logger_name)
-        self.logger.info("Open db")
-        self.db = dbm.open(self._get_db_filename("tmdbmovie.dbm"), "c")
-        self.db_images = dbm.open(self._get_db_filename("tmdbposter.dbm"), "c")
-        return self
 
-    def __exit__(self, type, value, traceback):
-        self.logger.info("Close db")
-        self.db.close()
-        self.db_images.close()
-
-    def _get_db_filename(self, name):
+    def _get_cachedir(self, movie_id):
         if "XDG_CACHE_HOME" in os.environ.keys():
             cachedir = os.environ["XDG_CACHE_HOME"]
         else:
             cachedir = os.path.join(os.path.expanduser("~"), ".cache")
-        return os.path.join(cachedir, name)
 
-    def _cache(self, key, callable_func):
-        if key not in self.db:
-            self.db[key] = json.dumps(callable_func())
-        d = self.db[key].decode('utf-8')
-        return json.loads(d)
+        cachedir = os.path.join(cachedir, "movies")
+        cachedir = os.path.join(cachedir, movie_id)
+        if not os.path.exists(cachedir):
+            os.makedirs(cachedir)
+
+        return cachedir
+
+    def _cache(self, movie_id, key, callable_func):
+        cachedir = self._get_cachedir(movie_id)
+        cachefile = os.path.join(cachedir, key)
+        if os.path.exists(cachefile):
+            fh = open(cachefile, "rb")
+            data = fh.read()
+            fh.close()
+            return data
+        else:
+            fh = open(cachefile, 'wb')
+            data = callable_func()
+            fh.write(data)
+            fh.close()
+            return data
+
+    def _cache_json(self, movie_id, key, callable_func):
+        def json_wrapper():
+            return json.dumps(callable_func()).encode('utf-8')
+        return json.loads(self._cache(movie_id, key, json_wrapper).decode('utf-8'))
+
+    def prune(self, movie_id):
+        self.logger.debug("prune {}".format(movie_id))
+        keys = [
+            "tmdb_info.json",
+            "tmdb_alternative_title.json",
+
+        ]
+        for key in keys:
+            cachefile = os.path.join(self._get_cachedir(movie_id), key)
+            if os.path.exists(cachefile):
+                self.logger.warn("Remove {}".format(cachefile))
+                os.unlink(cachefile)
+            else:
+                self.logger.debug("Key in db {}".format(cachefile))
 
     def infos(self, movie_id):
         try:
             self.logger.debug("movie %s", movie_id)
-            return self._cache(movie_id + "movies.info",
+            return self._cache_json(movie_id, "tmdb_info.json",
                                tmdb.Movies(movie_id).info)
         except Exception as e:
             raise Exception("Failed to query movie-id {id}: {reason}".format(
@@ -80,9 +105,9 @@ class TMDBCache(object):
     def alternative_title(self, movie_id, locale):
         """Returns the title in selected locale or original title otherwise"""
         try:
-            key = movie_id + "movies.alt_titles"
+            key = movie_id + "tmdb_alternative_title.json"
             search = tmdb.Movies(movie_id).alternative_titles
-            titles = self._cache(key, search)
+            titles = self._cache_json(movie_id, key, search)
             alt_title = list(filter(lambda l: l["iso_3166_1"] == locale,
                                     titles["titles"]))
             if alt_title:
@@ -94,42 +119,28 @@ class TMDBCache(object):
             raise Exception("Failed to query movie-id {id}: {reason}".format(
                             id=movie_id, reason=str(e)))
 
-    def prune(self, movie_id):
-        self.logger.debug("prune {}".format(movie_id))
-        keys = [
-            "imdb_maindetails_{}".format(movie_id),
-            movie_id + "movies.info",
-            movie_id + "movies.alt_titles",
-        ]
-        for key in keys:
-            if key in self.db:
-                self.logger.warn("Remove {}".format(key))
-                del self.db[key]
-            else:
-                self.logger.debug("Key not in db {}".format(key))
+    def poster(self, movie_id, f="w154"):
+        poster_path = self.infos(movie_id)['poster_path']
+        assert poster_path, "No poster_path defined for {}".format(movie_id)
 
-    def poster(self, poster_path, f="w154"):
-        self.logger.debug("poster %s", poster_path)
-        key = "poster_{}_{}".format(f, hashlib.md5(poster_path.encode('utf-8')).hexdigest()[0:10])
-        keyContentType = "{}_ct".format(key)
         url = "http://image.tmdb.org/t/p/{}/{}".format(f, poster_path)
-        if key not in self.db_images or keyContentType not in self.db_images:
-            r = requests.get(url)
-            self.db_images[key] = r.content
-            self.db_images[keyContentType] = r.headers['content-type']
+        self.logger.debug("poster %s", poster_path)
+        def download():
+            r = requests.get(url) # r.headers['content-type']
             r.close()
-        return (self.db_images[keyContentType], self.db_images[key])
+            return r.content
 
-    def poster_low(self, poster_path, format="w154"):
-        p = self.poster(poster_path, format)
-        if not p:
+        return self._cache(movie_id, "tmdb_poster.jpg", download)
+
+    def poster_low(self, movie_id, format="w154"):
+        data = self.poster(movie_id, format)
+        if not data:
             return None
-        contentType, data = p
         image = Image.open(BytesIO(data))
         image = image.filter(ImageFilter.GaussianBlur(radius=1))
         buf = BytesIO()
         image.save(buf, "JPEG", quality=18, optimize=True)
-        return ("image/jpeg", buf.getvalue())
+        return buf.getvalue()
 
     def _imdb_request(self, path, query):
         # see also https://github.com/richardasaurus/imdb-pie
@@ -163,8 +174,7 @@ class TMDBCache(object):
             r.close()
             assert data is not None and data['data'], "Data must not be empty"
             return data
-        key = "imdb_maindetails_{}".format(movie_id)
-        return self._cache(key, do_request)
+        return self._cache_json(movie_id, "imdb_maindetails", do_request)
 
     def omdb_movie(self, movie_id):
         def do_request():
@@ -176,8 +186,7 @@ class TMDBCache(object):
             r.close()
             assert data is not None and data['Response'] == 'True', "Data must not be empty"
             return data
-        key = "omdb_{}".format(movie_id)
-        return self._cache(key, do_request)
+        return self._cache_json(movie_id, "omdb.json", do_request)
 
 
 def weight_rating(infos):
@@ -228,9 +237,9 @@ class Protector(object):
 
 
 def do_aka(args, imdb_ids):
-    with TMDBCache() as tmdbcache:
-        for (filename, imdb_id) in imdb_ids:
-            print(tmdbcache.alternative_title(imdb_id, locale=args.lang))
+    tmdbcache = TMDBCache()
+    for (filename, imdb_id) in imdb_ids:
+        print(tmdbcache.alternative_title(imdb_id, locale=args.lang))
 
 
 def do_data(args, imdb_ids):
@@ -248,34 +257,34 @@ def do_data(args, imdb_ids):
         elif type(data) in (bool, str, int, float):
             io.write(str(data))
 
-    with TMDBCache() as tmdbcache:
-        for (filename, imdb_id) in imdb_ids:
-            print_data({"TMDB": tmdbcache.infos(imdb_id),
-                        "IMDB": tmdbcache.imdb_movie(imdb_id),
-                        "OMDB": tmdbcache.omdb_movie(imdb_id)}, sys.stdout)
+    tmdbcache = TMDBCache()
+    for (filename, imdb_id) in imdb_ids:
+        print_data({"TMDB": tmdbcache.infos(imdb_id),
+                    "IMDB": tmdbcache.imdb_movie(imdb_id),
+                    "OMDB": tmdbcache.omdb_movie(imdb_id)}, sys.stdout)
 
 
 def do_year(args, imdb_ids):
-    with TMDBCache() as tmdbcache:
-        for (filename, imdb_id) in imdb_ids:
-            print(tmdbcache.infos(imdb_id)["release_date"].split("-")[0])
+    tmdbcache = TMDBCache()
+    for (filename, imdb_id) in imdb_ids:
+        print(tmdbcache.infos(imdb_id)["release_date"].split("-")[0])
 
 
 def do_prune(args, imdb_ids):
-    with TMDBCache() as tmdbcache:
-        for (filename, imdb_id) in imdb_ids:
-            tmdbcache.prune(imdb_id)
+    tmdbcache = TMDBCache()
+    for (filename, imdb_id) in imdb_ids:
+        tmdbcache.prune(imdb_id)
 
 
 def do_rating(args, imdb_ids):
     """Calculates a rating based on vote_average and vote_count"""
-    with TMDBCache() as tmdbcache:
-        infos = filter(lambda i: "vote_average" in i and "vote_count" in i,
-                       map(lambda fid: tmdbcache.infos(fid[1]), imdb_ids))
-        weight_rating(infos)
-        for info in infos:
-            print("{rating:.02f} {imdb_id} {title:30s} avg=" +
-                  "{vote_average:.1f} count={vote_count:.0f}".format(**info))
+    tmdbcache = TMDBCache()
+    infos = filter(lambda i: "vote_average" in i and "vote_count" in i,
+                   map(lambda fid: tmdbcache.infos(fid[1]), imdb_ids))
+    weight_rating(infos)
+    for info in infos:
+        print("{rating:.02f} {imdb_id} {title:30s} avg=" +
+              "{vote_average:.1f} count={vote_count:.0f}".format(**info))
 
 
 def do_index(args, imdb_ids):
@@ -298,14 +307,15 @@ def do_index(args, imdb_ids):
         return out
 
     def data(callbackName):
-        db = Protector(tmdbcache)
-        def poster(imdb_id, data):
+        db = TMDBCache() #Protector(tmdbcache)
+        def poster(imdb_id):
+            data = db.poster_low(imdb_id)
             if data:
                 if not os.path.exists(".index.html/poster"):
                     os.makedirs(".index.html/poster")
 
                 out = os.path.join(".index.html/poster",imdb_id+".jpg")
-                open(out, "wb").write(data[1])
+                open(out, "wb").write(data)
                 return out
             else:
                 return None
@@ -323,7 +333,7 @@ def do_index(args, imdb_ids):
                 'id': imdb_id,
                 'title': tmdb['title'],
                 'path': path,
-                'poster': poster(imdb_id, db.poster_low(tmdb['poster_path'])),
+                'poster': poster(imdb_id),
                 'tagline': 'tagline' in imdb and imdb['tagline'] or None,
                 'plot': 'plot' in imdb and imdb['plot']['outline'] or None,
                 'website': 'homepage' in imdb and imdb['homepage'] or omdb['Website'] != 'N/A' and omdb['Website'] or None,
@@ -336,7 +346,6 @@ def do_index(args, imdb_ids):
                 'omdbUserTomato': (omdb['tomatoUserMeter'] != 'N/A') and float(omdb['tomatoUserMeter']) or None,
                 'omdbTomatoRating': (omdb['tomatoRating'] != 'N/A') and float(omdb['tomatoRating']) or None,
                 'omdbTomatoUserRating': (omdb['tomatoUserRating'] != 'N/A') and float(omdb['tomatoUserRating']) or None,
-                'omdbTomatoFresh': (omdb['tomatoFresh'] != 'N/A') and int(omdb['tomatoFresh']) or None,
                 'tmdbId': tmdb['id'],
             }
 
@@ -360,20 +369,20 @@ def do_index(args, imdb_ids):
     template_file = os.path.join(os.path.dirname(__file__),
                                  "index.jinja2.html")
     template = Template(open(template_file, "r").read())
-    with TMDBCache() as tmdbcache:
-        mapping = {
-            'title': 'Movie overview',
-            'install': install,
-            'data': data,
-        }
-        assert not os.path.exists("index.html") and not os.path.exists(".index.html"), \
-            "index.html or folder .index.html already exists"
-        stream = template.generate(mapping)
-        outfile = open("index.html", "wb")
-        for output in stream:
-            out = output.strip()
-            out = re.sub(" {2,}", " ", out)
-            outfile.write(out.encode('utf-8'))
+    tmdbcache = TMDBCache()
+    mapping = {
+        'title': 'Movie overview',
+        'install': install,
+        'data': data,
+    }
+    assert not os.path.exists("index.html") and not os.path.exists(".index.html"), \
+        "index.html or folder .index.html already exists"
+    stream = template.generate(mapping)
+    outfile = open("index.html", "wb")
+    for output in stream:
+        out = output.strip()
+        out = re.sub(" {2,}", " ", out)
+        outfile.write(out.encode('utf-8'))
 
 
 class HelpAction(argparse._HelpAction):
author	Yves Fischer <yvesf-git@xapek.org>	2014-12-21 22:47:26 +0100
committer	Yves Fischer <yvesf-git@xapek.org>	2014-12-21 22:47:26 +0100
commit	2d0e49183a5c49badb96a0168dab575f7db70b21 (patch)
tree	521662aaeca0357aa19b39076f7383ccc8839221 /imdb-lookup/imdbinfo.py
parent	3008170f2bd120c43a876b1f1adba885db594618 (diff)
download	scripts-2d0e49183a5c49badb96a0168dab575f7db70b21.tar.gz scripts-2d0e49183a5c49badb96a0168dab575f7db70b21.zip