From d4118bd1d74dc623de7d59746d95e9f521d543c1 Mon Sep 17 00:00:00 2001
From: Yves Fischer <yvesf-git@xapek.org>
Date: Sat, 10 Jan 2015 20:30:25 +0100
Subject: rename improved

---
 imdb-lookup/imdbinfo.py | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

(limited to 'imdb-lookup')

diff --git a/imdb-lookup/imdbinfo.py b/imdb-lookup/imdbinfo.py
index 63956f7..b2691cd 100755
--- a/imdb-lookup/imdbinfo.py
+++ b/imdb-lookup/imdbinfo.py
@@ -205,6 +205,35 @@ def weight_rating(infos):
         info['rating'] = info["vote_average"] + 2 * d * f
     return infos
 
+def filter_filename(filename):
+    """Return the movie title preceding the first release tag in *filename*.
+
+    A tag (year, codec, language, ...) only counts when it follows a "." or
+    " " and is followed by ".", "-", " " or the end of the string.  Without
+    any tag the whole name is used.  Dots become spaces; result is stripped.
+
+    >>> filter_filename("The.Interview.2014.1080p")
+    'The Interview'
+    >>> filter_filename("Hello World.webm")
+    'Hello World'
+    >>> filter_filename("Foo bar Blah ldbla (2014) #tt2123123")
+    'Foo bar Blah ldbla'
+    >>> filter_filename("When.Bla.Foo.1948.German.1080p.BluRay.x264-Foobar")
+    'When Bla Foo'
+    >>> filter_filename("St.Franziskus.2008.WEBRip.MD.German.x264-NSane")
+    'St Franziskus'
+    """
+    stopwords = [
+        "dvd", "ac3", "r5", "unrated", "720p", "md",
+        "ts", "ld", "bdrip", "tvrip", "dvdrip", "dvdscr", "uncut",
+        "german", "english", "telesync", "x264", "hdtv", "ws", "webm",
+        r"20[0-1][0-9]", r"19[0-9][0-9]", r"\(20[0-1][0-9]\)", r"\(19[0-9][0-9]\)",
+    ]
+    # One alternation finds the earliest tag; the lazy prefix stops before it.
+    pattern = r"(.*?)[. ](?:" + "|".join(stopwords) + r")(?:$|[.\- ])"
+    found = re.search(pattern, filename, re.IGNORECASE)
+    name = found.group(1) if found else filename
+    return name.replace(u".", u" ").strip()
 
 class Protector(object):
     """ The Protector saves the caller from exception.
@@ -425,24 +454,6 @@ class HelpAction(argparse._HelpAction):
         parser.exit(0)
 
 def do_rename(args):
-    def filter_filename(filename):
-        """remove possibly everything except the movie name"""
-        stopwords = [ 
-            "dvd", "ac3", "r5", "unrated", "ts", "720p", "md",
-            "ts", "ld", "bdrip", "tvrip", "dvdrip", "dvdscr", "uncut",
-            "german", "english", "telesync", "20[0-1][0-9]|19[0-9][0-9]",
-            "x264", "hdtv", "ws"
-        ]   
-        def findword(word):
-            return re.findall(u"(.*?)\.?" + word + u"[\.\- ]", filename, re.IGNORECASE)
-        matches = [i for i in map(findword, stopwords) if i!=[]]
-        matches.sort()
-        if len(matches) > 0:
-            name = matches[0][0]
-        else:
-            name = filename
-        return name.replace(u".", u" ")
-
     def substitute_filename(filename):
         """remove potentially harmful characters from filename"""
         return re.sub(u"[^a-zA-Z0-9()\- _#]", u"_", filename)
@@ -451,7 +462,7 @@ def do_rename(args):
     countrycode = locale.getdefaultlocale()[0][3:5]
     db = Protector(TMDBCache())
     search_term = filter_filename(args.file[0])
-    print("Search for: {}".format(search_term))
+    search_term = input("Search [{}]: ".format(search_term)) or search_term
     results = db.tmdb_search(search_term)
     results.sort(key=lambda i:i["popularity"],reverse=True)
     results = filter(lambda i: i["imdb_id"] and i["release_date"], results)
-- 
cgit v1.2.1