From d4118bd1d74dc623de7d59746d95e9f521d543c1 Mon Sep 17 00:00:00 2001
From: Yves Fischer
Date: Sat, 10 Jan 2015 20:30:25 +0100
Subject: rename improved

---
 imdb-lookup/imdbinfo.py | 49 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 19 deletions(-)

(limited to 'imdb-lookup')

diff --git a/imdb-lookup/imdbinfo.py b/imdb-lookup/imdbinfo.py
index 63956f7..b2691cd 100755
--- a/imdb-lookup/imdbinfo.py
+++ b/imdb-lookup/imdbinfo.py
@@ -205,6 +205,35 @@ def weight_rating(infos):
         info['rating'] = info["vote_average"] + 2 * d * f
     return infos
 
+def filter_filename(filename):
+    """ remove possibly everything except the movie name
+    >>> filter_filename("The.Interview.2014.1080p")
+    'The Interview'
+    >>> filter_filename("Hello World.webm")
+    'Hello World'
+    >>> filter_filename("Foo bar Blah ldbla (2014) #tt2123123")
+    'Foo bar Blah ldbla'
+    >>> filter_filename("When.Bla.Foo.1948.German.1080p.BluRay.x264-Foobar")
+    'When Bla Foo'
+    >>> filter_filename("St.Franziskus.2008.WEBRip.MD.German.x264-NSane")
+    'St Franziskus'
+    """
+    stopwords = [
+        "dvd", "ac3", "r5", "unrated", "720p", "md",
+        "ts", "ld", "bdrip", "tvrip", "dvdrip", "dvdscr", "uncut",
+        "german", "english", "telesync", "x264", "hdtv", "ws", "webm",
+        "20[0-1][0-9]", "19[0-9][0-9]", "\\(20[0-1][0-9]\\)", "\\(19[0-9][0-9]\\)",
+    ]
+    def findword(word):
+        return re.findall("(.*?)[\. ]" + word + "($|[\.\- ])", filename, re.IGNORECASE)
+    matches = [[findword(word), word] for word in stopwords]
+    matches = [match for match in matches if match[0] != []]
+    matches.sort() # make the shortest the first in list
+    if len(matches) > 0:
+        name = matches[0][0][0][0]
+    else:
+        name = filename
+    return name.replace(u".", u" ").strip()
 
 class Protector(object):
     """ The Protector saves the caller from exception.
@@ -425,24 +454,6 @@ class HelpAction(argparse._HelpAction):
         parser.exit(0)
 
 def do_rename(args):
-    def filter_filename(filename):
-        """remove possibly everything except the movie name"""
-        stopwords = [
-            "dvd", "ac3", "r5", "unrated", "ts", "720p", "md",
-            "ts", "ld", "bdrip", "tvrip", "dvdrip", "dvdscr", "uncut",
-            "german", "english", "telesync", "20[0-1][0-9]|19[0-9][0-9]",
-            "x264", "hdtv", "ws"
-        ]
-        def findword(word):
-            return re.findall(u"(.*?)\.?" + word + u"[\.\- ]", filename, re.IGNORECASE)
-        matches = [i for i in map(findword, stopwords) if i!=[]]
-        matches.sort()
-        if len(matches) > 0:
-            name = matches[0][0]
-        else:
-            name = filename
-        return name.replace(u".", u" ")
-
     def substitute_filename(filename):
         """remove potentially harmful characters from filename"""
         return re.sub(u"[^a-zA-Z0-9()\- _#]", u"_", filename)
@@ -451,7 +462,7 @@ def do_rename(args):
     countrycode = locale.getdefaultlocale()[0][3:5]
     db = Protector(TMDBCache())
     search_term = filter_filename(args.file[0])
-    print("Search for: {}".format(search_term))
+    search_term = input("Search [{}]: ".format(search_term)) or search_term
     results = db.tmdb_search(search_term)
     results.sort(key=lambda i:i["popularity"],reverse=True)
     results = filter(lambda i: i["imdb_id"] and i["release_date"], results)
-- 
cgit v1.2.1