summary | refs | log | tree | commit | diff
path: root/aiml/WordSub.py
diff options
context:
space:
mode:
author: yvesf <yvesf@d0e8fea9-7529-0410-93fb-d39fd5b9c1dd> 2009-12-04 23:40:36 +0000
committer: yvesf <yvesf@d0e8fea9-7529-0410-93fb-d39fd5b9c1dd> 2009-12-04 23:40:36 +0000
commit: 400958e540150b65cbf59467ea61aa4e654f4542 (patch)
tree: 3cc5ed7ce8d183534dfa58cf39caa4dd420d4d12 /aiml/WordSub.py
parent: 6153ba1277632d74105170c182594d3833636fa7 (diff)
download: omegle-400958e540150b65cbf59467ea61aa4e654f4542.tar.gz
omegle-400958e540150b65cbf59467ea61aa4e654f4542.zip
aiml bot
git-svn-id: http://xapek.org/svn/common/omegle@1473 d0e8fea9-7529-0410-93fb-d39fd5b9c1dd
Diffstat (limited to 'aiml/WordSub.py')
-rw-r--r--aiml/WordSub.py95
1 files changed, 95 insertions, 0 deletions
diff --git a/aiml/WordSub.py b/aiml/WordSub.py
new file mode 100644
index 0000000..9cae856
--- /dev/null
+++ b/aiml/WordSub.py
@@ -0,0 +1,95 @@
+"""This module implements the WordSub class, modelled after a recipe
+in "Python Cookbook" (Recipe 3.14, "Replacing Multiple Patterns in a
+Single Pass" by Xavier Defrang).
+
+Usage:
+Use this class like a dictionary to add before/after pairs:
+ > subber = WordSub()
+ > subber["before"] = "after"
+ > subber["begin"] = "end"
+Use the sub() method to perform the substitution:
+ > print subber.sub("before we begin")
+ after we end
+All matching is intelligently case-insensitive:
+ > print subber.sub("Before we BEGIN")
+ After we END
+The 'before' words must be complete words -- no prefixes.
+The following example illustrates this point:
+ > subber["he"] = "she"
+ > print subber.sub("he says he'd like to help her")
+ she says she'd like to help her
+Note that "he" and "he'd" were replaced, but "help" and "her" were
+not.
+"""
+
+# 'dict' objects weren't available to subclass from until version 2.2.
+# Get around this by importing UserDict.UserDict if the built-in dict
+# object isn't available.
+try: dict
+except: from UserDict import UserDict as dict
+
+import ConfigParser
+import re
+import string
+
class WordSub(dict):
    """All-in-one multiple-string-substitution class.

    The mapping stores "before" -> "after" pairs.  Every pair assigned
    through item assignment is stored in three case variants (lower,
    capitalized words, upper), and sub() replaces whole-word occurrences
    of any stored key with its value in a single regex pass.

    Only item assignment (``subber[key] = value``) creates the case
    variants and marks the cached regex as stale.
    """

    def _wordToRegex(self, word):
        """Return a regex pattern string matching `word` as a whole word."""
        return r"\b%s\b" % re.escape(word)

    def _update_regex(self):
        """Rebuild the compiled regex from the current dictionary keys."""
        # Regex alternation takes the first alternative that matches, so
        # longer keys go first: otherwise a short key such as "i" would
        # win over "i'd" (the apostrophe is a word boundary).  The
        # secondary alphabetical sort just makes the pattern deterministic.
        words = sorted(self.keys(), key=lambda w: (-len(w), w))
        self._regex = re.compile("|".join(map(self._wordToRegex, words)))
        self._regexIsDirty = False

    def __init__(self, defaults=None):
        """Initialize the object and populate it from `defaults`.

        defaults -- optional mapping of before/after pairs; each entry is
                    added through item assignment, so it gets the same
                    three case variants as any other entry.
        """
        # Initialise the base mapping explicitly; the UserDict fallback
        # needs this to create its internal storage.
        dict.__init__(self)
        self._regex = None
        self._regexIsDirty = True
        if defaults is not None:
            for k, v in defaults.items():
                self[k] = v

    def __call__(self, match):
        """Handler invoked for each regex match; returns the replacement.

        The regex is built from the dictionary keys, so the matched text
        is looked up directly as a key.
        """
        return self[match.group(0)]

    def __setitem__(self, i, y):
        """Store the pair `i` -> `y` in lower, capitalized and upper case."""
        self._regexIsDirty = True
        # Call the base class directly.  super(type(self), self) would
        # resolve to this very method again when invoked on a subclass
        # instance and recurse forever.
        store = dict.__setitem__
        # for each entry the user adds, we actually add three entries:
        store(self, i.lower(), y.lower())                    # key = value
        store(self, string.capwords(i), string.capwords(y))  # Key = Value
        store(self, i.upper(), y.upper())                    # KEY = VALUE

    def sub(self, text):
        """Translate text, returns the modified text."""
        # With no entries the joined pattern would be the empty string,
        # which matches at every position and makes __call__ look up "".
        if not self:
            return text
        if self._regexIsDirty:
            self._update_regex()
        return self._regex.sub(self, text)
+
# self-test: run this module directly to check case-aware, whole-word
# substitution against two known input/output pairs.
if __name__ == "__main__":
    subber = WordSub()
    subber["apple"] = "banana"
    subber["orange"] = "pear"
    subber["banana"] = "apple"
    subber["he"] = "she"
    subber["I'd"] = "I would"

    cases = [
        # test case insensitivity
        ("I'd like one apple, one Orange and one BANANA.",
         "I Would like one banana, one Pear and one APPLE."),
        # "he" and "he'd" are replaced; the rest of the sentence is not
        ("He said he'd like to go with me",
         "She said she'd like to go with me"),
    ]

    for index, (inStr, outStr) in enumerate(cases):
        number = index + 1
        # Substitute once and reuse the result for the failure report.
        result = subber.sub(inStr)
        # Single-argument print(...) emits the same text whether print is
        # a statement (Python 2) or a function (Python 3).
        if result == outStr:
            print("Test #%d PASSED" % number)
        else:
            print("Test #%d FAILED: '%s'" % (number, result))