summaryrefslogtreecommitdiff
path: root/aiml
diff options
context:
space:
mode:
Diffstat (limited to 'aiml')
-rw-r--r--aiml/AimlParser.py545
-rw-r--r--aiml/DefaultSubs.py156
-rw-r--r--aiml/Kernel.py1183
-rw-r--r--aiml/PatternMgr.py329
-rw-r--r--aiml/Utils.py32
-rw-r--r--aiml/WordSub.py95
-rw-r--r--aiml/__init__.py4
-rw-r--r--aiml/self-test.aiml270
8 files changed, 2614 insertions, 0 deletions
diff --git a/aiml/AimlParser.py b/aiml/AimlParser.py
new file mode 100644
index 0000000..75c2cf1
--- /dev/null
+++ b/aiml/AimlParser.py
@@ -0,0 +1,545 @@
+from xml.sax.handler import ContentHandler
+from xml.sax.xmlreader import Locator
+import sys
+import xml.sax
+import xml.sax.handler
+
+class AimlParserError(Exception): pass
+
+class AimlHandler(ContentHandler):
+ # The legal states of the AIML parser
+ _STATE_OutsideAiml = 0
+ _STATE_InsideAiml = 1
+ _STATE_InsideCategory = 2
+ _STATE_InsidePattern = 3
+ _STATE_AfterPattern = 4
+ _STATE_InsideThat = 5
+ _STATE_AfterThat = 6
+ _STATE_InsideTemplate = 7
+ _STATE_AfterTemplate = 8
+
+ def __init__(self, encoding = "UTF-8"):
+ self.categories = {}
+ self._encoding = encoding
+ self._state = self._STATE_OutsideAiml
+ self._version = ""
+ self._namespace = ""
+ self._forwardCompatibleMode = False
+ self._currentPattern = ""
+ self._currentThat = ""
+ self._currentTopic = ""
+ self._insideTopic = False
+ self._currentUnknown = "" # the name of the current unknown element
+
+ # This is set to true when a parse error occurs in a category.
+ self._skipCurrentCategory = False
+
+ # Counts the number of parse errors in a particular AIML document.
+ # query with getNumErrors(). If 0, the document is AIML-compliant.
+ self._numParseErrors = 0
+
+ # TODO: select the proper validInfo table based on the version number.
+ self._validInfo = self._validationInfo101
+
+ # This stack of bools is used when parsing <li> elements inside
+ # <condition> elements, to keep track of whether or not an
+ # attribute-less "default" <li> element has been found yet. Only
+ # one default <li> is allowed in each <condition> element. We need
+ # a stack in order to correctly handle nested <condition> tags.
+ self._foundDefaultLiStack = []
+
+ # This stack of strings indicates what the current whitespace-handling
+ # behavior should be. Each string in the stack is either "default" or
+ # "preserve". When a new AIML element is encountered, a new string is
+ # pushed onto the stack, based on the value of the element's "xml:space"
+ # attribute (if absent, the top of the stack is pushed again). When
+ # ending an element, pop an object off the stack.
+ self._whitespaceBehaviorStack = ["default"]
+
+ self._elemStack = []
+ self._locator = Locator()
+ self.setDocumentLocator(self._locator)
+
+ def getNumErrors(self):
+ "Return the number of errors found while parsing the current document."
+ return self._numParseErrors
+
+ def setEncoding(self, encoding):
+ """Set the text encoding to use when encoding strings read from XML.
+
+ Defaults to 'UTF-8'.
+
+ """
+ self._encoding = encoding
+
+ def _location(self):
+ "Return a string describing the current location in the source file."
+ line = self._locator.getLineNumber()
+ column = self._locator.getColumnNumber()
+ return "(line %d, column %d)" % (line, column)
+
+ def _pushWhitespaceBehavior(self, attr):
+ """Push a new string onto the whitespaceBehaviorStack.
+
+ The string's value is taken from the "xml:space" attribute, if it exists
+ and has a legal value ("default" or "preserve"). Otherwise, the previous
+ stack element is duplicated.
+
+ """
+ assert len(self._whitespaceBehaviorStack) > 0, "Whitespace behavior stack should never be empty!"
+ try:
+ if attr["xml:space"] == "default" or attr["xml:space"] == "preserve":
+ self._whitespaceBehaviorStack.append(attr["xml:space"])
+ else:
+ raise AimlParserError, "Invalid value for xml:space attribute "+self._location()
+ except KeyError:
+ self._whitespaceBehaviorStack.append(self._whitespaceBehaviorStack[-1])
+
+ def startElementNS(self, name, qname, attr):
+ print "QNAME:", qname
+ print "NAME:", name
+ uri,elem = name
+ if (elem == "bot"): print "name:", attr.getValueByQName("name"), "a'ite?"
+ self.startElement(elem, attr)
+ pass
+
+ def startElement(self, name, attr):
+ # Wrapper around _startElement, which catches errors in _startElement()
+ # and keeps going.
+
+ # If we're inside an unknown element, ignore everything until we're
+ # out again.
+ if self._currentUnknown != "":
+ return
+ # If we're skipping the current category, ignore everything until
+ # it's finished.
+ if self._skipCurrentCategory:
+ return
+
+ # process this start-element.
+ try: self._startElement(name, attr)
+ except AimlParserError, msg:
+ # Print the error message
+ sys.stderr.write("PARSE ERROR: %s\n" % msg)
+
+ self._numParseErrors += 1 # increment error count
+ # In case of a parse error, if we're inside a category, skip it.
+ if self._state >= self._STATE_InsideCategory:
+ self._skipCurrentCategory = True
+
+ def _startElement(self, name, attr):
+ if name == "aiml":
+ # <aiml> tags are only legal in the OutsideAiml state
+ if self._state != self._STATE_OutsideAiml:
+ raise AimlParserError, "Unexpected <aiml> tag "+self._location()
+ self._state = self._STATE_InsideAiml
+ self._insideTopic = False
+ self._currentTopic = u""
+ try: self._version = attr["version"]
+ except KeyError:
+ # This SHOULD be a syntax error, but so many AIML sets out there are missing
+ # "version" attributes that it just seems nicer to let it slide.
+ #raise AimlParserError, "Missing 'version' attribute in <aiml> tag "+self._location()
+ #print "WARNING: Missing 'version' attribute in <aiml> tag "+self._location()
+ #print " Defaulting to version 1.0"
+ self._version = "1.0"
+ self._forwardCompatibleMode = (self._version != "1.0.1")
+ self._pushWhitespaceBehavior(attr)
+ # Not sure about this namespace business yet...
+ #try:
+ # self._namespace = attr["xmlns"]
+ # if self._version == "1.0.1" and self._namespace != "http://alicebot.org/2001/AIML-1.0.1":
+ # raise AimlParserError, "Incorrect namespace for AIML v1.0.1 "+self._location()
+ #except KeyError:
+ # if self._version != "1.0":
+ # raise AimlParserError, "Missing 'version' attribute(s) in <aiml> tag "+self._location()
+ elif self._state == self._STATE_OutsideAiml:
+ # If we're outside of an AIML element, we ignore all tags.
+ return
+ elif name == "topic":
+ # <topic> tags are only legal in the InsideAiml state, and only
+ # if we're not already inside a topic.
+ if (self._state != self._STATE_InsideAiml) or self._insideTopic:
+ raise AimlParserError, "Unexpected <topic> tag", self._location()
+ try: self._currentTopic = unicode(attr['name'])
+ except KeyError:
+ raise AimlParserError, "Required \"name\" attribute missing in <topic> element "+self._location()
+ self._insideTopic = True
+ elif name == "category":
+ # <category> tags are only legal in the InsideAiml state
+ if self._state != self._STATE_InsideAiml:
+ raise AimlParserError, "Unexpected <category> tag "+self._location()
+ self._state = self._STATE_InsideCategory
+ self._currentPattern = u""
+ self._currentThat = u""
+ # If we're not inside a topic, the topic is implicitly set to *
+ if not self._insideTopic: self._currentTopic = u"*"
+ self._elemStack = []
+ self._pushWhitespaceBehavior(attr)
+ elif name == "pattern":
+ # <pattern> tags are only legal in the InsideCategory state
+ if self._state != self._STATE_InsideCategory:
+ raise AimlParserError, "Unexpected <pattern> tag "+self._location()
+ self._state = self._STATE_InsidePattern
+ elif name == "that" and self._state == self._STATE_AfterPattern:
+ # <that> are legal either inside a <template> element, or
+ # inside a <category> element, between the <pattern> and the
+ # <template> elements. This clause handles the latter case.
+ self._state = self._STATE_InsideThat
+ elif name == "template":
+ # <template> tags are only legal in the AfterPattern and AfterThat
+ # states
+ if self._state not in [self._STATE_AfterPattern, self._STATE_AfterThat]:
+ raise AimlParserError, "Unexpected <template> tag "+self._location()
+ # if no <that> element was specified, it is implicitly set to *
+ if self._state == self._STATE_AfterPattern:
+ self._currentThat = u"*"
+ self._state = self._STATE_InsideTemplate
+ self._elemStack.append(['template',{}])
+ self._pushWhitespaceBehavior(attr)
+ elif self._state == self._STATE_InsidePattern:
+ # Certain tags are allowed inside <pattern> elements.
+ if name == "bot" and attr.has_key("name") and attr["name"] == u"name":
+ # Insert a special character string that the PatternMgr will
+ # replace with the bot's name.
+ self._currentPattern += u" BOT_NAME "
+ else:
+ raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
+ elif self._state == self._STATE_InsideThat:
+ # Certain tags are allowed inside <that> elements.
+ if name == "bot" and attr.has_key("name") and attr["name"] == u"name":
+ # Insert a special character string that the PatternMgr will
+ # replace with the bot's name.
+ self._currentThat += u" BOT_NAME "
+ else:
+ raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
+ elif self._state == self._STATE_InsideTemplate and self._validInfo.has_key(name):
+ # Starting a new element inside the current pattern. First
+ # we need to convert 'attr' into a native Python dictionary,
+ # so it can later be marshaled.
+ attrDict = {}
+ for k,v in attr.items():
+ #attrDict[k[1].encode(self._encoding)] = v.encode(self._encoding)
+ attrDict[k.encode(self._encoding)] = unicode(v)
+ self._validateElemStart(name, attrDict, self._version)
+ # Push the current element onto the element stack.
+ self._elemStack.append([name.encode(self._encoding),attrDict])
+ self._pushWhitespaceBehavior(attr)
+ # If this is a condition element, push a new entry onto the
+ # foundDefaultLiStack
+ if name == "condition":
+ self._foundDefaultLiStack.append(False)
+ else:
+ # we're now inside an unknown element.
+ if self._forwardCompatibleMode:
+ # In Forward Compatibility Mode, we ignore the element and its
+ # contents.
+ self._currentUnknown = name
+ else:
+ # Otherwise, unknown elements are grounds for error!
+ raise AimlParserError, ("Unexpected <%s> tag " % name)+self._location()
+
+ def characters(self, ch):
+ # Wrapper around _characters which catches errors in _characters()
+ # and keeps going.
+ if self._state == self._STATE_OutsideAiml:
+ # If we're outside of an AIML element, we ignore all text
+ return
+ if self._currentUnknown != "":
+ # If we're inside an unknown element, ignore all text
+ return
+ if self._skipCurrentCategory:
+ # If we're skipping the current category, ignore all text.
+ return
+ try: self._characters(ch)
+ except AimlParserError, msg:
+ # Print the message
+ sys.stderr.write("PARSE ERROR: %s\n" % msg)
+ self._numParseErrors += 1 # increment error count
+ # In case of a parse error, if we're inside a category, skip it.
+ if self._state >= self._STATE_InsideCategory:
+ self._skipCurrentCategory = True
+
+ def _characters(self, ch):
+ text = unicode(ch)
+ if self._state == self._STATE_InsidePattern:
+ self._currentPattern += text
+ elif self._state == self._STATE_InsideThat:
+ self._currentThat += text
+ elif self._state == self._STATE_InsideTemplate:
+ # First, see whether the element at the top of the element stack
+ # is permitted to contain text.
+ try:
+ parent = self._elemStack[-1][0]
+ parentAttr = self._elemStack[-1][1]
+ required, optional, canBeParent = self._validInfo[parent]
+ nonBlockStyleCondition = (parent == "condition" and not (parentAttr.has_key("name") and parentAttr.has_key("value")))
+ if not canBeParent:
+ raise AimlParserError, ("Unexpected text inside <%s> element "%parent)+self._location()
+ elif parent == "random" or nonBlockStyleCondition:
+ # <random> elements can only contain <li> subelements. However,
+ # there's invariably some whitespace around the <li> that we need
+ # to ignore. Same for non-block-style <condition> elements (i.e.
+ # those which don't have both a "name" and a "value" attribute).
+ if len(text.strip()) == 0:
+ # ignore whitespace inside these elements.
+ return
+ else:
+ # non-whitespace text inside these elements is a syntax error.
+ raise AimlParserError, ("Unexpected text inside <%s> element "%parent)+self._location()
+ except IndexError:
+ # the element stack is empty. This should never happen.
+ raise AimlParserError, "Element stack is empty while validating text "+self._location()
+
+ # Add a new text element to the element at the top of the element
+ # stack. If there's already a text element there, simply append the
+ # new characters to its contents.
+ try: textElemOnStack = (self._elemStack[-1][-1][0] == "text")
+ except IndexError: textElemOnStack = False
+ except KeyError: textElemOnStack = False
+ if textElemOnStack:
+ self._elemStack[-1][-1][2] += text
+ else:
+ self._elemStack[-1].append(["text", {"xml:space": self._whitespaceBehaviorStack[-1]}, text])
+ else:
+ # all other text is ignored
+ pass
+
+ def endElementNS(self, name, qname):
+ uri, elem = name
+ self.endElement(elem)
+
+ def endElement(self, name):
+ """Wrapper around _endElement which catches errors in _characters()
+ and keeps going.
+
+ """
+ if self._state == self._STATE_OutsideAiml:
+ # If we're outside of an AIML element, ignore all tags
+ return
+ if self._currentUnknown != "":
+ # see if we're at the end of an unknown element. If so, we can
+ # stop ignoring everything.
+ if name == self._currentUnknown:
+ self._currentUnknown = ""
+ return
+ if self._skipCurrentCategory:
+ # If we're skipping the current category, see if it's ending. We
+ # stop on ANY </category> tag, since we're not keeping track of
+ # state in ignore-mode.
+ if name == "category":
+ self._skipCurrentCategory = False
+ self._state = self._STATE_InsideAiml
+ return
+ try: self._endElement(name)
+ except AimlParserError, msg:
+ # Print the message
+ sys.stderr.write("PARSE ERROR: %s\n" % msg)
+ self._numParseErrors += 1 # increment error count
+ # In case of a parse error, if we're inside a category, skip it.
+ if self._state >= self._STATE_InsideCategory:
+ self._skipCurrentCategory = True
+
+ def _endElement(self, name):
+ """Verify that an AIML end element is valid in the current
+ context.
+
+ Raises an AimlParserError if an illegal end element is encountered.
+
+ """
+ if name == "aiml":
+ # </aiml> tags are only legal in the InsideAiml state
+ if self._state != self._STATE_InsideAiml:
+ raise AimlParserError, "Unexpected </aiml> tag "+self._location()
+ self._state = self._STATE_OutsideAiml
+ self._whitespaceBehaviorStack.pop()
+ elif name == "topic":
+ # </topic> tags are only legal in the InsideAiml state, and
+ # only if _insideTopic is true.
+ if self._state != self._STATE_InsideAiml or not self._insideTopic:
+ raise AimlParserError, "Unexpected </topic> tag "+self._location()
+ self._insideTopic = False
+ self._currentTopic = u""
+ elif name == "category":
+ # </category> tags are only legal in the AfterTemplate state
+ if self._state != self._STATE_AfterTemplate:
+ raise AimlParserError, "Unexpected </category> tag "+self._location()
+ self._state = self._STATE_InsideAiml
+ # End the current category. Store the current pattern/that/topic and
+ # element in the categories dictionary.
+ key = (self._currentPattern.strip(), self._currentThat.strip(),self._currentTopic.strip())
+ self.categories[key] = self._elemStack[-1]
+ self._whitespaceBehaviorStack.pop()
+ elif name == "pattern":
+ # </pattern> tags are only legal in the InsidePattern state
+ if self._state != self._STATE_InsidePattern:
+ raise AimlParserError, "Unexpected </pattern> tag "+self._location()
+ self._state = self._STATE_AfterPattern
+ elif name == "that" and self._state == self._STATE_InsideThat:
+ # </that> tags are only allowed inside <template> elements or in
+ # the InsideThat state. This clause handles the latter case.
+ self._state = self._STATE_AfterThat
+ elif name == "template":
+ # </template> tags are only allowed in the InsideTemplate state.
+ if self._state != self._STATE_InsideTemplate:
+ raise AimlParserError, "Unexpected </template> tag "+self._location()
+ self._state = self._STATE_AfterTemplate
+ self._whitespaceBehaviorStack.pop()
+ elif self._state == self._STATE_InsidePattern:
+ # Certain tags are allowed inside <pattern> elements.
+ if name not in ["bot"]:
+ raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
+ elif self._state == self._STATE_InsideThat:
+ # Certain tags are allowed inside <that> elements.
+ if name not in ["bot"]:
+ raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
+ elif self._state == self._STATE_InsideTemplate:
+ # End of an element inside the current template. Append the
+ # element at the top of the stack onto the one beneath it.
+ elem = self._elemStack.pop()
+ self._elemStack[-1].append(elem)
+ self._whitespaceBehaviorStack.pop()
+ # If the element was a condition, pop an item off the
+ # foundDefaultLiStack as well.
+ if elem[0] == "condition": self._foundDefaultLiStack.pop()
+ else:
+ # Unexpected closing tag
+ raise AimlParserError, ("Unexpected </%s> tag " % name)+self._location()
+
+ # A dictionary containing a validation information for each AIML
+ # element. The keys are the names of the elements. The values are a
+ # tuple of three items. The first is a list containing the names of
+ # REQUIRED attributes, the second is a list of OPTIONAL attributes,
+ # and the third is a boolean value indicating whether or not the
+ # element can contain other elements and/or text (if False, the
+ # element can only appear in an atomic context, such as <date/>).
+ _validationInfo101 = {
+ "bot": ( ["name"], [], False ),
+ "condition": ( [], ["name", "value"], True ), # can only contain <li> elements
+ "date": ( [], [], False ),
+ "formal": ( [], [], True ),
+ "gender": ( [], [], True ),
+ "get": ( ["name"], [], False ),
+ "gossip": ( [], [], True ),
+ "id": ( [], [], False ),
+ "input": ( [], ["index"], False ),
+ "javascript": ( [], [], True ),
+ "learn": ( [], [], True ),
+ "li": ( [], ["name", "value"], True ),
+ "lowercase": ( [], [], True ),
+ "person": ( [], [], True ),
+ "person2": ( [], [], True ),
+ "random": ( [], [], True ), # can only contain <li> elements
+ "sentence": ( [], [], True ),
+ "set": ( ["name"], [], True),
+ "size": ( [], [], False ),
+ "sr": ( [], [], False ),
+ "srai": ( [], [], True ),
+ "star": ( [], ["index"], False ),
+ "system": ( [], [], True ),
+ "template": ( [], [], True ), # needs to be in the list because it can be a parent.
+ "that": ( [], ["index"], False ),
+ "thatstar": ( [], ["index"], False ),
+ "think": ( [], [], True ),
+ "topicstar": ( [], ["index"], False ),
+ "uppercase": ( [], [], True ),
+ "version": ( [], [], False ),
+ }
+
+ def _validateElemStart(self, name, attr, version):
+ """Test the validity of an element starting inside a <template>
+ element.
+
+ This function raises an AimlParserError exception if it the tag is
+ invalid. Otherwise, no news is good news.
+
+ """
+ # Check the element's attributes. Make sure that all required
+ # attributes are present, and that any remaining attributes are
+ # valid options.
+ required, optional, canBeParent = self._validInfo[name]
+ for a in required:
+ if a not in attr and not self._forwardCompatibleMode:
+ raise AimlParserError, ("Required \"%s\" attribute missing in <%s> element " % (a,name))+self._location()
+ for a in attr:
+ if a in required: continue
+ if a[0:4] == "xml:": continue # attributes in the "xml" namespace can appear anywhere
+ if a not in optional and not self._forwardCompatibleMode:
+ raise AimlParserError, ("Unexpected \"%s\" attribute in <%s> element " % (a,name))+self._location()
+
+ # special-case: several tags contain an optional "index" attribute.
+ # This attribute's value must be a positive integer.
+ if name in ["star", "thatstar", "topicstar"]:
+ for k,v in attr.items():
+ if k == "index":
+ temp = 0
+ try: temp = int(v)
+ except:
+ raise AimlParserError, ("Bad type for \"%s\" attribute (expected integer, found \"%s\") " % (k,v))+self._location()
+ if temp < 1:
+ raise AimlParserError, ("\"%s\" attribute must have non-negative value " % (k))+self._location()
+
+ # See whether the containing element is permitted to contain
+ # subelements. If not, this element is invalid no matter what it is.
+ try:
+ parent = self._elemStack[-1][0]
+ parentAttr = self._elemStack[-1][1]
+ except IndexError:
+ # If the stack is empty, no parent is present. This should never
+ # happen.
+ raise AimlParserError, ("Element stack is empty while validating <%s> " % name)+self._location()
+ required, optional, canBeParent = self._validInfo[parent]
+ nonBlockStyleCondition = (parent == "condition" and not (parentAttr.has_key("name") and parentAttr.has_key("value")))
+ if not canBeParent:
+ raise AimlParserError, ("<%s> elements cannot have any contents "%parent)+self._location()
+ # Special-case test if the parent element is <condition> (the
+ # non-block-style variant) or <random>: these elements can only
+ # contain <li> subelements.
+ elif (parent == "random" or nonBlockStyleCondition) and name!="li":
+ raise AimlParserError, ("<%s> elements can only contain <li> subelements "%parent)+self._location()
+ # Special-case test for <li> elements, which can only be contained
+ # by non-block-style <condition> and <random> elements, and whose
+ # required attributes are dependent upon which attributes are
+ # present in the <condition> parent.
+ elif name=="li":
+ if not (parent=="random" or nonBlockStyleCondition):
+ raise AimlParserError, ("Unexpected <li> element contained by <%s> element "%parent)+self._location()
+ if nonBlockStyleCondition:
+ if parentAttr.has_key("name"):
+ # Single-predicate condition. Each <li> element except the
+ # last must have a "value" attribute.
+ if len(attr) == 0:
+ # This could be the default <li> element for this <condition>,
+ # unless we've already found one.
+ if self._foundDefaultLiStack[-1]:
+ raise AimlParserError, "Unexpected default <li> element inside <condition> "+self._location()
+ else:
+ self._foundDefaultLiStack[-1] = True
+ elif len(attr) == 1 and attr.has_key("value"):
+ pass # this is the valid case
+ else:
+ raise AimlParserError, "Invalid <li> inside single-predicate <condition> "+self._location()
+ elif len(parentAttr) == 0:
+ # Multi-predicate condition. Each <li> element except the
+ # last must have a "name" and a "value" attribute.
+ if len(attr) == 0:
+ # This could be the default <li> element for this <condition>,
+ # unless we've already found one.
+ if self._foundDefaultLiStack[-1]:
+ raise AimlParserError, "Unexpected default <li> element inside <condition> "+self._location()
+ else:
+ self._foundDefaultLiStack[-1] = True
+ elif len(attr) == 2 and attr.has_key("value") and attr.has_key("name"):
+ pass # this is the valid case
+ else:
+ raise AimlParserError, "Invalid <li> inside multi-predicate <condition> "+self._location()
+ # All is well!
+ return True
+
+def create_parser():
+ """Create and return an AIML parser object."""
+ parser = xml.sax.make_parser()
+ handler = AimlHandler("UTF-8")
+ parser.setContentHandler(handler)
+ #parser.setFeature(xml.sax.handler.feature_namespaces, True)
+ return parser \ No newline at end of file
diff --git a/aiml/DefaultSubs.py b/aiml/DefaultSubs.py
new file mode 100644
index 0000000..5166cf6
--- /dev/null
+++ b/aiml/DefaultSubs.py
@@ -0,0 +1,156 @@
+"""This file contains the default (English) substitutions for the
+PyAIML kernel. These substitutions may be overridden by using the
+Kernel.loadSubs(filename) method. The filename specified should refer
+to a Windows-style INI file with the following format:
+
+ # lines that start with '#' are comments
+
+ # The 'gender' section contains the substitutions performed by the
+ # <gender> AIML tag, which swaps masculine and feminine pronouns.
+ [gender]
+ he = she
+ she = he
+ # and so on...
+
+ # The 'person' section contains the substitutions performed by the
+ # <person> AIML tag, which swaps 1st and 2nd person pronouns.
+ [person]
+ I = you
+ you = I
+ # and so on...
+
+ # The 'person2' section contains the substitutions performed by
+ # the <person2> AIML tag, which swaps 1st and 3nd person pronouns.
+ [person2]
+ I = he
+ he = I
+ # and so on...
+
+ # the 'normal' section contains subtitutions run on every input
+ # string passed into Kernel.respond(). It's mainly used to
+ # correct common misspellings, and to convert contractions
+ # ("WHAT'S") into a format that will match an AIML pattern ("WHAT
+ # IS").
+ [normal]
+ what's = what is
+"""
+
+defaultGender = {
+ # masculine -> feminine
+ "he": "she",
+ "him": "her",
+ "his": "her",
+ "himself": "herself",
+
+ # feminine -> masculine
+ "she": "he",
+ "her": "him",
+ "hers": "his",
+ "herself": "himself",
+}
+
+defaultPerson = {
+ # 1st->3rd (masculine)
+ "I": "he",
+ "me": "him",
+ "my": "his",
+ "mine": "his",
+ "myself": "himself",
+
+ # 3rd->1st (masculine)
+ "he":"I",
+ "him":"me",
+ "his":"my",
+ "himself":"myself",
+
+ # 3rd->1st (feminine)
+ "she":"I",
+ "her":"me",
+ "hers":"mine",
+ "herself":"myself",
+}
+
+defaultPerson2 = {
+ # 1st -> 2nd
+ "I": "you",
+ "me": "you",
+ "my": "your",
+ "mine": "yours",
+ "myself": "yourself",
+
+ # 2nd -> 1st
+ "you": "me",
+ "your": "my",
+ "yours": "mine",
+ "yourself": "myself",
+}
+
+
+# TODO: this list is far from complete
+defaultNormal = {
+ "wanna": "want to",
+ "gonna": "going to",
+
+ "I'm": "I am",
+ "I'd": "I would",
+ "I'll": "I will",
+ "I've": "I have",
+ "you'd": "you would",
+ "you're": "you are",
+ "you've": "you have",
+ "you'll": "you will",
+ "he's": "he is",
+ "he'd": "he would",
+ "he'll": "he will",
+ "she's": "she is",
+ "she'd": "she would",
+ "she'll": "she will",
+ "we're": "we are",
+ "we'd": "we would",
+ "we'll": "we will",
+ "we've": "we have",
+ "they're": "they are",
+ "they'd": "they would",
+ "they'll": "they will",
+ "they've": "they have",
+
+ "y'all": "you all",
+
+ "can't": "can not",
+ "cannot": "can not",
+ "couldn't": "could not",
+ "wouldn't": "would not",
+ "shouldn't": "should not",
+
+ "isn't": "is not",
+ "ain't": "is not",
+ "don't": "do not",
+ "aren't": "are not",
+ "won't": "will not",
+ "weren't": "were not",
+ "wasn't": "was not",
+ "didn't": "did not",
+ "hasn't": "has not",
+ "hadn't": "had not",
+ "haven't": "have not",
+
+ "where's": "where is",
+ "where'd": "where did",
+ "where'll": "where will",
+ "who's": "who is",
+ "who'd": "who did",
+ "who'll": "who will",
+ "what's": "what is",
+ "what'd": "what did",
+ "what'll": "what will",
+ "when's": "when is",
+ "when'd": "when did",
+ "when'll": "when will",
+ "why's": "why is",
+ "why'd": "why did",
+ "why'll": "why will",
+
+ "it's": "it is",
+ "it'd": "it would",
+ "it'll": "it will",
+} \ No newline at end of file
diff --git a/aiml/Kernel.py b/aiml/Kernel.py
new file mode 100644
index 0000000..413f26d
--- /dev/null
+++ b/aiml/Kernel.py
@@ -0,0 +1,1183 @@
+# -*- coding: latin-1 -*-
+"""This file contains the public interface to the aiml module."""
+import AimlParser
+import DefaultSubs
+import Utils
+from PatternMgr import PatternMgr
+from WordSub import WordSub
+
+from ConfigParser import ConfigParser
+import copy
+import glob
+import os
+import random
+import re
+import string
+import sys
+import time
+import threading
+import xml.sax
+
+
+class Kernel:
+ # module constants
+ _globalSessionID = "_global" # key of the global session (duh)
+ _maxHistorySize = 10 # maximum length of the _inputs and _responses lists
+ _maxRecursionDepth = 100 # maximum number of recursive <srai>/<sr> tags before the response is aborted.
+ # special predicate keys
+ _inputHistory = "_inputHistory" # keys to a queue (list) of recent user input
+ _outputHistory = "_outputHistory" # keys to a queue (list) of recent responses.
+ _inputStack = "_inputStack" # Should always be empty in between calls to respond()
+
+ def __init__(self):
+ self._verboseMode = True
+ self._version = "PyAIML 0.8.5"
+ self._brain = PatternMgr()
+ self._respondLock = threading.RLock()
+ self._textEncoding = "utf-8"
+
+ # set up the sessions
+ self._sessions = {}
+ self._addSession(self._globalSessionID)
+
+ # Set up the bot predicates
+ self._botPredicates = {}
+ self.setBotPredicate("name", "Nameless")
+
+ # set up the word substitutors (subbers):
+ self._subbers = {}
+ self._subbers['gender'] = WordSub(DefaultSubs.defaultGender)
+ self._subbers['person'] = WordSub(DefaultSubs.defaultPerson)
+ self._subbers['person2'] = WordSub(DefaultSubs.defaultPerson2)
+ self._subbers['normal'] = WordSub(DefaultSubs.defaultNormal)
+
+ # set up the element processors
+ self._elementProcessors = {
+ "bot": self._processBot,
+ "condition": self._processCondition,
+ "date": self._processDate,
+ "formal": self._processFormal,
+ "gender": self._processGender,
+ "get": self._processGet,
+ "gossip": self._processGossip,
+ "id": self._processId,
+ "input": self._processInput,
+ "javascript": self._processJavascript,
+ "learn": self._processLearn,
+ "li": self._processLi,
+ "lowercase": self._processLowercase,
+ "person": self._processPerson,
+ "person2": self._processPerson2,
+ "random": self._processRandom,
+ "text": self._processText,
+ "sentence": self._processSentence,
+ "set": self._processSet,
+ "size": self._processSize,
+ "sr": self._processSr,
+ "srai": self._processSrai,
+ "star": self._processStar,
+ "system": self._processSystem,
+ "template": self._processTemplate,
+ "that": self._processThat,
+ "thatstar": self._processThatstar,
+ "think": self._processThink,
+ "topicstar": self._processTopicstar,
+ "uppercase": self._processUppercase,
+ "version": self._processVersion,
+ }
+
+ def bootstrap(self, brainFile = None, learnFiles = [], commands = []):
+ """Prepare a Kernel object for use.
+
+ If a brainFile argument is provided, the Kernel attempts to
+ load the brain at the specified filename.
+
+ If learnFiles is provided, the Kernel attempts to load the
+ specified AIML files.
+
+ Finally, each of the input strings in the commands list is
+ passed to respond().
+
+ """
+ start = time.clock()
+ if brainFile:
+ self.loadBrain(brainFile)
+
+ # learnFiles might be a string, in which case it should be
+ # turned into a single-element list.
+ learns = learnFiles
+ try: learns = [ learnFiles + "" ]
+ except: pass
+ for file in learns:
+ self.learn(file)
+
+ # ditto for commands
+ cmds = commands
+ try: cmds = [ commands + "" ]
+ except: pass
+ for cmd in cmds:
+ print self._respond(cmd, self._globalSessionID)
+
+ if self._verboseMode:
+ print "Kernel bootstrap completed in %.2f seconds" % (time.clock() - start)
+
+ def verbose(self, isVerbose = True):
+ """Enable/disable verbose output mode."""
+ self._verboseMode = isVerbose
+
+ def version(self):
+ """Return the Kernel's version string."""
+ return self._version
+
+ def numCategories(self):
+ """Return the number of categories the Kernel has learned."""
+ # there's a one-to-one mapping between templates and categories
+ return self._brain.numTemplates()
+
+ def resetBrain(self):
+ """Reset the brain to its initial state.
+
+ This is essentially equivilant to:
+ del(kern)
+ kern = aiml.Kernel()
+
+ """
+ del(self._brain)
+ self.__init__()
+
+ def loadBrain(self, filename):
+ """Attempt to load a previously-saved 'brain' from the
+ specified filename.
+
+ NOTE: the current contents of the 'brain' will be discarded!
+
+ """
+ if self._verboseMode: print "Loading brain from %s..." % filename,
+ start = time.clock()
+ self._brain.restore(filename)
+ if self._verboseMode:
+ end = time.clock() - start
+ print "done (%d categories in %.2f seconds)" % (self._brain.numTemplates(), end)
+
+ def saveBrain(self, filename):
+ """Dump the contents of the bot's brain to a file on disk."""
+ if self._verboseMode: print "Saving brain to %s..." % filename,
+ start = time.clock()
+ self._brain.save(filename)
+ if self._verboseMode:
+ print "done (%.2f seconds)" % (time.clock() - start)
+
+ def getPredicate(self, name, sessionID = _globalSessionID):
+ """Retrieve the current value of the predicate 'name' from the
+ specified session.
+
+ If name is not a valid predicate in the session, the empty
+ string is returned.
+
+ """
+ try: return self._sessions[sessionID][name]
+ except KeyError: return ""
+
+ def setPredicate(self, name, value, sessionID = _globalSessionID):
+ """Set the value of the predicate 'name' in the specified
+ session.
+
+ If sessionID is not a valid session, it will be created. If
+ name is not a valid predicate in the session, it will be
+ created.
+
+ """
+ self._addSession(sessionID) # add the session, if it doesn't already exist.
+ self._sessions[sessionID][name] = value
+
+ def getBotPredicate(self, name):
+ """Retrieve the value of the specified bot predicate.
+
+ If name is not a valid bot predicate, the empty string is returned.
+
+ """
+ try: return self._botPredicates[name]
+ except KeyError: return ""
+
+ def setBotPredicate(self, name, value):
+ """Set the value of the specified bot predicate.
+
+ If name is not a valid bot predicate, it will be created.
+
+ """
+ self._botPredicates[name] = value
+ # Clumsy hack: if updating the bot name, we must update the
+ # name in the brain as well
+ if name == "name":
+ self._brain.setBotName(self.getBotPredicate("name"))
+
+ def setTextEncoding(self, encoding):
+ """Set the text encoding used when loading AIML files (Latin-1, UTF-8, etc.)."""
+ self._textEncoding = encoding
+
+ def loadSubs(self, filename):
+ """Load a substitutions file.
+
+ The file must be in the Windows-style INI format (see the
+ standard ConfigParser module docs for information on this
+ format). Each section of the file is loaded into its own
+ substituter.
+
+ """
+ inFile = file(filename)
+ parser = ConfigParser()
+ parser.readfp(inFile, filename)
+ inFile.close()
+ for s in parser.sections():
+ # Add a new WordSub instance for this section. If one already
+ # exists, delete it.
+ if self._subbers.has_key(s):
+ del(self._subbers[s])
+ self._subbers[s] = WordSub()
+ # iterate over the key,value pairs and add them to the subber
+ for k,v in parser.items(s):
+ self._subbers[s][k] = v
+
+ def _addSession(self, sessionID):
+ """Create a new session with the specified ID string."""
+ if self._sessions.has_key(sessionID):
+ return
+ # Create the session.
+ self._sessions[sessionID] = {
+ # Initialize the special reserved predicates
+ self._inputHistory: [],
+ self._outputHistory: [],
+ self._inputStack: []
+ }
+
+ def _deleteSession(self, sessionID):
+ """Delete the specified session."""
+ if self._sessions.has_key(sessionID):
+ _sessions.pop(sessionID)
+
+ def getSessionData(self, sessionID = None):
+ """Return a copy of the session data dictionary for the
+ specified session.
+
+ If no sessionID is specified, return a dictionary containing
+ *all* of the individual session dictionaries.
+
+ """
+ s = None
+ if sessionID is not None:
+ try: s = self._sessions[sessionID]
+ except KeyError: s = {}
+ else:
+ s = self._sessions
+ return copy.deepcopy(s)
+
+ def learn(self, filename):
+ """Load and learn the contents of the specified AIML file.
+
+ If filename includes wildcard characters, all matching files
+ will be loaded and learned.
+
+ """
+ for f in glob.glob(filename):
+ if self._verboseMode: print "Loading %s..." % f,
+ start = time.clock()
+ # Load and parse the AIML file.
+ parser = AimlParser.create_parser()
+ handler = parser.getContentHandler()
+ handler.setEncoding(self._textEncoding)
+ try: parser.parse(f)
+ except xml.sax.SAXParseException, msg:
+ err = "\nFATAL PARSE ERROR in file %s:\n%s\n" % (f,msg)
+ sys.stderr.write(err)
+ continue
+ # store the pattern/template pairs in the PatternMgr.
+ for key,tem in handler.categories.items():
+ self._brain.add(key,tem)
+ # Parsing was successful.
+ if self._verboseMode:
+ print "done (%.2f seconds)" % (time.clock() - start)
+
+ def respond(self, input, sessionID = _globalSessionID):
+ """Return the Kernel's response to the input string."""
+ if len(input) == 0:
+ return ""
+
+ #ensure that input is a unicode string
+ try: input = input.decode(self._textEncoding, 'replace')
+ except UnicodeError: pass
+ except AttributeError: pass
+
+ # prevent other threads from stomping all over us.
+ self._respondLock.acquire()
+
+ # Add the session, if it doesn't already exist
+ self._addSession(sessionID)
+
+ # split the input into discrete sentences
+ sentences = Utils.sentences(input)
+ finalResponse = ""
+ for s in sentences:
+ # Add the input to the history list before fetching the
+ # response, so that <input/> tags work properly.
+ inputHistory = self.getPredicate(self._inputHistory, sessionID)
+ inputHistory.append(s)
+ while len(inputHistory) > self._maxHistorySize:
+ inputHistory.pop(0)
+ self.setPredicate(self._inputHistory, inputHistory, sessionID)
+
+ # Fetch the response
+ response = self._respond(s, sessionID)
+
+ # add the data from this exchange to the history lists
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ outputHistory.append(response)
+ while len(outputHistory) > self._maxHistorySize:
+ outputHistory.pop(0)
+ self.setPredicate(self._outputHistory, outputHistory, sessionID)
+
+ # append this response to the final response.
+ finalResponse += (response + " ")
+ finalResponse = finalResponse.strip()
+
+ assert(len(self.getPredicate(self._inputStack, sessionID)) == 0)
+
+ # release the lock and return
+ self._respondLock.release()
+ try: return finalResponse.encode(self._textEncoding)
+ except UnicodeError: return finalResponse
+
+ # This version of _respond() just fetches the response for some input.
+ # It does not mess with the input and output histories. Recursive calls
+ # to respond() spawned from tags like <srai> should call this function
+ # instead of respond().
+ def _respond(self, input, sessionID):
+ """Private version of respond(), does the real work."""
+ if len(input) == 0:
+ return ""
+
+ # guard against infinite recursion
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ if len(inputStack) > self._maxRecursionDepth:
+ if self._verboseMode:
+ err = "WARNING: maximum recursion depth exceeded (input='%s')" % input.encode(self._textEncoding, 'replace')
+ sys.stderr.write(err)
+ return ""
+
+ # push the input onto the input stack
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ inputStack.append(input)
+ self.setPredicate(self._inputStack, inputStack, sessionID)
+
+ # run the input through the 'normal' subber
+ subbedInput = self._subbers['normal'].sub(input)
+
+ # fetch the bot's previous response, to pass to the match()
+ # function as 'that'.
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ try: that = outputHistory[-1]
+ except IndexError: that = ""
+ subbedThat = self._subbers['normal'].sub(that)
+
+ # fetch the current topic
+ topic = self.getPredicate("topic", sessionID)
+ subbedTopic = self._subbers['normal'].sub(topic)
+
+ # Determine the final response.
+ response = ""
+ elem = self._brain.match(subbedInput, subbedThat, subbedTopic)
+ if elem is None:
+ if self._verboseMode:
+ err = "WARNING: No match found for input: %s\n" % input.encode(self._textEncoding)
+ sys.stderr.write(err)
+ else:
+ # Process the element into a response string.
+ response += self._processElement(elem, sessionID).strip()
+ response += " "
+ response = response.strip()
+
+ # pop the top entry off the input stack.
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ inputStack.pop()
+ self.setPredicate(self._inputStack, inputStack, sessionID)
+
+ return response
+
+ def _processElement(self,elem, sessionID):
+ """Process an AIML element.
+
+ The first item of the elem list is the name of the element's
+ XML tag. The second item is a dictionary containing any
+ attributes passed to that tag, and their values. Any further
+ items in the list are the elements enclosed by the current
+ element's begin and end tags; they are handled by each
+ element's handler function.
+
+ """
+ try:
+ handlerFunc = self._elementProcessors[elem[0]]
+ except:
+ # Oops -- there's no handler function for this element
+ # type!
+ if self._verboseMode:
+ err = "WARNING: No handler found for <%s> element\n" % elem[0].encode(self._textEncoding, 'replace')
+ sys.stderr.write(err)
+ return ""
+ return handlerFunc(elem, sessionID)
+
+
+ ######################################################
+ ### Individual element-processing functions follow ###
+ ######################################################
+
+ # <bot>
+ def _processBot(self, elem, sessionID):
+ """Process a <bot> AIML element.
+
+ Required element attributes:
+ name: The name of the bot predicate to retrieve.
+
+ <bot> elements are used to fetch the value of global,
+ read-only "bot predicates." These predicates cannot be set
+ from within AIML; you must use the setBotPredicate() function.
+
+ """
+ attrName = elem[1]['name']
+ return self.getBotPredicate(attrName)
+
+ # <condition>
+ def _processCondition(self, elem, sessionID):
+ """Process a <condition> AIML element.
+
+ Optional element attributes:
+ name: The name of a predicate to test.
+ value: The value to test the predicate for.
+
+ <condition> elements come in three flavors. Each has different
+ attributes, and each handles their contents differently.
+
+ The simplest case is when the <condition> tag has both a 'name'
+ and a 'value' attribute. In this case, if the predicate
+ 'name' has the value 'value', then the contents of the element
+ are processed and returned.
+
+ If the <condition> element has only a 'name' attribute, then
+ its contents are a series of <li> elements, each of which has
+ a 'value' attribute. The list is scanned from top to bottom
+ until a match is found. Optionally, the last <li> element can
+ have no 'value' attribute, in which case it is processed and
+ returned if no other match is found.
+
+ If the <condition> element has neither a 'name' nor a 'value'
+ attribute, then it behaves almost exactly like the previous
+ case, except that each <li> subelement (except the optional
+ last entry) must now include both 'name' and 'value'
+ attributes.
+
+ """
+ attr = None
+ response = ""
+ attr = elem[1]
+
+ # Case #1: test the value of a specific predicate for a
+ # specific value.
+ if attr.has_key('name') and attr.has_key('value'):
+ val = self.getPredicate(attr['name'], sessionID)
+ if val == attr['value']:
+ for e in elem[2:]:
+ response += self._processElement(e,sessionID)
+ return response
+ else:
+ # Case #2 and #3: Cycle through <li> contents, testing a
+ # name and value pair for each one.
+ try:
+ name = None
+ if attr.has_key('name'):
+ name = attr['name']
+ # Get the list of <li> elemnents
+ listitems = []
+ for e in elem[2:]:
+ if e[0] == 'li':
+ listitems.append(e)
+ # if listitems is empty, return the empty string
+ if len(listitems) == 0:
+ return ""
+ # iterate through the list looking for a condition that
+ # matches.
+ foundMatch = False
+ for li in listitems:
+ try:
+ liAttr = li[1]
+ # if this is the last list item, it's allowed
+ # to have no attributes. We just skip it for now.
+ if len(liAttr.keys()) == 0 and li == listitems[-1]:
+ continue
+ # get the name of the predicate to test
+ liName = name
+ if liName == None:
+ liName = liAttr['name']
+ # get the value to check against
+ liValue = liAttr['value']
+ # do the test
+ if self.getPredicate(liName, sessionID) == liValue:
+ foundMatch = True
+ response += self._processElement(li,sessionID)
+ break
+ except:
+ # No attributes, no name/value attributes, no
+ # such predicate/session, or processing error.
+ if self._verboseMode: print "Something amiss -- skipping listitem", li
+ raise
+ if not foundMatch:
+ # Check the last element of listitems. If it has
+ # no 'name' or 'value' attribute, process it.
+ try:
+ li = listitems[-1]
+ liAttr = li[1]
+ if not (liAttr.has_key('name') or liAttr.has_key('value')):
+ response += self._processElement(li, sessionID)
+ except:
+ # listitems was empty, no attributes, missing
+ # name/value attributes, or processing error.
+ if self._verboseMode: print "error in default listitem"
+ raise
+ except:
+ # Some other catastrophic cataclysm
+ if self._verboseMode: print "catastrophic condition failure"
+ raise
+ return response
+
+ # <date>
+ def _processDate(self, elem, sessionID):
+ """Process a <date> AIML element.
+
+ <date> elements resolve to the current date and time. The
+ AIML specification doesn't require any particular format for
+ this information, so I go with whatever's simplest.
+
+ """
+ return time.asctime()
+
+ # <formal>
+ def _processFormal(self, elem, sessionID):
+ """Process a <formal> AIML element.
+
+ <formal> elements process their contents recursively, and then
+ capitalize the first letter of each word of the result.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return string.capwords(response)
+
+ # <gender>
+ def _processGender(self,elem, sessionID):
+ """Process a <gender> AIML element.
+
+ <gender> elements process their contents, and then swap the
+ gender of any third-person singular pronouns in the result.
+ This subsitution is handled by the aiml.WordSub module.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return self._subbers['gender'].sub(response)
+
+ # <get>
+ def _processGet(self, elem, sessionID):
+ """Process a <get> AIML element.
+
+ Required element attributes:
+ name: The name of the predicate whose value should be
+ retrieved from the specified session and returned. If the
+ predicate doesn't exist, the empty string is returned.
+
+ <get> elements return the value of a predicate from the
+ specified session.
+
+ """
+ return self.getPredicate(elem[1]['name'], sessionID)
+
+ # <gossip>
+ def _processGossip(self, elem, sessionID):
+ """Process a <gossip> AIML element.
+
+ <gossip> elements are used to capture and store user input in
+ an implementation-defined manner, theoretically allowing the
+ bot to learn from the people it chats with. I haven't
+ descided how to define my implementation, so right now
+ <gossip> behaves identically to <think>.
+
+ """
+ return self._processThink(elem, sessionID)
+
+ # <id>
+ def _processId(self, elem, sessionID):
+ """ Process an <id> AIML element.
+
+ <id> elements return a unique "user id" for a specific
+ conversation. In PyAIML, the user id is the name of the
+ current session.
+
+ """
+ return sessionID
+
+ # <input>
+ def _processInput(self, elem, sessionID):
+ """Process an <input> AIML element.
+
+ Optional attribute elements:
+ index: The index of the element from the history list to
+ return. 1 means the most recent item, 2 means the one
+ before that, and so on.
+
+ <input> elements return an entry from the input history for
+ the current session.
+
+ """
+ inputHistory = self.getPredicate(self._inputHistory, sessionID)
+ try: index = int(elem[1]['index'])
+ except: index = 1
+ try: return inputHistory[-index]
+ except IndexError:
+ if self._verboseMode:
+ err = "No such index %d while processing <input> element.\n" % index
+ sys.stderr.write(err)
+ return ""
+
+ # <javascript>
+ def _processJavascript(self, elem, sessionID):
+ """Process a <javascript> AIML element.
+
+ <javascript> elements process their contents recursively, and
+ then run the results through a server-side Javascript
+ interpreter to compute the final response. Implementations
+ are not required to provide an actual Javascript interpreter,
+ and right now PyAIML doesn't; <javascript> elements are behave
+ exactly like <think> elements.
+
+ """
+ return self._processThink(elem, sessionID)
+
+ # <learn>
+ def _processLearn(self, elem, sessionID):
+ """Process a <learn> AIML element.
+
+ <learn> elements process their contents recursively, and then
+ treat the result as an AIML file to open and learn.
+
+ """
+ filename = ""
+ for e in elem[2:]:
+ filename += self._processElement(e, sessionID)
+ self.learn(filename)
+ return ""
+
+ # <li>
+ def _processLi(self,elem, sessionID):
+ """Process an <li> AIML element.
+
+ Optional attribute elements:
+ name: the name of a predicate to query.
+ value: the value to check that predicate for.
+
+ <li> elements process their contents recursively and return
+ the results. They can only appear inside <condition> and
+ <random> elements. See _processCondition() and
+ _processRandom() for details of their usage.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return response
+
+ # <lowercase>
+ def _processLowercase(self,elem, sessionID):
+ """Process a <lowercase> AIML element.
+
+ <lowercase> elements process their contents recursively, and
+ then convert the results to all-lowercase.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return string.lower(response)
+
+ # <person>
+ def _processPerson(self,elem, sessionID):
+ """Process a <person> AIML element.
+
+ <person> elements process their contents recursively, and then
+ convert all pronouns in the results from 1st person to 2nd
+ person, and vice versa. This subsitution is handled by the
+ aiml.WordSub module.
+
+ If the <person> tag is used atomically (e.g. <person/>), it is
+ a shortcut for <person><star/></person>.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ if len(elem[2:]) == 0: # atomic <person/> = <person><star/></person>
+ response = self._processElement(['star',{}], sessionID)
+ return self._subbers['person'].sub(response)
+
+ # <person2>
+ def _processPerson2(self,elem, sessionID):
+ """Process a <person2> AIML element.
+
+ <person2> elements process their contents recursively, and then
+ convert all pronouns in the results from 1st person to 3rd
+ person, and vice versa. This subsitution is handled by the
+ aiml.WordSub module.
+
+ If the <person2> tag is used atomically (e.g. <person2/>), it is
+ a shortcut for <person2><star/></person2>.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ if len(elem[2:]) == 0: # atomic <person2/> = <person2><star/></person2>
+ response = self._processElement(['star',{}], sessionID)
+ return self._subbers['person2'].sub(response)
+
+ # <random>
+ def _processRandom(self, elem, sessionID):
+ """Process a <random> AIML element.
+
+ <random> elements contain zero or more <li> elements. If
+ none, the empty string is returned. If one or more <li>
+ elements are present, one of them is selected randomly to be
+ processed recursively and have its results returned. Only the
+ chosen <li> element's contents are processed. Any non-<li> contents are
+ ignored.
+
+ """
+ listitems = []
+ for e in elem[2:]:
+ if e[0] == 'li':
+ listitems.append(e)
+ if len(listitems) == 0:
+ return ""
+
+ # select and process a random listitem.
+ random.shuffle(listitems)
+ return self._processElement(listitems[0], sessionID)
+
+ # <sentence>
+ def _processSentence(self,elem, sessionID):
+ """Process a <sentence> AIML element.
+
+ <sentence> elements process their contents recursively, and
+ then capitalize the first letter of the results.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ try:
+ response = response.strip()
+ words = string.split(response, " ", 1)
+ words[0] = string.capitalize(words[0])
+ response = string.join(words)
+ return response
+ except IndexError: # response was empty
+ return ""
+
+ # <set>
+ def _processSet(self, elem, sessionID):
+ """Process a <set> AIML element.
+
+ Required element attributes:
+ name: The name of the predicate to set.
+
+ <set> elements process their contents recursively, and assign the results to a predicate
+ (given by their 'name' attribute) in the current session. The contents of the element
+ are also returned.
+
+ """
+ value = ""
+ for e in elem[2:]:
+ value += self._processElement(e, sessionID)
+ self.setPredicate(elem[1]['name'], value, sessionID)
+ return value
+
+ # <size>
+ def _processSize(self,elem, sessionID):
+ """Process a <size> AIML element.
+
+ <size> elements return the number of AIML categories currently
+ in the bot's brain.
+
+ """
+ return str(self.numCategories())
+
+ # <sr>
+ def _processSr(self,elem,sessionID):
+ """Process an <sr> AIML element.
+
+ <sr> elements are shortcuts for <srai><star/></srai>.
+
+ """
+ star = self._processElement(['star',{}], sessionID)
+ response = self._respond(star, sessionID)
+ return response
+
+ # <srai>
+ def _processSrai(self,elem, sessionID):
+ """Process a <srai> AIML element.
+
+ <srai> elements recursively process their contents, and then
+ pass the results right back into the AIML interpreter as a new
+ piece of input. The results of this new input string are
+ returned.
+
+ """
+ newInput = ""
+ for e in elem[2:]:
+ newInput += self._processElement(e, sessionID)
+ return self._respond(newInput, sessionID)
+
+ # <star>
+ def _processStar(self, elem, sessionID):
+ """Process a <star> AIML element.
+
+ Optional attribute elements:
+ index: Which "*" character in the current pattern should
+ be matched?
+
+ <star> elements return the text fragment matched by the "*"
+ character in the current input pattern. For example, if the
+ input "Hello Tom Smith, how are you?" matched the pattern
+ "HELLO * HOW ARE YOU", then a <star> element in the template
+ would evaluate to "Tom Smith".
+
+ """
+ try: index = int(elem[1]['index'])
+ except KeyError: index = 1
+ # fetch the user's last input
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ input = self._subbers['normal'].sub(inputStack[-1])
+ # fetch the Kernel's last response (for 'that' context)
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ try: that = self._subbers['normal'].sub(outputHistory[-1])
+ except: that = "" # there might not be any output yet
+ topic = self.getPredicate("topic", sessionID)
+ response = self._brain.star("star", input, that, topic, index)
+ return response
+
+ # <system>
+ def _processSystem(self,elem, sessionID):
+ """Process a <system> AIML element.
+
+ <system> elements process their contents recursively, and then
+ attempt to execute the results as a shell command on the
+ server. The AIML interpreter blocks until the command is
+ complete, and then returns the command's output.
+
+ For cross-platform compatibility, any file paths inside
+ <system> tags should use Unix-style forward slashes ("/") as a
+ directory separator.
+
+ """
+ # build up the command string
+ command = ""
+ for e in elem[2:]:
+ command += self._processElement(e, sessionID)
+
+ # normalize the path to the command. Under Windows, this
+ # switches forward-slashes to back-slashes; all system
+ # elements should use unix-style paths for cross-platform
+ # compatibility.
+ #executable,args = command.split(" ", 1)
+ #executable = os.path.normpath(executable)
+ #command = executable + " " + args
+ command = os.path.normpath(command)
+
+ # execute the command.
+ response = ""
+ try:
+ out = os.popen(command)
+ except RuntimeError, msg:
+ if self._verboseMode:
+ err = "WARNING: RuntimeError while processing \"system\" element:\n%s\n" % msg.encode(self._textEncoding, 'replace')
+ sys.stderr.write(err)
+ return "There was an error while computing my response. Please inform my botmaster."
+ for line in out:
+ response += line + "\n"
+ response = string.join(response.splitlines()).strip()
+ return response
+
+ # <template>
+ def _processTemplate(self,elem, sessionID):
+ """Process a <template> AIML element.
+
+ <template> elements recursively process their contents, and
+ return the results. <template> is the root node of any AIML
+ response tree.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return response
+
+ # text
+ def _processText(self,elem, sessionID):
+ """Process a raw text element.
+
+ Raw text elements aren't really AIML tags. Text elements cannot contain
+ other elements; instead, the third item of the 'elem' list is a text
+ string, which is immediately returned. They have a single attribute,
+ automatically inserted by the parser, which indicates whether whitespace
+ in the text should be preserved or not.
+
+ """
+ try: elem[2] + ""
+ except TypeError: raise TypeError, "Text element contents are not text"
+
+ # If the the whitespace behavior for this element is "default",
+ # we reduce all stretches of >1 whitespace characters to a single
+ # space. To improve performance, we do this only once for each
+ # text element encountered, and save the results for the future.
+ if elem[1]["xml:space"] == "default":
+ elem[2] = re.sub("\s+", " ", elem[2])
+ elem[1]["xml:space"] = "preserve"
+ return elem[2]
+
+ # <that>
+ def _processThat(self,elem, sessionID):
+ """Process a <that> AIML element.
+
+ Optional element attributes:
+ index: Specifies which element from the output history to
+ return. 1 is the most recent response, 2 is the next most
+ recent, and so on.
+
+ <that> elements (when they appear inside <template> elements)
+ are the output equivilant of <input> elements; they return one
+ of the Kernel's previous responses.
+
+ """
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ index = 1
+ try:
+ # According to the AIML spec, the optional index attribute
+ # can either have the form "x" or "x,y". x refers to how
+ # far back in the output history to go. y refers to which
+ # sentence of the specified response to return.
+ index = int(elem[1]['index'].split(',')[0])
+ except:
+ pass
+ try: return outputHistory[-index]
+ except IndexError:
+ if self._verboseMode:
+ err = "No such index %d while processing <that> element.\n" % index
+ sys.stderr.write(err)
+ return ""
+
+ # <thatstar>
+ def _processThatstar(self, elem, sessionID):
+ """Process a <thatstar> AIML element.
+
+ Optional element attributes:
+ index: Specifies which "*" in the <that> pattern to match.
+
+ <thatstar> elements are similar to <star> elements, except
+ that where <star/> returns the portion of the input string
+ matched by a "*" character in the pattern, <thatstar/> returns
+ the portion of the previous input string that was matched by a
+ "*" in the current category's <that> pattern.
+
+ """
+ try: index = int(elem[1]['index'])
+ except KeyError: index = 1
+ # fetch the user's last input
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ input = self._subbers['normal'].sub(inputStack[-1])
+ # fetch the Kernel's last response (for 'that' context)
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ try: that = self._subbers['normal'].sub(outputHistory[-1])
+ except: that = "" # there might not be any output yet
+ topic = self.getPredicate("topic", sessionID)
+ response = self._brain.star("thatstar", input, that, topic, index)
+ return response
+
+ # <think>
+ def _processThink(self,elem, sessionID):
+ """Process a <think> AIML element.
+
+ <think> elements process their contents recursively, and then
+ discard the results and return the empty string. They're
+ useful for setting predicates and learning AIML files without
+ generating any output.
+
+ """
+ for e in elem[2:]:
+ self._processElement(e, sessionID)
+ return ""
+
+ # <topicstar>
+ def _processTopicstar(self, elem, sessionID):
+ """Process a <topicstar> AIML element.
+
+ Optional element attributes:
+ index: Specifies which "*" in the <topic> pattern to match.
+
+ <topicstar> elements are similar to <star> elements, except
+ that where <star/> returns the portion of the input string
+ matched by a "*" character in the pattern, <topicstar/>
+ returns the portion of current topic string that was matched
+ by a "*" in the current category's <topic> pattern.
+
+ """
+ try: index = int(elem[1]['index'])
+ except KeyError: index = 1
+ # fetch the user's last input
+ inputStack = self.getPredicate(self._inputStack, sessionID)
+ input = self._subbers['normal'].sub(inputStack[-1])
+ # fetch the Kernel's last response (for 'that' context)
+ outputHistory = self.getPredicate(self._outputHistory, sessionID)
+ try: that = self._subbers['normal'].sub(outputHistory[-1])
+ except: that = "" # there might not be any output yet
+ topic = self.getPredicate("topic", sessionID)
+ response = self._brain.star("topicstar", input, that, topic, index)
+ return response
+
+ # <uppercase>
+ def _processUppercase(self,elem, sessionID):
+ """Process an <uppercase> AIML element.
+
+ <uppercase> elements process their contents recursively, and
+ return the results with all lower-case characters converted to
+ upper-case.
+
+ """
+ response = ""
+ for e in elem[2:]:
+ response += self._processElement(e, sessionID)
+ return string.upper(response)
+
+ # <version>
+ def _processVersion(self,elem, sessionID):
+ """Process a <version> AIML element.
+
+ <version> elements return the version number of the AIML
+ interpreter.
+
+ """
+ return self.version()
+
+
+##################################################
+### Self-test functions follow ###
+##################################################
+def _testTag(kern, tag, input, outputList):
+ """Tests 'tag' by feeding the Kernel 'input'. If the result
+ matches any of the strings in 'outputList', the test passes.
+
+ """
+ global _numTests, _numPassed
+ _numTests += 1
+ print "Testing <" + tag + ">:",
+ response = kern.respond(input).decode(kern._textEncoding)
+ if response in outputList:
+ print "PASSED"
+ _numPassed += 1
+ return True
+ else:
+ print "FAILED (response: '%s')" % response.encode(kern._textEncoding, 'replace')
+ return False
+
+if __name__ == "__main__":
+ # Run some self-tests
+ k = Kernel()
+ k.bootstrap(learnFiles="self-test.aiml")
+
+ global _numTests, _numPassed
+ _numTests = 0
+ _numPassed = 0
+
+ _testTag(k, 'bot', 'test bot', ["My name is Nameless"])
+
+ k.setPredicate('gender', 'male')
+ _testTag(k, 'condition test #1', 'test condition name value', ['You are handsome'])
+ k.setPredicate('gender', 'female')
+ _testTag(k, 'condition test #2', 'test condition name value', [''])
+ _testTag(k, 'condition test #3', 'test condition name', ['You are beautiful'])
+ k.setPredicate('gender', 'robot')
+ _testTag(k, 'condition test #4', 'test condition name', ['You are genderless'])
+ _testTag(k, 'condition test #5', 'test condition', ['You are genderless'])
+ k.setPredicate('gender', 'male')
+ _testTag(k, 'condition test #6', 'test condition', ['You are handsome'])
+
+ # the date test will occasionally fail if the original and "test"
+ # times cross a second boundary. There's no good way to avoid
+ # this problem and still do a meaningful test, so we simply
+ # provide a friendly message to be printed if the test fails.
+ date_warning = """
+ NOTE: the <date> test will occasionally report failure even if it
+ succeeds. So long as the response looks like a date/time string,
+ there's nothing to worry about.
+ """
+ if not _testTag(k, 'date', 'test date', ["The date is %s" % time.asctime()]):
+ print date_warning
+
+ _testTag(k, 'formal', 'test formal', ["Formal Test Passed"])
+ _testTag(k, 'gender', 'test gender', ["He'd told her he heard that her hernia is history"])
+ _testTag(k, 'get/set', 'test get and set', ["I like cheese. My favorite food is cheese"])
+ _testTag(k, 'gossip', 'test gossip', ["Gossip is not yet implemented"])
+ _testTag(k, 'id', 'test id', ["Your id is _global"])
+ _testTag(k, 'input', 'test input', ['You just said: test input'])
+ _testTag(k, 'javascript', 'test javascript', ["Javascript is not yet implemented"])
+ _testTag(k, 'lowercase', 'test lowercase', ["The Last Word Should Be lowercase"])
+ _testTag(k, 'person', 'test person', ['HE think i knows that my actions threaten him and his.'])
+ _testTag(k, 'person2', 'test person2', ['YOU think me know that my actions threaten you and yours.'])
+ _testTag(k, 'person2 (no contents)', 'test person2 I Love Lucy', ['YOU Love Lucy'])
+ _testTag(k, 'random', 'test random', ["response #1", "response #2", "response #3"])
+ _testTag(k, 'random empty', 'test random empty', ["Nothing here!"])
+ _testTag(k, 'sentence', "test sentence", ["My first letter should be capitalized."])
+ _testTag(k, 'size', "test size", ["I've learned %d categories" % k.numCategories()])
+ _testTag(k, 'sr', "test sr test srai", ["srai results: srai test passed"])
+ _testTag(k, 'sr nested', "test nested sr test srai", ["srai results: srai test passed"])
+ _testTag(k, 'srai', "test srai", ["srai test passed"])
+ _testTag(k, 'srai infinite', "test srai infinite", [""])
+ _testTag(k, 'star test #1', 'You should test star begin', ['Begin star matched: You should'])
+ _testTag(k, 'star test #2', 'test star creamy goodness middle', ['Middle star matched: creamy goodness'])
+ _testTag(k, 'star test #3', 'test star end the credits roll', ['End star matched: the credits roll'])
+ _testTag(k, 'star test #4', 'test star having multiple stars in a pattern makes me extremely happy',
+ ['Multiple stars matched: having, stars in a pattern, extremely happy'])
+ _testTag(k, 'system', "test system", ["The system says hello!"])
+ _testTag(k, 'that test #1', "test that", ["I just said: The system says hello!"])
+ _testTag(k, 'that test #2', "test that", ["I have already answered this question"])
+ _testTag(k, 'thatstar test #1', "test thatstar", ["I say beans"])
+ _testTag(k, 'thatstar test #2', "test thatstar", ["I just said \"beans\""])
+ _testTag(k, 'thatstar test #3', "test thatstar multiple", ['I say beans and franks for everybody'])
+ _testTag(k, 'thatstar test #4', "test thatstar multiple", ['Yes, beans and franks for all!'])
+ _testTag(k, 'think', "test think", [""])
+ k.setPredicate("topic", "fruit")
+ _testTag(k, 'topic', "test topic", ["We were discussing apples and oranges"])
+ k.setPredicate("topic", "Soylent Green")
+ _testTag(k, 'topicstar test #1', 'test topicstar', ["Solyent Green is made of people!"])
+ k.setPredicate("topic", "Soylent Ham and Cheese")
+ _testTag(k, 'topicstar test #2', 'test topicstar multiple', ["Both Soylents Ham and Cheese are made of people!"])
+ _testTag(k, 'unicode support', u"郧上好", [u"Hey, you speak Chinese! 郧上好"])
+ _testTag(k, 'uppercase', 'test uppercase', ["The Last Word Should Be UPPERCASE"])
+ _testTag(k, 'version', 'test version', ["PyAIML is version %s" % k.version()])
+ _testTag(k, 'whitespace preservation', 'test whitespace', ["Extra Spaces\n Rule! (but not in here!) But Here They Do!"])
+
+ # Report test results
+ print "--------------------"
+ if _numTests == _numPassed:
+ print "%d of %d tests passed!" % (_numPassed, _numTests)
+ else:
+ print "%d of %d tests passed (see above for detailed errors)" % (_numPassed, _numTests)
+
+ # Run an interactive interpreter
+ #print "\nEntering interactive mode (ctrl-c to exit)"
+ #while True: print k.respond(raw_input("> "))
diff --git a/aiml/PatternMgr.py b/aiml/PatternMgr.py
new file mode 100644
index 0000000..21b73f1
--- /dev/null
+++ b/aiml/PatternMgr.py
@@ -0,0 +1,329 @@
+# This class implements the AIML pattern-matching algorithm described
+# by Dr. Richard Wallace at the following site:
+# http://www.alicebot.org/documentation/matching.html
+
+import marshal
+import pprint
+import re
+import string
+import sys
+
+class PatternMgr:
+ # special dictionary keys
+ _UNDERSCORE = 0
+ _STAR = 1
+ _TEMPLATE = 2
+ _THAT = 3
+ _TOPIC = 4
+ _BOT_NAME = 5
+
+ def __init__(self):
+ self._root = {}
+ self._templateCount = 0
+ self._botName = u"Nameless"
+ punctuation = "\"`~!@#$%^&*()-_=+[{]}\|;:',<.>/?"
+ self._puncStripRE = re.compile("[" + re.escape(punctuation) + "]")
+ self._whitespaceRE = re.compile("\s", re.LOCALE | re.UNICODE)
+
+ def numTemplates(self):
+ """Return the number of templates currently stored."""
+ return self._templateCount
+
+ def setBotName(self, name):
+ """Set the name of the bot, used to match <bot name="name"> tags in
+ patterns. The name must be a single word!
+
+ """
+ # Collapse a multi-word name into a single word
+ self._botName = unicode(string.join(name.split()))
+
+ def dump(self):
+ """Print all learned patterns, for debugging purposes."""
+ pprint.pprint(self._root)
+
+ def save(self, filename):
+ """Dump the current patterns to the file specified by filename. To
+ restore later, use restore().
+
+ """
+ try:
+ outFile = open(filename, "wb")
+ marshal.dump(self._templateCount, outFile)
+ marshal.dump(self._botName, outFile)
+ marshal.dump(self._root, outFile)
+ outFile.close()
+ except Exception, e:
+ print "Error saving PatternMgr to file %s:" % filename
+ raise Exception, e
+
+ def restore(self, filename):
+ """Restore a previously save()d collection of patterns."""
+ try:
+ inFile = open(filename, "rb")
+ self._templateCount = marshal.load(inFile)
+ self._botName = marshal.load(inFile)
+ self._root = marshal.load(inFile)
+ inFile.close()
+ except Exception, e:
+ print "Error restoring PatternMgr from file %s:" % filename
+ raise Exception, e
+
+ def add(self, (pattern,that,topic), template):
+ """Add a [pattern/that/topic] tuple and its corresponding template
+ to the node tree.
+
+ """
+ # TODO: make sure words contains only legal characters
+ # (alphanumerics,*,_)
+
+ # Navigate through the node tree to the template's location, adding
+ # nodes if necessary.
+ node = self._root
+ for word in string.split(pattern):
+ key = word
+ if key == u"_":
+ key = self._UNDERSCORE
+ elif key == u"*":
+ key = self._STAR
+ elif key == u"BOT_NAME":
+ key = self._BOT_NAME
+ if not node.has_key(key):
+ node[key] = {}
+ node = node[key]
+
+ # navigate further down, if a non-empty "that" pattern was included
+ if len(that) > 0:
+ if not node.has_key(self._THAT):
+ node[self._THAT] = {}
+ node = node[self._THAT]
+ for word in string.split(that):
+ key = word
+ if key == u"_":
+ key = self._UNDERSCORE
+ elif key == u"*":
+ key = self._STAR
+ if not node.has_key(key):
+ node[key] = {}
+ node = node[key]
+
+ # navigate yet further down, if a non-empty "topic" string was included
+ if len(topic) > 0:
+ if not node.has_key(self._TOPIC):
+ node[self._TOPIC] = {}
+ node = node[self._TOPIC]
+ for word in string.split(topic):
+ key = word
+ if key == u"_":
+ key = self._UNDERSCORE
+ elif key == u"*":
+ key = self._STAR
+ if not node.has_key(key):
+ node[key] = {}
+ node = node[key]
+
+
+ # add the template.
+ if not node.has_key(self._TEMPLATE):
+ self._templateCount += 1
+ node[self._TEMPLATE] = template
+
+ def match(self, pattern, that, topic):
+ """Return the template which is the closest match to pattern. The
+ 'that' parameter contains the bot's previous response. The 'topic'
+ parameter contains the current topic of conversation.
+
+ Returns None if no template is found.
+
+ """
+ if len(pattern) == 0:
+ return None
+ # Mutilate the input. Remove all punctuation and convert the
+ # text to all caps.
+ input = string.upper(pattern)
+ input = re.sub(self._puncStripRE, "", input)
+ if that.strip() == u"": that = u"ULTRABOGUSDUMMYTHAT" # 'that' must never be empty
+ thatInput = string.upper(that)
+ thatInput = re.sub(self._whitespaceRE, " ", thatInput)
+ thatInput = re.sub(self._puncStripRE, "", thatInput)
+ if topic.strip() == u"": topic = u"ULTRABOGUSDUMMYTOPIC" # 'topic' must never be empty
+ topicInput = string.upper(topic)
+ topicInput = re.sub(self._puncStripRE, "", topicInput)
+
+ # Pass the input off to the recursive call
+ patMatch, template = self._match(input.split(), thatInput.split(), topicInput.split(), self._root)
+ return template
+
+ def star(self, starType, pattern, that, topic, index):
+ """Returns a string, the portion of pattern that was matched by a *.
+
+ The 'starType' parameter specifies which type of star to find.
+ Legal values are:
+ - 'star': matches a star in the main pattern.
+ - 'thatstar': matches a star in the that pattern.
+ - 'topicstar': matches a star in the topic pattern.
+
+ """
+ # Mutilate the input. Remove all punctuation and convert the
+ # text to all caps.
+ input = string.upper(pattern)
+ input = re.sub(self._puncStripRE, "", input)
+ if that.strip() == u"": that = u"ULTRABOGUSDUMMYTHAT" # 'that' must never be empty
+ thatInput = string.upper(that)
+ thatInput = re.sub(self._whitespaceRE, " ", thatInput)
+ thatInput = re.sub(self._puncStripRE, "", thatInput)
+ if topic.strip() == u"": topic = u"ULTRABOGUSDUMMYTOPIC" # 'topic' must never be empty
+ topicInput = string.upper(topic)
+ topicInput = re.sub(self._puncStripRE, "", topicInput)
+
+ # Pass the input off to the recursive pattern-matcher
+ patMatch, template = self._match(input.split(), thatInput.split(), topicInput.split(), self._root)
+ if template == None:
+ return ""
+
+ # Extract the appropriate portion of the pattern, based on the
+ # starType argument.
+ words = None
+ if starType == 'star':
+ patMatch = patMatch[:patMatch.index(self._THAT)]
+ words = input.split()
+ elif starType == 'thatstar':
+ patMatch = patMatch[patMatch.index(self._THAT)+1 : patMatch.index(self._TOPIC)]
+ words = thatInput.split()
+ elif starType == 'topicstar':
+ patMatch = patMatch[patMatch.index(self._TOPIC)+1 :]
+ words = topicInput.split()
+ else:
+ # unknown value
+ raise ValueError, "starType must be in ['star', 'thatstar', 'topicstar']"
+
+ # compare the input string to the matched pattern, word by word.
+ # At the end of this loop, if foundTheRightStar is true, start and
+ # end will contain the start and end indices (in "words") of
+ # the substring that the desired star matched.
+ foundTheRightStar = False
+ start = end = j = numStars = k = 0
+ for i in range(len(words)):
+ # This condition is true after processing a star
+ # that ISN'T the one we're looking for.
+ if i < k:
+ continue
+ # If we're reached the end of the pattern, we're done.
+ if j == len(patMatch):
+ break
+ if not foundTheRightStar:
+ if patMatch[j] in [self._STAR, self._UNDERSCORE]: #we got a star
+ numStars += 1
+ if numStars == index:
+ # This is the star we care about.
+ foundTheRightStar = True
+ start = i
+ # Iterate through the rest of the string.
+ for k in range (i, len(words)):
+ # If the star is at the end of the pattern,
+ # we know exactly where it ends.
+ if j+1 == len (patMatch):
+ end = len (words)
+ break
+ # If the words have started matching the
+ # pattern again, the star has ended.
+ if patMatch[j+1] == words[k]:
+ end = k - 1
+ i = k
+ break
+ # If we just finished processing the star we cared
+ # about, we exit the loop early.
+ if foundTheRightStar:
+ break
+ # Move to the next element of the pattern.
+ j += 1
+
+ # extract the star words from the original, unmutilated input.
+ if foundTheRightStar:
+ #print string.join(pattern.split()[start:end+1])
+ if starType == 'star': return string.join(pattern.split()[start:end+1])
+ elif starType == 'thatstar': return string.join(that.split()[start:end+1])
+ elif starType == 'topicstar': return string.join(topic.split()[start:end+1])
+ else: return ""
+
+ def _match(self, words, thatWords, topicWords, root):
+ """Return a tuple (pat, tem) where pat is a list of nodes, starting
+ at the root and leading to the matching pattern, and tem is the
+ matched template.
+
+ """
+ # base-case: if the word list is empty, return the current node's
+ # template.
+ if len(words) == 0:
+ # we're out of words.
+ pattern = []
+ template = None
+ if len(thatWords) > 0:
+ # If thatWords isn't empty, recursively
+ # pattern-match on the _THAT node with thatWords as words.
+ try:
+ pattern, template = self._match(thatWords, [], topicWords, root[self._THAT])
+ if pattern != None:
+ pattern = [self._THAT] + pattern
+ except KeyError:
+ pattern = []
+ template = None
+ elif len(topicWords) > 0:
+ # If thatWords is empty and topicWords isn't, recursively pattern
+ # on the _TOPIC node with topicWords as words.
+ try:
+ pattern, template = self._match(topicWords, [], [], root[self._TOPIC])
+ if pattern != None:
+ pattern = [self._TOPIC] + pattern
+ except KeyError:
+ pattern = []
+ template = None
+ if template == None:
+ # we're totally out of input. Grab the template at this node.
+ pattern = []
+ try: template = root[self._TEMPLATE]
+ except KeyError: template = None
+ return (pattern, template)
+
+ first = words[0]
+ suffix = words[1:]
+
+ # Check underscore.
+ # Note: this is causing problems in the standard AIML set, and is
+ # currently disabled.
+ if root.has_key(self._UNDERSCORE):
+ # Must include the case where suf is [] in order to handle the case
+ # where a * or _ is at the end of the pattern.
+ for j in range(len(suffix)+1):
+ suf = suffix[j:]
+ pattern, template = self._match(suf, thatWords, topicWords, root[self._UNDERSCORE])
+ if template is not None:
+ newPattern = [self._UNDERSCORE] + pattern
+ return (newPattern, template)
+
+ # Check first
+ if root.has_key(first):
+ pattern, template = self._match(suffix, thatWords, topicWords, root[first])
+ if template is not None:
+ newPattern = [first] + pattern
+ return (newPattern, template)
+
+ # check bot name
+ if root.has_key(self._BOT_NAME) and first == self._botName:
+ pattern, template = self._match(suffix, thatWords, topicWords, root[self._BOT_NAME])
+ if template is not None:
+ newPattern = [first] + pattern
+ return (newPattern, template)
+
+ # check star
+ if root.has_key(self._STAR):
+ # Must include the case where suf is [] in order to handle the case
+ # where a * or _ is at the end of the pattern.
+ for j in range(len(suffix)+1):
+ suf = suffix[j:]
+ pattern, template = self._match(suf, thatWords, topicWords, root[self._STAR])
+ if template is not None:
+ newPattern = [self._STAR] + pattern
+ return (newPattern, template)
+
+ # No matches were found.
+ return (None, None) \ No newline at end of file
diff --git a/aiml/Utils.py b/aiml/Utils.py
new file mode 100644
index 0000000..75e5a1c
--- /dev/null
+++ b/aiml/Utils.py
@@ -0,0 +1,32 @@
+"""This file contains assorted general utility functions used by other
+modules in the PyAIML package.
+
+"""
+
+def sentences(s):
+ """Split the string s into a list of sentences."""
+ try: s+""
+ except: raise TypeError, "s must be a string"
+ pos = 0
+ sentenceList = []
+ l = len(s)
+ while pos < l:
+ try: p = s.index('.', pos)
+ except: p = l+1
+ try: q = s.index('?', pos)
+ except: q = l+1
+ try: e = s.index('!', pos)
+ except: e = l+1
+ end = min(p,q,e)
+ sentenceList.append( s[pos:end].strip() )
+ pos = end+1
+ # If no sentences were found, return a one-item list containing
+ # the entire input string.
+ if len(sentenceList) == 0: sentenceList.append(s)
+ return sentenceList
+
+# Self test
+if __name__ == "__main__":
+ # sentences
+ sents = sentences("First. Second, still? Third and Final! Well, not really")
+ assert(len(sents) == 4) \ No newline at end of file
diff --git a/aiml/WordSub.py b/aiml/WordSub.py
new file mode 100644
index 0000000..9cae856
--- /dev/null
+++ b/aiml/WordSub.py
@@ -0,0 +1,95 @@
+"""This module implements the WordSub class, modelled after a recipe
+in "Python Cookbook" (Recipe 3.14, "Replacing Multiple Patterns in a
+Single Pass" by Xavier Defrang).
+
+Usage:
+Use this class like a dictionary to add before/after pairs:
+ > subber = TextSub()
+ > subber["before"] = "after"
+ > subber["begin"] = "end"
+Use the sub() method to perform the substitution:
+ > print subber.sub("before we begin")
+ after we end
+All matching is intelligently case-insensitive:
+ > print subber.sub("Before we BEGIN")
+ After we END
+The 'before' words must be complete words -- no prefixes.
+The following example illustrates this point:
+ > subber["he"] = "she"
+ > print subber.sub("he says he'd like to help her")
+ she says she'd like to help her
+Note that "he" and "he'd" were replaced, but "help" and "her" were
+not.
+"""
+
+# 'dict' objects weren't available to subclass from until version 2.2.
+# Get around this by importing UserDict.UserDict if the built-in dict
+# object isn't available.
+try: dict
+except: from UserDict import UserDict as dict
+
+import ConfigParser
+import re
+import string
+
+class WordSub(dict):
+ """All-in-one multiple-string-substitution class."""
+
+ def _wordToRegex(self, word):
+ """Convert a word to a regex object which matches the word."""
+ return r"\b%s\b" % re.escape(word)
+
+ def _update_regex(self):
+ """Build re object based on the keys of the current
+ dictionary.
+
+ """
+ self._regex = re.compile("|".join(map(self._wordToRegex, self.keys())))
+ self._regexIsDirty = False
+
+ def __init__(self, defaults = {}):
+ """Initialize the object, and populate it with the entries in
+ the defaults dictionary.
+
+ """
+ self._regex = None
+ self._regexIsDirty = True
+ for k,v in defaults.items():
+ self[k] = v
+
+ def __call__(self, match):
+ """Handler invoked for each regex match."""
+ return self[match.group(0)]
+
+ def __setitem__(self, i, y):
+ self._regexIsDirty = True
+ # for each entry the user adds, we actually add three entrys:
+ super(type(self),self).__setitem__(string.lower(i),string.lower(y)) # key = value
+ super(type(self),self).__setitem__(string.capwords(i), string.capwords(y)) # Key = Value
+ super(type(self),self).__setitem__(string.upper(i), string.upper(y)) # KEY = VALUE
+
+ def sub(self, text):
+ """Translate text, returns the modified text."""
+ if self._regexIsDirty:
+ self._update_regex()
+ return self._regex.sub(self, text)
+
+# self-test
+if __name__ == "__main__":
+ subber = WordSub()
+ subber["apple"] = "banana"
+ subber["orange"] = "pear"
+ subber["banana" ] = "apple"
+ subber["he"] = "she"
+ subber["I'd"] = "I would"
+
+ # test case insensitivity
+ inStr = "I'd like one apple, one Orange and one BANANA."
+ outStr = "I Would like one banana, one Pear and one APPLE."
+ if subber.sub(inStr) == outStr: print "Test #1 PASSED"
+ else: print "Test #1 FAILED: '%s'" % subber.sub(inStr)
+
+ inStr = "He said he'd like to go with me"
+ outStr = "She said she'd like to go with me"
+ if subber.sub(inStr) == outStr: print "Test #2 PASSED"
+ else: print "Test #2 FAILED: '%s'" % subber.sub(inStr) \ No newline at end of file
diff --git a/aiml/__init__.py b/aiml/__init__.py
new file mode 100644
index 0000000..75e188d
--- /dev/null
+++ b/aiml/__init__.py
@@ -0,0 +1,4 @@
+__all__ = []
+
+# The Kernel class is the only class most implementations should need.
+from Kernel import Kernel
diff --git a/aiml/self-test.aiml b/aiml/self-test.aiml
new file mode 100644
index 0000000..8ad0ee1
--- /dev/null
+++ b/aiml/self-test.aiml
@@ -0,0 +1,270 @@
+<?xml version="1.0" encoding="ISO-8859-1"?>
+<!--
+This file contains some simple AIML categories designed to test the Python
+AIML interpreter kernel. Do not make any modifications to this file without
+updating the tests in Kernel.py!
+-->
+
+<aiml version="1.0.1" xmlns:aiml="http://alicebot.org/2001/AIML-1.0.1">
+<!-- bot -->
+<category>
+<pattern>TEST BOT</pattern>
+<template>My name is <bot name="name"/></template>
+</category>
+
+<!-- condition -->
+<category>
+<pattern>TEST CONDITION NAME VALUE</pattern>
+<template>
+<condition name="gender" value="male">You are handsome</condition>
+</template>
+</category>
+<category>
+<pattern>TEST CONDITION NAME</pattern>
+<template>
+ <condition name="gender">
+ <li value="male">You are handsome</li>
+ <li value="female">You are beautiful</li>
+ <li>You are genderless</li>
+ </condition>
+</template>
+</category>
+<category>
+<pattern>TEST CONDITION</pattern>
+<template>
+ <condition>
+ <li name="gender" value="male">You are handsome</li>
+ <li name="gender" value="female">You are beautiful</li>
+ <li>You are genderless</li>
+ </condition>
+</template>
+</category>
+
+<!-- date -->
+<category>
+<pattern>TEST DATE</pattern>
+<template>The date is <date/></template>
+</category>
+ <!-- formal --> <category> <pattern>TEST FORMAL</pattern> <template><formal>formal test passed</formal></template> </category>
+<!-- gender -->
+<category>
+<pattern>TEST GENDER</pattern>
+<template><gender>She'd told him she heard that his hernia is history</gender></template>
+</category>
+
+<!-- get/set -->
+<category>
+<pattern>TEST GET AND SET</pattern>
+<template>I like <set name="food">cheese</set>. My favorite food is <get name="food"/></template>
+</category>
+
+<!-- gossip -->
+<category>
+<pattern>TEST GOSSIP</pattern>
+<template>Gossip is not yet implemented<gossip>stuff</gossip></template>
+</category>
+
+<!-- id -->
+<category>
+<pattern>TEST ID</pattern>
+<template>Your id is <id/></template>
+</category>
+
+<!-- input -->
+<category>
+<pattern>TEST INPUT</pattern>
+<template>You just said: <input index="1"/></template>
+</category>
+
+<!-- javascript -->
+<category>
+<pattern>TEST JAVASCRIPT</pattern>
+<template>Javascript is not yet implemented<javascript>var stuff</javascript></template>
+</category>
+
+<!-- lowercase --> <category> <pattern>TEST LOWERCASE</pattern> <template>The Last Word Should Be <lowercase>Lowercase</lowercase></template> </category>
+
+<!-- person -->
+<category>
+<pattern>TEST PERSON</pattern>
+<template><person>I think he knows that his actions threaten me and mine.</person></template>
+</category>
+
+<!-- person2 -->
+<category>
+<pattern>TEST PERSON2</pattern>
+<template><person2>I think you know that your actions threaten me and mine.</person2></template>
+</category>
+<category>
+<pattern>TEST PERSON2 *</pattern>
+<template><person2/></template>
+</category>
+ <!-- random -->
+<category>
+<pattern>TEST RANDOM</pattern>
+<template>
+<random>
+ <li>response #1</li>
+ <li>response #2</li>
+ <li>response #3</li>
+</random>
+</template>
+</category>
+<category>
+<pattern>TEST RANDOM EMPTY</pattern>
+<template>
+Nothing <random></random>here!
+</template>
+</category>
+
+<!-- sentence -->
+<category>
+<pattern>TEST SENTENCE</pattern>
+<template>
+<sentence>my first letter should be capitalized.</sentence>
+</template>
+</category>
+
+<!-- size -->
+<category>
+<pattern>TEST SIZE</pattern>
+<template>I've learned <size/> categories</template>
+</category>
+
+<!-- sr -->
+<category>
+<pattern>TEST SR *</pattern>
+<template>srai results: <sr/></template>
+</category>
+<category>
+<pattern>TEST NESTED SR *</pattern>
+<template><srai>test sr <star/></srai></template>
+</category>
+
+<!-- srai -->
+<category>
+<pattern>SRAI TARGET</pattern>
+<template>srai test passed</template>
+</category> <category>
+<pattern>TEST SRAI</pattern>
+<template><srai>srai target</srai></template>
+</category>
+<category>
+<pattern>TEST SRAI INFINITE</pattern>
+<template><srai>test srai infinite</srai></template>
+</category>
+
+<!-- star -->
+<category>
+<pattern>* TEST STAR BEGIN</pattern>
+<template>Begin star matched: <star/></template>
+</category>
+<category>
+<pattern>TEST STAR * MIDDLE</pattern>
+<template>Middle star matched: <star/></template>
+</category>
+<category>
+<pattern>TEST STAR END *</pattern>
+<template>End star matched: <star/></template>
+</category>
+<category>
+<pattern>TEST STAR * MULTIPLE * MAKES ME *</pattern>
+<template>Multiple stars matched: <star index="1"/>, <star index="2"/>, <star index="3"/></template>
+</category>
+
+
+
+<!-- system -->
+<category>
+<pattern>TEST SYSTEM</pattern>
+<template>The system says <system>echo hello</system>!</template>
+</category>
+
+<!-- that -->
+<category>
+<pattern>TEST THAT</pattern>
+<template>I just said: <that index="1,1"/></template>
+</category>
+<category>
+<pattern>TEST THAT</pattern>
+<that>I JUST SAID *</that>
+<template>I have already answered this question</template>
+</category>
+
+<!-- thatstar -->
+<category>
+<pattern>TEST THATSTAR</pattern>
+<template>I say beans</template>
+</category>
+<category>
+<pattern>TEST THATSTAR</pattern>
+<that>I SAY *</that>
+<template>I just said "<thatstar/>"</template>
+</category>
+<category>
+<pattern>TEST THATSTAR MULTIPLE</pattern>
+<template>I say beans and franks for everybody</template>
+</category>
+<category>
+<pattern>TEST THATSTAR MULTIPLE</pattern>
+<that>I SAY * AND * FOR EVERYBODY</that>
+<template>Yes, <thatstar index="1"/> and <thatstar index="2"/> for all!</template>
+</category>
+
+
+<!-- think -->
+<category>
+<pattern>TEST THINK</pattern>
+<template><think>You should not see this response.</think></template>
+</category>
+
+<!-- topic -->
+<category>
+<pattern>TEST TOPIC</pattern>
+<template>What are we talking about?</template>
+</category>
+<topic name="FRUIT">
+<category>
+<pattern>TEST TOPIC</pattern>
+<template>We were discussing apples and oranges</template>
+</category>
+</topic>
+
+<!-- topicstar -->
+<category>
+<pattern>TEST TOPICSTAR</pattern>
+<template>I have no topic</template>
+</category>
+<topic name="SOYLENT *">
+<category>
+<pattern>TEST TOPICSTAR</pattern>
+<template>Solyent <topicstar/> is made of people!</template>
+</category>
+</topic>
+<topic name="SOYLENT * AND *">
+<category>
+<pattern>TEST TOPICSTAR MULTIPLE</pattern>
+<template>Both Soylents <topicstar index="1"/> and <topicstar index="2"/> are made of people!</template>
+</category>
+</topic>
+
+<!-- uppercase --> <category> <pattern>TEST UPPERCASE</pattern> <template>The Last Word Should Be <uppercase>Uppercase</uppercase></template> </category>
+
+<!-- version --> <category> <pattern>TEST VERSION</pattern> <template>PyAIML is version <version/></template> </category>
+
+<!-- unicode support -->
+<category>
+<pattern>郧上好</pattern>
+<template>Hey, you speak Chinese! 郧上好</template>
+</category>
+
+<!-- whitespace preservation -->
+<category>
+<pattern>TEST WHITESPACE</pattern>
+<template xml:space="preserve">Extra Spaces
+ Rule! <lowercase xml:space="default">(but not <set name="ignored">in
+ here!</set>)
+ </lowercase> But <set name="ignored">Here They</set> Do!</template>
+</category>
+
+</aiml> \ No newline at end of file