xoxo-sample-code: Difference between revisions
Jump to navigation
Jump to search
No edit summary |
|||
Line 1: | Line 1: | ||
= XOXO Sample Code = | = XOXO Sample Code = | ||
A whole bunch of open source ([http://creativecommons.org/licenses/by/2.0/ CC-by-2.0], [http://www.apache.org/licenses/LICENSE-2.0 Apache 2.0]) sample code to read and write [ | A whole bunch of open source ([http://creativecommons.org/licenses/by/2.0/ CC-by-2.0], [http://www.apache.org/licenses/LICENSE-2.0 Apache 2.0]) sample code to read and write [xoxo files in Python (with Perl, PHP, ... to follow). | ||
== xoxo.py == | == xoxo.py == | ||
<pre><nowiki> | <pre><nowiki> |
Revision as of 07:26, 20 June 2005
XOXO Sample Code
A whole bunch of open source (CC-by-2.0, Apache 2.0) sample code to read and write XOXO files in Python (with Perl, PHP, ... to follow).
xoxo.py
# -*- coding: utf-8 -*-
"""xoxo.py - a utility module for transforming to and from the XHTMLOutlines format XOXO

toXOXO takes a Python datastructure (tuples, lists or dictionaries, arbitrarily
nested) and returns a XOXO representation of it as UTF-8 encoded bytes.
fromXOXO parses an XHTML document for a xoxo list and returns the structure.

The module runs on Python 2 (where it keeps using sgmllib) and on Python 3
(where sgmllib no longer exists and a small html.parser adapter is used).
"""
__version__ = "0.8"
__date__ = "2004-10-05"
__author__ = "Kevin Marks <kmarks@technorati.com>"
__copyright__ = "Copyright 2004, Kevin marks & Technorati"
__license__ = "CC-by-2.0 (http://creativecommons.org/licenses/by/2.0/), Apache 2.0 (http://www.apache.org/licenses/LICENSE-2.0)"
__credits__ = """Tantek Çelik and Mark Pilgrim for data structure"""
__history__ = """
TODO: add <title> tag
TODO: add a proper profile link
0.8 work in unicode then render to utf-8
0.7 initial encoding support - just utf-8 for now
0.6 support the special behaviour for url properties to/from <a>
0.5 fix some awkward side effects of whitespace and text outside our expected tags; simplify writing code
0.4 add correct XHTML headers so it validates
0.3 read/write version; fixed invlaid nested list generation;
0.1 first write-only version
"""

import re
from xml.sax.saxutils import escape as _xml_escape

try:  # Python 2
    import sgmllib
except ImportError:  # Python 3: sgmllib was removed from the standard library
    sgmllib = None

try:
    # Not used by this module; kept (guarded) so that importing it on
    # Python 2 binds the same module-level names as before.
    import urllib
    import urlparse
except ImportError:
    pass

try:  # Python 2
    _text_type = unicode
    _unichr = unichr
except NameError:  # Python 3
    _text_type = str
    _unichr = chr

# Tags that only hold structure: character data found directly inside them
# (i.e. outside <li>, <dt>, <dd>, <a>) is ignored by the parser.
containerTags = {'ol': False, 'ul': False, 'dl': False}

# Decimal character references such as "&#233;".
_CHARREF = re.compile(r'&#([0-9]+);?')


if sgmllib is not None:
    _ParserBase = sgmllib.SGMLParser
else:
    from html.parser import HTMLParser

    class _ParserBase(HTMLParser):
        """Stand-in for the parts of sgmllib.SGMLParser that xoxoParser uses.

        Start tags are dispatched to ``start_<tag>(attrs)`` and end tags to
        ``end_<tag>()``; ``self.stack`` lists the currently open tags that
        have a ``start_`` handler. Tags without a handler are ignored.
        """

        def __init__(self):
            HTMLParser.__init__(self, convert_charrefs=True)

        def reset(self):
            HTMLParser.reset(self)
            self.stack = []

        def handle_starttag(self, tag, attrs):
            handler = getattr(self, 'start_' + tag, None)
            if handler is not None:
                self.stack.append(tag)
                handler(attrs)

        def handle_endtag(self, tag):
            if tag not in self.stack:
                return  # unbalanced or unhandled end tag
            # Close every tag opened since the matching start tag, innermost
            # first, the way sgmllib does for omitted end tags.
            while self.stack:
                open_tag = self.stack[-1]
                handler = getattr(self, 'end_' + open_tag, None)
                if handler is not None:
                    handler()
                del self.stack[-1]
                if open_tag == tag:
                    break


def _to_text(value):
    """Return *value* as unicode text.

    Text is returned unchanged and other non-bytes objects go through
    ``str()``. Bytes are decoded as UTF-8, falling back to windows-1252 (with
    replacement characters for the few bytes that codec leaves undefined).
    """
    if isinstance(value, _text_type):
        return value
    if not isinstance(value, bytes):
        value = str(value)
        if isinstance(value, _text_type):  # Python 3: str() already gave text
            return value
    try:
        return value.decode('utf-8')
    except UnicodeDecodeError:
        return value.decode('windows-1252', 'replace')


def _escape_text(text):
    """Escape ``&``, ``<`` and ``>`` for use as XHTML character data."""
    return _xml_escape(text)


def _escape_attr(text):
    """Escape *text* for use inside a double-quoted XHTML attribute."""
    return _xml_escape(text, {u'"': u'&quot;'})


def _charref_to_char(match):
    """Return the character for a decimal reference, or the reference itself."""
    try:
        return _unichr(int(match.group(1)))
    except (ValueError, OverflowError):
        return match.group(0)


def _decode_charrefs(value):
    """Decode decimal character references that the parser left in *value*."""
    if sgmllib is None:
        # html.parser has already decoded every reference in attribute
        # values; decoding again would corrupt a literal "&#65;".
        return value
    # sgmllib only decodes ASCII references in attribute values itself.
    return _CHARREF.sub(_charref_to_char, value)


def _render_dict(struct, depth):
    """Render a dictionary as an optional ``<a>`` followed by an optional ``<dl>``."""
    remaining = dict(struct)  # work on a copy; the caller's dict is untouched
    parts = []
    if 'url' in remaining:
        url = remaining.pop('url')
        # Link text falls back to the title, then to the url itself.
        text = remaining.get('text', remaining.get('title', url))
        parts.append(u'<a href="%s" ' % _escape_attr(_to_text(url)))
        for attr in ('title', 'rel', 'type'):
            if attr in remaining:
                value = _escape_attr(_to_text(remaining.pop(attr)))
                parts.append(u'%s="%s" ' % (attr, value))
        parts.append(u'>%s</a>' % makeXOXO(text, None, depth + 1))
        remaining.pop('text', None)
    if remaining:
        parts.append(u'<dl>')
        for key, value in remaining.items():
            parts.append(u'<dt>%s</dt><dd>%s</dd>' % (
                _escape_text(_to_text(key)),
                makeXOXO(value, None, depth + 1)))
        parts.append(u'</dl>')
    return u''.join(parts)


def makeXOXO(struct, className=None, depth=0):
    """Return the XOXO markup for *struct* as unicode text.

    Lists and tuples become ``<ol>`` (carrying ``class="className"`` when
    className is given) with one ``<li>`` per item. A dictionary with a
    ``url`` key becomes an ``<a>`` whose ``title``, ``rel`` and ``type``
    entries are attributes and whose ``text`` entry is the link text; any
    other dictionary entries become ``<dt>``/``<dd>`` pairs in a ``<dl>``.
    Anything else is converted to text, and markup characters in it are
    escaped so the result stays well formed.

    depth is accepted and passed down the recursion; it does not affect
    the output.
    """
    if isinstance(struct, dict):
        return _render_dict(struct, depth)
    if isinstance(struct, (list, tuple)):
        if className:
            opening = u'<ol class="%s">' % _escape_attr(_to_text(className))
        else:
            opening = u'<ol>'
        items = [u'<li>%s</li>' % makeXOXO(item, None, depth + 1)
                 for item in struct]
        return opening + u''.join(items) + u'</ol>'
    return _escape_text(_to_text(struct))


class xoxoParser(_ParserBase):
    """Parser that rebuilds Python structures from XOXO markup.

    After feeding a document, ``structs`` holds the top-level structures
    that were found. ``xostack`` tracks the containers currently being
    filled and ``textstack`` the text collected for each open element.
    """

    def __init__(self):
        _ParserBase.__init__(self)
        self.structs = []
        self.xostack = []
        self.textstack = [u'']

    def normalize_attrs(self, attrs):
        """Return attrs with lower-cased names and stripped values.

        The values of ``rel`` and ``type`` are lower-cased as well; an
        attribute given without a value gets the empty string.
        """
        normalized = []
        for name, value in attrs:
            name = name.lower()
            value = _decode_charrefs(value or u'').strip()
            if name in ('rel', 'type'):
                value = value.lower()
            normalized.append((name, value))
        return normalized

    def pushStruct(self, struct):
        """Make struct the container that following content is added to."""
        previous = self.structs[-1] if self.structs else None
        if (isinstance(struct, dict) and not struct
                and isinstance(previous, dict) and 'url' in previous):
            # A <dl> directly after an <a> extends the dictionary made for
            # the <a> instead of starting a new one.
            self.xostack.append(previous)
        else:
            self.structs.append(struct)
            self.xostack.append(struct)

    def start_a(self, attrs):
        attrsD = dict(self.normalize_attrs(attrs))
        # href is stored under 'url'; an <a> without href gets an empty url.
        attrsD['url'] = attrsD.pop('href', u'')
        self.pushStruct(attrsD)
        self.textstack.append(u'')

    def end_a(self):
        val = self.textstack.pop()
        link = self.xostack[-1]
        # Link text that only repeats the title or the url is not stored.
        if val and val != link.get('title', u'') and val != link['url']:
            link['text'] = val
        self.xostack.pop()

    def start_dl(self, attrs):
        self.pushStruct({})

    def end_dl(self):
        self.xostack.pop()

    def start_ol(self, attrs):
        self.pushStruct([])

    def end_ol(self):
        self.xostack.pop()

    def start_ul(self, attrs):
        self.pushStruct([])

    def end_ul(self):
        self.xostack.pop()

    def start_li(self, attrs):
        self.textstack.append(u'')

    def end_li(self):
        val = self.textstack.pop()
        # Identity, not equality: a nested structure that merely compares
        # equal to its parent (['a', ['a']]) must still be attached.
        if self.structs[-1] is not self.xostack[-1]:
            val = self.structs.pop()  # the item held a structure, not text
        self.xostack[-1].append(val)

    def start_dt(self, attrs):
        self.textstack.append(u'')

    def end_dt(self):
        # The key text stays on textstack until the matching <dd> closes.
        pass

    def start_dd(self, attrs):
        self.textstack.append(u'')

    def end_dd(self):
        val = self.textstack.pop()
        key = self.textstack.pop()
        if self.structs[-1] is not self.xostack[-1]:
            val = self.structs.pop()  # the value is a nested structure
        self.xostack[-1][key] = val

    def handle_data(self, text):
        # Skip text outside any handled element or directly inside a
        # container tag.
        if self.stack and containerTags.get(self.stack[-1], True):
            self.textstack[-1] += text


def toXOXO(struct, addHTMLWrapper=False, cssUrl=''):
    """Return *struct* as XOXO markup, encoded as UTF-8 bytes.

    A struct that is not a list or tuple is treated as a one-item list.
    With addHTMLWrapper the list is wrapped in a complete XHTML document;
    cssUrl, if given, is imported from a <style> element in its head.
    """
    if isinstance(struct, (list, tuple)):
        inStruct = struct
    else:
        inStruct = [struct]
    body = makeXOXO(inStruct, 'xoxo')
    if not addHTMLWrapper:
        return body.encode('utf-8')
    parts = [
        u'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" '
        u'"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">\n',
        u'<html xmlns="http://www.w3.org/1999/xhtml"><head profile="">',
        u'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
    ]
    if cssUrl:
        parts.append(u'<style type="text/css" >@import "%s";</style>'
                     % _to_text(cssUrl))
    parts.append(u'</head><body>%s</body></html>' % body)
    return u''.join(parts).encode('utf-8')


def fromXOXO(html):
    """Parse XOXO markup and return the structure it describes.

    html may be UTF-8 encoded bytes or unicode text. Single-item lists
    around the result are unwrapped, so a document holding one dictionary
    yields that dictionary rather than a list containing it.
    """
    if isinstance(html, bytes):
        html = html.decode('utf-8')
    parser = xoxoParser()
    parser.feed(html)
    parser.close()  # flush anything the parser is still buffering
    structs = [struct for struct in parser.structs if struct]
    while isinstance(structs, list) and len(structs) == 1:
        structs = structs[0]
    return structs
testxoxo.py
# -*- coding: utf-8 -*-
"""testxoxo.py

Unit tests for xoxo.py

This file tests the functions in xoxo.py
The underlying model here is http://diveintopython.org/unit_testing/index.html
run from command line with
python testxoxo.py -v
"""
import unittest

import xoxo

try:
    reload  # Python 2 builtin
except NameError:
    from importlib import reload

# Pick up edits to xoxo.py when the tests are re-run in a live interpreter.
reload(xoxo)


class xoxoTestCases(unittest.TestCase):
    """Writing, reading and round-tripping XOXO through xoxo.py.

    toXOXO returns UTF-8 encoded bytes, so expected markup is written as
    bytes literals.
    """

    def testSimpleList(self):
        '''make a xoxo file from a list'''
        l = ['1', '2', '3']
        html = xoxo.toXOXO(l)
        self.assertEqual(html, b'<ol class="xoxo"><li>1</li><li>2</li><li>3</li></ol>')

    def testNestedList(self):
        '''make a xoxo file from a list with a list in'''
        l = ['1', ['2', '3']]
        html = xoxo.toXOXO(l)
        self.assertEqual(html, b'<ol class="xoxo"><li>1</li><li><ol><li>2</li><li>3</li></ol></li></ol>')

    def testDictionary(self):
        '''make a xoxo file from a dictionary'''
        d = {'test': '1', 'name': 'Kevin'}
        html = xoxo.toXOXO(d)
        # Dictionary iteration order is not guaranteed, so accept the
        # entries in either order.
        testFirst = b'<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd><dt>name</dt><dd>Kevin</dd></dl></li></ol>'
        nameFirst = b'<ol class="xoxo"><li><dl><dt>name</dt><dd>Kevin</dd><dt>test</dt><dd>1</dd></dl></li></ol>'
        self.assertTrue(html in (testFirst, nameFirst), html)

    def testSingleItem(self):
        '''make a xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l)
        self.assertEqual(html, b'<ol class="xoxo"><li>test</li></ol>')

    def testWrapDiffers(self):
        '''make a xoxo file from a string with and without html wrapper and check they are different'''
        l = "test"
        html = xoxo.toXOXO(l)
        htmlwrap = xoxo.toXOXO(l, addHTMLWrapper=True)
        self.assertNotEqual(html, htmlwrap)

    def testWrapSingleItem(self):
        '''make a wrapped xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l, addHTMLWrapper=True)
        # Check the parts of the wrapper separately rather than pinning
        # the whole document byte for byte.
        self.assertTrue(html.startswith(b'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN'), html)
        self.assertTrue(b'<html xmlns="http://www.w3.org/1999/xhtml"><head profile="">' in html, html)
        self.assertTrue(b'<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />' in html, html)
        self.assertTrue(html.endswith(b'</head><body><ol class="xoxo"><li>test</li></ol></body></html>'), html)

    def testDictionaryRoundTrip(self):
        '''make a dictionary into a xoxo file and back again; check it is the same'''
        d = {'test': '1', 'name': 'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testListRoundTrip(self):
        '''make a list into a xoxo file and back again; check it is the same'''
        l = ['3', '2', '1']
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testListofDictsRoundTrip(self):
        '''make a list of Dicts into a xoxo file and back again; check it is the same'''
        l = ['3', {'a': '2'}, {'b': '1', 'c': '4'}]
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testListofListsRoundTrip(self):
        '''make a list of Lists into a xoxo file and back again; check it is the same'''
        l = ['3', ['a', '2'], ['b', ['1', ['c', '4']]]]
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testDictofListsRoundTrip(self):
        '''make a dict with lists in into a xoxo file and back again; check it is the same'''
        d = {'test': ['1', '2'],
             'name': 'Kevin',
             'nestlist': ['a', ['b', 'c']],
             'nestdict': {'e': '6', 'f': '7'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testXOXOjunkInContainers(self):
        '''make sure text outside <li> etc is ignored'''
        d = xoxo.fromXOXO('<ol>bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>')
        self.assertEqual(d, {'good': 'buy'})

    def testXOXOjunkInElements(self):
        '''make sure text within <li> but outside a subcontainer is ignored'''
        l = xoxo.fromXOXO('<ol><li>bad<dl><dt>good</dt><dd>buy</dd></dl>worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>')
        self.assertEqual(l, [{'good': 'buy'}, ['OK']])

    def testXOXOWithSpacesAndNewlines(self):
        '''unmung some xoxo with spaces in and check result is right'''
        xoxoSample = '''<ol class='xoxo'>
  <li>
    <dl>
      <dt>text</dt>
      <dd>item 1</dd>
      <dt>description</dt>
      <dd> This item represents the main point we're trying to make.</dd>
      <dt>url</dt>
      <dd>http://example.com/more.xoxo</dd>
      <dt>title</dt>
      <dd>title of item 1</dd>
      <dt>type</dt>
      <dd>text/xml</dd>
      <dt>rel</dt>
      <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        d2 = {'text': 'item 1',
              'description': " This item represents the main point we're trying to make.",
              'url': 'http://example.com/more.xoxo',
              'title': 'title of item 1',
              'type': 'text/xml',
              'rel': 'help'}
        self.assertEqual(d, d2)
        # Writing the structure back out must at least succeed. Comparing
        # the markup with xoxoSample needs a whitespace-insensitive
        # comparison that does not exist yet.
        xoxo.toXOXO(d)

    def testSpecialAttributeDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in and check result is right'''
        xoxoSample = '''<ol class='xoxo'>
  <li>
    <dl>
      <dt>text</dt>
      <dd>item 1</dd>
      <dt>url</dt>
      <dd>http://example.com/more.xoxo</dd>
      <dt>title</dt>
      <dd>title of item 1</dd>
      <dt>type</dt>
      <dd>text/xml</dd>
      <dt>rel</dt>
      <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample = '''<ol class='xoxo'>
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a>
<!-- note how the "text" property is simply the contents of the <a> element -->
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d, d2)

    def testSpecialAttributeAndDLDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in plus a <dl> in the same item and check result is right'''
        xoxoSample = '''<ol class="xoxo">
  <li>
    <dl>
      <dt>text</dt>
      <dd>item 1</dd>
      <dt>description</dt>
      <dd> This item represents the main point we're trying to make.</dd>
      <dt>url</dt>
      <dd>http://example.com/more.xoxo</dd>
      <dt>title</dt>
      <dd>title of item 1</dd>
      <dt>type</dt>
      <dd>text/xml</dd>
      <dt>rel</dt>
      <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample = '''<ol class="xoxo">
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a>
<!-- note how the "text" property is simply the contents of the <a> element -->
    <dl>
      <dt>description</dt>
      <dd> This item represents the main point we're trying to make.</dd>
    </dl>
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d, d2)

    def testSpecialAttributeEncode(self):
        '''check it makes an <a href with a url parameter'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help', 'text': 'an example'}
        html = xoxo.toXOXO(d)
        expectedHTML = b'<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a></li></ol>'
        self.assertEqual(html, expectedHTML)

    def testSpecialAttributeRoundTripFull(self):
        '''check it makes an <a href with a url parameter'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help', 'text': 'an example'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testSpecialAttributeRoundTripNoText(self):
        '''check it makes an <a href with a url parameter and no text attribute'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testSpecialAttributeRoundTripNoTextOrTitle(self):
        '''check it makes an <a href with a url parameter and no text or title attribute'''
        d = {'url': 'http://example.com/more.xoxo'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testUnicodeRoundtrip(self):
        '''check unicode characters can go to xoxo and back'''
        # Escapes keep the test independent of this file's own encoding:
        # U+00C7 is the C-cedilla in "Çelik", U+2603 is the snowman.
        src = u'Tantek \u00c7elik and a snowman \u2603'
        html = xoxo.toXOXO(src)
        self.assertEqual(src, xoxo.fromXOXO(html))


if __name__ == "__main__":
    unittest.main()
else:
    # Importing this module runs the whole suite straight away.
    runner = unittest.TextTestRunner()
    suite = unittest.defaultTestLoader.loadTestsFromTestCase(xoxoTestCases)
    runner.run(suite)