xoxo-sample-code: Difference between revisions
Jump to navigation
Jump to search
Kevin Marks (talk | contribs) (→testxoxo.py: update testcases for encoding and dictionaries) |
1143297912 (talk | contribs) No edit summary |
||
Line 1,362: | Line 1,362: | ||
</nowiki></pre> | </nowiki></pre> | ||
<div style="display:none"> | |||
[l_sp] | |||
<a href="http://test.com"> test</a> | |||
</div> |
Revision as of 14:41, 25 March 2006
XOXO Sample Code
A whole bunch of open source (CC-by-2.0, Apache 2.0) sample code to read and write xoxo files in Python and Java (with Perl, PHP, ... to follow).
Python
xoxo.py
#!/usr/bin/python # -*- coding: utf-8 -*- """xoxo.py - a utility module for transforming to and from the XHTMLOutlines format XOXO http://microformats.org/wiki/xoxo toXOXO takes a Python datastructure (tuples, lists or dictionaries, arbitrarily nested) and returns a XOXO representation of it. fromXOXO parses an XHTML file for a xoxo list and returns the structure """ __version__ = "0.9" __date__ = "2005-11-02" __author__ = "Kevin Marks <kmarks@technorati.com>" __copyright__ = "Copyright 2004-2006, Kevin Marks & Technorati" __license__ = "http://creativecommons.org/licenses/by/2.0/ CC-by-2.0], [http://www.apache.org/licenses/LICENSE-2.0 Apache 2.0" __credits__ = """Tantek Çelik and Mark Pilgrim for data structure""" __history__ = """ TODO: add <title> tag TODO: add a proper profile link 0.9 smarter parsing for encoding and partial markup; fix dangling dictionary case 0.8 work in unicode then render to utf-8 0.7 initial encoding support - just utf-8 for now 0.6 support the special behaviour for url properties to/from <a> 0.5 fix some awkward side effects of whitespace and text outside our expected tags; simplify writing code 0.4 add correct XHTML headers so it validates 0.3 read/write version; fixed invalid nested list generation; 0.1 first write-only version """ try: True, False except NameError: True, False = not not 1, not 1 containerTags={'ol':False,'ul':False,'dl':False} import sgmllib, urllib, urlparse, re,codecs def toUnicode(key): if type(key) == type(u'unicode'): uKey= key else: try: uKey=unicode(key,'utf-8') except: uKey=unicode(key,'windows_1252') return uKey def makeXOXO(struct,className=None): s=u'' if isinstance(struct,list) or isinstance(struct,tuple): if className: s += u'<ol class="%s">' % className else: s+= u"<ol>" for item in struct: s+=u"<li>" + makeXOXO(item,None)+"</li>" s +=u"</ol>" elif isinstance(struct,dict): d=struct.copy() if d.has_key('url'): uURL=toUnicode(d['url']) s+=u'<a href="%s" ' % uURL text = d.get('text',d.get('title',uURL)) for 
attr in ('title','rel','type'): if d.has_key(attr): xVal = makeXOXO(d[attr],None) s +=u'%s="%s" ' % (attr,xVal) del d[attr] s +=u'>%s</a>' % makeXOXO(text,None) if d.has_key('text'): del d['text'] del d['url'] if len(d): s +=u"<dl>" for key,value in d.items(): xVal = makeXOXO(value,None) uKey=toUnicode(key) s+= u'<dt>%s</dt><dd>%s</dd>' % (uKey, xVal) s +=u"</dl>" elif type(struct) == type(u'unicode'): s+=struct else: if not type(struct)==type(' '): struct=str(struct) s += toUnicode(struct) return s class AttrParser(sgmllib.SGMLParser): def __init__(self): sgmllib.SGMLParser.__init__(self) self.text=[] self.encoding='utf-8' def cleanText(self,inText): if type(inText) == type(u'unicode'): inText = inText.encode(self.encoding,'replace') self.text=[] self.reset() self.feed(inText) return ''.join(self.text) def setEncoding(self,encoding): if 'ascii' in encoding: encoding='windows_1252' # so we don't throw an exception on high-bit set chars in there by mistake if encoding and not encoding =='text/html': try: canDecode = codecs.getdecoder(encoding) self.encoding = encoding except: try: encoding='japanese.' +encoding canDecode = codecs.getdecoder(encoding) self.encoding = encoding except: print "can't deal with encoding %s" % encoding def handle_entityref(self, ref): # called for each entity reference, e.g. for "©", ref will be "copy" # map through to unicode where we can try: entity =htmlentitydefs.name2codepoint[ref] self.handleUnicodeData(unichr(entity)) except: try: handle_charref(ref) # deal with char-ref's missing the '#' (see Akma) except: self.handle_data("&%s" % ref) def handle_charref(self, ref): # called for each character reference, e.g. for " ", ref will be "160" # Reconstruct the original character reference. try: if ref[0]=='x': self.handleUnicodeData(unichr(int(ref[1:],16))) else: self.handleUnicodeData(unichr(int(ref))) except: self.handle_data("&#%s" % ref) # called for each block of plain text, i.e. 
outside of any tag and # not containing any character or entity references def handle_data(self, text): if type(text)==type(u' '): self.handleUnicodeData(text) if self.encoding== 'utf-8': try: uText = unicode(text,self.encoding) #utf-8 is pretty clear when it is wrong except: uText = unicode(text,'windows_1252','ignore') # and this is the likely wrongness else: uText = unicode(text,self.encoding,'replace') # if they have really broken encoding, (eg lots of shift-JIS blogs) self.handleUnicodeData(uText) def handleUnicodeData(self, uText): self.text.append(uText) class xoxoParser(AttrParser): def __init__(self): AttrParser.__init__(self) self.structs=[] self.xostack=[] self.textstack=[''] self.attrparse = AttrParser() def normalize_attrs(self, attrs): attrs = [(k.lower(), self.attrparse.cleanText(v)) for k, v in attrs] attrs = [(k, k in ('rel','type') and v.lower() or v) for k, v in attrs] return attrs def setEncoding(self,encoding): AttrParser.setEncoding(self,encoding) self.attrparse.setEncoding(encoding) def pushStruct(self,struct): if type(struct) == type({}) and len(struct)==0 and len(self.structs) and type(self.structs[-1]) == type({}) and self.structs[-1].has_key('url') and self.structs[-1] != self.xostack[-1]: self.xostack.append(self.structs[-1]) # put back the <a>-made one for extra def's else: self.structs.append(struct) self.xostack.append(self.structs[-1]) def do_meta(self, attributes): atts = dict(self.normalize_attrs(attributes)) #print atts.encode('utf-8') if atts.has_key('http-equiv'): if atts['http-equiv'].lower() == "content-type": if atts.has_key('content'): encoding = atts['content'].split('charset=')[-1] self.setEncoding(encoding) def start_a(self,attrs): attrsD = dict(self.normalize_attrs(attrs)) attrsD['url']= attrsD.get('href','') if attrsD.has_key('href'): del attrsD['href'] self.pushStruct(attrsD) self.textstack.append('') def end_a(self): val = self.textstack.pop() if val: if self.xostack[-1].get('title','') == val: val='' if 
self.xostack[-1]['url'] == val: val='' if val: self.xostack[-1]['text']=val self.xostack.pop() def start_dl(self,attrs): self.pushStruct({}) def end_dl(self): self.xostack.pop() def start_ol(self,attrs): self.pushStruct([]) def end_ol(self): self.xostack.pop() def start_ul(self,attrs): self.pushStruct([]) def end_ul(self): self.xostack.pop() def start_li(self,attrs): self.textstack.append('') def end_li(self): val = self.textstack.pop() while ( self.structs[-1] != self.xostack[-1]): val = self.structs.pop() self.xostack[-1].append(val) if type(val) == type(' ') or type(val) == type(u' '): self.xostack[-1].append(val) def start_dt(self,attrs): self.textstack.append('') def end_dt(self): pass def start_dd(self,attrs): self.textstack.append('') def end_dd(self): val = self.textstack.pop() key = self.textstack.pop() if self.structs[-1] != self.xostack[-1]: val = self.structs.pop() self.xostack[-1][key]=val def handleUnicodeData(self, text): if len(self.stack) and containerTags.get(self.stack[-1],True): #skip text not within an element self.textstack[-1] += text def toXOXO(struct,addHTMLWrapper=False,cssUrl=''): if type(struct) ==type((1,))or type(struct) ==type([1,]): inStruct = struct else: inStruct = [struct] if addHTMLWrapper: s= u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />''' if cssUrl: s+=u'<style type="text/css" >@import "%s";</style>' % cssUrl s+=u"</head><body>%s</body></html>" % makeXOXO(inStruct,'xoxo') return s.encode('utf-8') else: return makeXOXO(inStruct,'xoxo').encode('utf-8') def fromXOXO(html): parser = xoxoParser() #parser.feed(unicode(html,'utf-8')) parser.feed(html) #print parser.structs structs=[struct for struct in parser.structs if struct] #print structs while (len(structs) ==1 and type(structs)==type([1,])): structs=structs[0] return 
structs # Allow direct invocation # Read HTML from URL, parse into data structures, then re-output import sys if __name__ == "__main__": if len(sys.argv) < 2: raise SystemExit("Usage: "+sys.argv[0]+" url\n"+__doc__) url=sys.argv[1] file = urllib.urlopen(url) html=file.read(-1) file.close s=fromXOXO(html) p=toXOXO(s,True) print p
testxoxo.py
# -*- coding: utf-8 -*-
"""testxoxo.py

Unit tests for xoxo.py
This file tests the functions in xoxo.py
The underlying model here is http://diveintopython.org/unit_testing/index.html
run from command line with
python testxoxo.py -v
"""
import xoxo
reload(xoxo)
import unittest

# Page wrapper that toXOXO(..., addHTMLWrapper=True) writes before </head>.
WRAPPER_HEAD = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN '
                'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"> '
                '<html xmlns="http://www.w3.org/1999/xhtml"><head profile="">'
                '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />')


class xoxoTestCases(unittest.TestCase):
    """Writing, parsing and round-trip tests for the xoxo module."""

    def testSimpleList(self):
        '''make a xoxo file from a list'''
        l = ['1', '2', '3']
        html = xoxo.toXOXO(l)
        self.assertEqual(html, '<ol class="xoxo"><li>1</li><li>2</li><li>3</li></ol>')

    def testNestedList(self):
        '''make a xoxo file from a list with a list in'''
        l = ['1', ['2', '3']]
        html = xoxo.toXOXO(l)
        self.assertEqual(html, '<ol class="xoxo"><li>1</li><li><ol><li>2</li><li>3</li></ol></li></ol>')

    def testDictionary(self):
        '''make a xoxo file from a dictionary'''
        d = {'test': '1', 'name': 'Kevin'}
        html = xoxo.toXOXO(d)
        # dictionary iteration order is not defined, so either key order is
        # a correct serialisation
        expected = [
            '<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd><dt>name</dt><dd>Kevin</dd></dl></li></ol>',
            '<ol class="xoxo"><li><dl><dt>name</dt><dd>Kevin</dd><dt>test</dt><dd>1</dd></dl></li></ol>',
        ]
        self.assertTrue(html in expected, html)

    def testSingleItem(self):
        '''make a xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l)
        self.assertEqual(html, '<ol class="xoxo"><li>test</li></ol>')

    def testWrapDiffers(self):
        '''make a xoxo file from a string with and without html wrapper and check they are different'''
        l = "test"
        html = xoxo.toXOXO(l)
        htmlwrap = xoxo.toXOXO(l, addHTMLWrapper=True)
        self.assertNotEqual(html, htmlwrap)

    def testWrapSingleItem(self):
        '''make a wrapped xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l, addHTMLWrapper=True)
        self.assertEqual(
            html,
            WRAPPER_HEAD + '</head><body><ol class="xoxo"><li>test</li></ol></body></html>')

    def testWrapItemWithCSS(self):
        '''make a wrapped xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l, addHTMLWrapper=True, cssUrl='reaptest.css')
        self.assertEqual(
            html,
            WRAPPER_HEAD + '<style type="text/css" >@import "reaptest.css";</style>'
            '</head><body><ol class="xoxo"><li>test</li></ol></body></html>')

    def testDictionaryRoundTrip(self):
        ''' make a dictionary into a xoxo file and back again; check it is the same'''
        d = {'test': '1', 'name': 'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url in into a xoxo file and back again; check it is the same'''
        d = {'url': 'http://example.com', 'name': 'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testNestedDictionaryRoundTrip(self):
        ''' make a dictionary with a dict in into a xoxo file and back again; check it is the same'''
        d = {'test': '1', 'inner': {'name': 'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testNestedDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url and a dict into a xoxo file and back again; check it is the same'''
        d = {'url': 'http://example.com', 'inner': {'name': 'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testNestedDictionariesWithURLsRoundTrip(self):
        ''' make a dictionary with an url and a dict with an url into a xoxo file and back again; check it is the same'''
        d = {'url': 'http://example.com', 'inner': {'name': 'Kevin', 'url': 'http://slashdot.org'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testListRoundTrip(self):
        ''' make a list into a xoxo file and back again; check it is the same'''
        l = ['3', '2', '1']
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testListofDictsRoundTrip(self):
        ''' make a list of Dicts into a xoxo file and back again; check it is the same'''
        l = ['3', {'a': '2'}, {'b': '1', 'c': '4'}]
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testListofListsRoundTrip(self):
        ''' make a list of Lists into a xoxo file and back again; check it is the same'''
        l = ['3', ['a', '2'], ['b', ['1', ['c', '4']]]]
        html = xoxo.toXOXO(l)
        newdl = xoxo.fromXOXO(html)
        self.assertEqual(l, newdl)

    def testDictofListsRoundTrip(self):
        ''' make a dict with lists in into a xoxo file and back again; check it is the same'''
        d = {'test': ['1', '2'],
             'name': 'Kevin',
             'nestlist': ['a', ['b', 'c']],
             'nestdict': {'e': '6', 'f': '7'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d, newd)

    def testXOXOjunkInContainers(self):
        '''make sure text outside <li> etc is ignored'''
        d = xoxo.fromXOXO('<ol>bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>')
        self.assertEqual(d, {'good': 'buy'})

    def testXOXOjunkInElements(self):
        '''make sure text within <li> but outside a subcontainer is ignored'''
        l = xoxo.fromXOXO('<ol><li>bad<dl><dt>good</dt><dd>buy</dd></dl>worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>')
        self.assertEqual(l, [{'good': 'buy'}, ['OK']])

    def testXOXOWithSpacesAndNewlines(self):
        '''unmung some xoxo with spaces in and check result is right'''
        xoxoSample = (
            "<ol class='xoxo'> <li> <dl> "
            "<dt>text</dt> <dd>item 1</dd> "
            "<dt>description</dt> <dd> This item represents the main point we're trying to make.</dd> "
            "<dt>url</dt> <dd>http://example.com/more.xoxo</dd> "
            "<dt>title</dt> <dd>title of item 1</dd> "
            "<dt>type</dt> <dd>text/xml</dd> "
            "<dt>rel</dt> <dd>help</dd> "
            "</dl> </li> </ol>")
        d = xoxo.fromXOXO(xoxoSample)
        d2 = {'text': 'item 1',
              'description': " This item represents the main point we're trying to make.",
              'url': 'http://example.com/more.xoxo',
              'title': 'title of item 1',
              'type': 'text/xml',
              'rel': 'help'}
        # re-serialising must at least not raise; comparing the markup itself
        # needs a smarter whitespace-sensitive comparison
        xoxo.toXOXO(d)
        self.assertEqual(d, d2)

    def testSpecialAttributeDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in and check result is right'''
        xoxoSample = (
            "<ol class='xoxo'> <li> <dl> "
            "<dt>text</dt> <dd>item 1</dd> "
            "<dt>url</dt> <dd>http://example.com/more.xoxo</dd> "
            "<dt>title</dt> <dd>title of item 1</dd> "
            "<dt>type</dt> <dd>text/xml</dd> "
            "<dt>rel</dt> <dd>help</dd> "
            "</dl> </li> </ol>")
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample = (
            "<ol class='xoxo'> <li>"
            '<a href="http://example.com/more.xoxo" title="title of item 1" type="text/xml" rel="help">item 1</a> '
            '<!-- note how the "text" property is simply the contents of the <a> element --> '
            '</li> </ol>')
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d, d2)

    def testSpecialAttributeAndDLDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in plus a <dl> in the same item and check result is right'''
        xoxoSample = (
            '<ol class="xoxo"> <li> <dl> '
            '<dt>text</dt> <dd>item 1</dd> '
            "<dt>description</dt> <dd> This item represents the main point we're trying to make.</dd> "
            '<dt>url</dt> <dd>http://example.com/more.xoxo</dd> '
            '<dt>title</dt> <dd>title of item 1</dd> '
            '<dt>type</dt> <dd>text/xml</dd> '
            '<dt>rel</dt> <dd>help</dd> '
            '</dl> </li> </ol>')
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample = (
            '<ol class="xoxo"> <li>'
            '<a href="http://example.com/more.xoxo" title="title of item 1" type="text/xml" rel="help">item 1</a> '
            '<!-- note how the "text" property is simply the contents of the <a> element --> '
            '<dl> '
            "<dt>description</dt> <dd> This item represents the main point we're trying to make.</dd> "
            '</dl> </li> </ol>')
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d, d2)

    def testSpecialAttributeEncode(self):
        '''check it makes an <a href with a url parameter'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help', 'text': 'an example'}
        html = xoxo.toXOXO(d)
        expectedHTML = '<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a></li></ol>'
        self.assertEqual(html, expectedHTML)

    def testSpecialAttributeRoundTripFull(self):
        '''check it makes an <a href with a url parameter'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help', 'text': 'an example'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testSpecialAttributeRoundTripNoText(self):
        '''check it makes an <a href with a url parameter and no text attribute'''
        d = {'url': 'http://example.com/more.xoxo', 'title': 'sample url', 'type': "text/xml", 'rel': 'help'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testSpecialAttributeRoundTripNoTextOrTitle(self):
        '''check it makes an <a href with a url parameter and no text or title attribute'''
        d = {'url': 'http://example.com/more.xoxo'}
        html = xoxo.toXOXO(d)
        self.assertEqual(d, xoxo.fromXOXO(html))

    def testAttentionRoundTrip(self):
        '''check nested <a> and <dl> and <a> are preserved'''
        kmattn = (
            '<ol class="xoxo">'
            '<li><a href="http://www.boingboing.net/" title="Boing Boing Blog" >Boing Boing Blog</a>'
            '<dl><dt>alturls</dt><dd><ol>'
            '<li><a href="http://boingboing.net/rss.xml" >xmlurl</a></li>'
            '</ol></dd><dt>description</dt><dd>Boing Boing Blog</dd></dl></li>'
            '<li><a href="http://www.financialcryptography.com/" title="Financial Cryptography" >Financial Cryptography</a>'
            '<dl><dt>alturls</dt><dd><ol>'
            '<li><a href="http://www.financialcryptography.com/mt/index.rdf" >xmlurl</a></li>'
            '</ol></dd><dt>description</dt><dd>Financial Cryptography</dd></dl></li>'
            '<li><a href="http://hublog.hubmed.org/" title="HubLog" >HubLog</a>'
            '<dl><dt>alturls</dt><dd><ol>'
            '<li><a href="http://hublog.hubmed.org/index.xml" >xmlurl</a></li>'
            '<li><a href="http://hublog.hubmed.org/foaf.rdf" >foafurl</a></li>'
            '</ol></dd><dt>description</dt><dd>HubLog</dd></dl></li>'
            '</ol>')
        d = xoxo.fromXOXO(kmattn)
        newattn = xoxo.toXOXO(d)
        d2 = xoxo.fromXOXO(newattn)
        self.assertEqual(newattn, xoxo.toXOXO(d2))
        self.assertEqual(d, d2)
        self.assertEqual(kmattn, newattn)

    def testUnicodeRoundtrip(self):
        '''check unicode characters can go to xoxo and back'''
        src = unicode('Tantek \xc3\x87elik and a snowman \xe2\x98\x83', 'utf-8')
        html = xoxo.toXOXO(src)
        self.assertEqual(src, xoxo.fromXOXO(html))

    def testUtf8Roundtrip(self):
        '''check utf8 characters can go to xoxo and back'''
        src = 'Tantek \xc3\x87elik and a snowman \xe2\x98\x83'
        html = xoxo.toXOXO(src)
        self.assertEqual(src, xoxo.fromXOXO(html).encode('utf-8'))

    def testWindows1252Roundtrip(self):
        '''check 1252 characters can go to xoxo and back'''
        src = 'This is an evil\xa0space'
        html = xoxo.toXOXO(src)
        self.assertEqual(src, xoxo.fromXOXO(html).encode('windows-1252'))


if __name__ == "__main__":
    unittest.main()
else:
    # importing the module (e.g. from an interactive session) runs the suite
    runner = unittest.TextTestRunner()
    suite = unittest.makeSuite(xoxoTestCases, 'test')
    runner.run(suite)
Java
XOXOWriter.java
/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */
package org.atompub.draft.xoxo;

import java.util.*;

/**
 * Serialises nested Lists, arrays and Maps as XOXO markup.
 *
 * Lists and Object arrays become &lt;ol&gt;, Maps become &lt;dl&gt;, and a
 * Map with a "url" key becomes an &lt;a href&gt; whose title/rel/type entries
 * are written as attributes. Values are written without escaping.
 */
public class XOXOWriter {

    /** Map keys that are written as attributes of an &lt;a&gt; element. */
    public String[] attrs = {"title","rel","type"};

    private static final String XHTML_XMLNS =
        " xmlns=\"http://www.w3.org/1999/xhtml\"";

    /** Writes struct as a list with the given class and an xmlns declaration. */
    public String makeXOXO(List struct, String className){
        return makeXOXO(struct, className, 0, true);
    }

    /** Writes struct as a list with the given class; xmlns is optional. */
    public String makeXOXO(List struct, String className, boolean doNSDeclaration){
        return makeXOXO(struct, className, 0, doNSDeclaration);
    }

    /** Writes struct as a list of class "xoxo" with an xmlns declaration. */
    public String makeXOXO(List struct){
        return makeXOXO(struct, "xoxo", 0, true);
    }

    /** Writes a nested value: no class attribute and no xmlns declaration. */
    public String makeXOXO(Object struct, int depth){
        return makeXOXO(struct, null, 0, false);
    }

    /**
     * Writes any supported value.
     *
     * @param struct          value to write; null yields the empty string
     * @param className       class attribute for an &lt;ol&gt;, or null for none
     * @param depth           nesting level handed on to nested calls
     * @param doNSDeclaration whether an &lt;ol&gt; carries the XHTML xmlns
     * @return the markup for struct
     */
    public String makeXOXO(Object struct, String className, int depth,
                           boolean doNSDeclaration){
        if(struct == null){
            return "";
        }
        // arrays are handled as the equivalent list
        Object node = (struct instanceof Object[])
            ? Arrays.asList((Object[]) struct)
            : struct;
        boolean isList = node instanceof List;

        StringBuffer out = new StringBuffer();
        if(isList){
            out.append("<ol");
            if(doNSDeclaration){
                out.append(XHTML_XMLNS);
            }
            if(className != null){
                out.append(" class=\"").append(className).append("\"");
            }
            out.append(">");
        }

        if(node instanceof Map){
            writeMap(out, (Map) node, depth);
        }else if(isList){
            Iterator items = ((List) node).iterator();
            while(items.hasNext()){
                out.append("<li>");
                out.append(makeXOXO(items.next(), depth+1));
                out.append("</li>");
            }
            out.append("</ol>");
        }else{
            out.append(node);
        }
        return out.toString();
    }

    /** Appends a map: an optional &lt;a&gt; element, then the rest as &lt;dl&gt;. */
    private void writeMap(StringBuffer out, Map source, int depth){
        Map rest = new LinkedHashMap(source); // copy, the caller's map is kept intact
        if(rest.containsKey("url")){
            out.append("<a href=\"").append(rest.get("url")).append("\" ");
            // link text: "text" if present, else "title", else the url
            String textKey = "url";
            if(rest.containsKey("text")){
                textKey = "text";
            }else if(rest.containsKey("title")){
                textKey = "title";
            }
            Object text = rest.get(textKey);
            for(int i=0; i<attrs.length; i++){
                String name = attrs[i];
                String rendered = makeXOXO(rest.get(name), depth+1);
                if(rendered != null && !rendered.equals("")){
                    out.append(name).append("=\"").append(rendered).append("\" ");
                }
                rest.remove(name);
            }
            out.append(">").append(makeXOXO(text, depth+1)).append("</a>");
            rest.remove("text");
            rest.remove("url");
        }
        if(rest.size() > 0){
            out.append("<dl>");
            Iterator keys = rest.keySet().iterator();
            while(keys.hasNext()){
                Object key = keys.next();
                String rendered = makeXOXO(rest.get(key), depth+1);
                out.append("<dt>").append(key).append("</dt>");
                out.append("<dd>").append(rendered).append("</dd>");
            }
            out.append("</dl>");
        }
    }

    /** Writes a list as bare XOXO, without a page wrapper. */
    public String toXOXO(List struct){
        return toXOXO(struct, false, null);
    }

    /** Writes a single value as a one-item XOXO list. */
    public String toXOXO(Object struct){
        List single = new ArrayList();
        single.add(struct);
        return toXOXO(single);
    }

    /** Writes a single value as a one-item list, optionally inside a page. */
    public String toXOXO(Object struct, boolean addHTMLWrapper, String cssUrl){
        List single = new ArrayList();
        single.add(struct);
        return toXOXO(single, addHTMLWrapper, cssUrl);
    }

    /**
     * Writes a list, optionally inside a complete XHTML page.
     *
     * @param struct         list to write
     * @param addHTMLWrapper whether to emit the surrounding page
     * @param cssUrl         stylesheet to import in the page head, or null
     */
    public String toXOXO(List struct, boolean addHTMLWrapper, String cssUrl){
        if(!addHTMLWrapper){
            return makeXOXO(struct, "xoxo");
        }
        // NOTE(review): "DTD" and "XHTML" are joined without a space here;
        // XOXOTest builds the same string, so the output is left unchanged.
        StringBuffer page = new StringBuffer();
        page.append("<!DOCTYPE html PUBLIC \"-//W3C//DTD");
        page.append("XHTML 1.0 Transitional//EN\n");
        page.append("http://www.w3.org/TR/xhtml1/DTD/");
        page.append("xhtml1-transitional.dtd\">");
        page.append("<html xmlns=\"http://www.w3.org/1999/xhtml\">");
        page.append("<head>");
        if(cssUrl != null){
            page.append("<style type=\"text/css\">@import \"");
            page.append(cssUrl);
            page.append("\";</style>");
        }
        page.append("</head><body>");
        page.append(makeXOXO(struct, "xoxo", false));
        page.append("</body></html>");
        return page.toString();
    }
}
XOXOParser.java
This needs some small additions to handle the XHTML DTD and named character entities.
/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */
package org.atompub.draft.xoxo;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;
import java.util.*;
import java.io.InputStream;
import java.io.StringReader;
import java.io.IOException;

/**
 * SAX handler that rebuilds Lists and Maps from XOXO markup.
 *
 * Only elements in the XHTML namespace are interpreted. &lt;ol&gt;/&lt;ul&gt;
 * become Lists, &lt;dl&gt; becomes a Map, and &lt;a&gt; becomes a Map holding
 * its attributes (href stored as "url") plus its text as "text".
 */
public class XOXOParser extends DefaultHandler {

    protected String XHTML_NS = "http://www.w3.org/1999/xhtml";

    /** Local names of the open elements ("foo" for non-XHTML ones). */
    protected List elStack;
    /** Elements whose direct text content is ignored. */
    protected Map listEls;
    /** Every structure built so far. */
    public List structs;
    /** Structures whose elements are still open. */
    public List xoStack;
    /** Text gathered for the open li/dt/dd/a elements. */
    public List textStack;

    public XOXOParser() {
        reset();
    }

    /**
     * Opens struct, or re-opens the preceding &lt;a&gt; Map for a &lt;dl&gt;.
     *
     * An empty Map (a &lt;dl&gt;) that directly follows a closed &lt;a&gt; adds
     * its definitions to that &lt;a&gt;'s Map instead of starting a new one,
     * as the Python reference implementation does.
     */
    protected void pushStruct(Object struct){
        Object lastBuilt = structs.isEmpty() ? null : structs.get(structs.size()-1);
        Object lastOpen = xoStack.isEmpty() ? null : xoStack.get(xoStack.size()-1);
        boolean extendsLink = (struct instanceof Map)
            && ((Map) struct).isEmpty()
            && (lastBuilt instanceof Map)
            && ((Map) lastBuilt).containsKey("url")
            && (lastBuilt != lastOpen);   // identity: it must already be closed
        if(extendsLink){
            // put back the <a>-made one for extra defs
            xoStack.add(lastBuilt);
        }else{
            structs.add(struct);
            xoStack.add(struct);
        }
    }

    public void startElement(String nsUri, String localName, String qName,
                             Attributes atts){
        // bounce non-XHTML elements
        if(XHTML_NS.equals(nsUri)){
            elStack.add(localName);
        }else{
            elStack.add("foo");
            return;
        }
        if(localName.equals("a")){
            Map attmap = new LinkedHashMap();
            int len = atts.getLength();
            for(int i=0; i<len; i++){
                attmap.put(atts.getQName(i),atts.getValue(i));
            }
            if(attmap.containsKey("href")){
                attmap.put("url",attmap.get("href"));
                attmap.remove("href");
            }
            pushStruct(attmap);
            textStack.add("");
        }else if(localName.equals("dl")){
            pushStruct(new LinkedHashMap());
        }else if(localName.equals("ol")){
            pushStruct(new ArrayList());
        }else if(localName.equals("ul")){
            pushStruct(new ArrayList());
        }else if(localName.equals("li")){
            textStack.add("");
        }else if(localName.equals("dt")){
            textStack.add("");
        }else if(localName.equals("dd")){
            textStack.add("");
        }
    }

    public void endElement(String nsUri, String localName, String qName){
        elStack.remove(elStack.size()-1);
        // bounce non-XHTML elements; compare contents, since the parser is
        // not required to hand over the same String instance as XHTML_NS
        if(!XHTML_NS.equals(nsUri)){
            return;
        }
        if(localName.equals("a")){
            String val = (String) textStack.remove(textStack.size()-1);
            if (val.length() > 0){
                Map defs = (Map) xoStack.get(xoStack.size()-1);
                // link text that merely repeats the title or url is not stored
                String defVal = (String) defs.get("title");
                if((defVal != null) && (val.equals(defVal))){
                    val = "";
                }
                defVal = (String) defs.get("url");
                if((defVal != null) && (val.equals(defVal))){
                    val = "";
                }
                if(val.length() > 0){
                    defs.put("text",val);
                }
            }
            xoStack.remove(xoStack.size()-1);
        }else if(localName.equals("dl")){
            xoStack.remove(xoStack.size()-1);
        }else if(localName.equals("ol")){
            xoStack.remove(xoStack.size()-1);
        }else if(localName.equals("ul")){
            xoStack.remove(xoStack.size()-1);
        }else if(localName.equals("li")){
            Object text = textStack.remove(textStack.size()-1);
            List last = (List) xoStack.get(xoStack.size()-1);
            // every structure built inside this <li> becomes an item; the
            // item's own text is kept only when it held no structure
            boolean sawStruct = false;
            while(structs.get(structs.size()-1) != last){
                last.add(structs.remove(structs.size()-1));
                sawStruct = true;
            }
            if(!sawStruct){
                last.add(text);
            }
        }else if(localName.equals("dd")){
            Object val = textStack.remove(textStack.size()-1);
            Object key = textStack.remove(textStack.size()-1);
            Map last = (Map) xoStack.get(xoStack.size()-1);
            if(structs.get(structs.size()-1) != last){
                // a nested structure wins over the text
                val = structs.remove(structs.size()-1);
            }
            last.put(key,val);
        }
    }

    public void characters(char[] ch, int start, int length){
        // text is kept only inside an open structure, outside the pure
        // container elements, and while some element is collecting text
        if((xoStack.size() > 0) && (elStack.size() > 0)
           && (textStack.size() > 0)
           && (!listEls.containsKey(elStack.get(elStack.size()-1)))){
            String text = (String) textStack.get(textStack.size()-1);
            String more = new String(ch,start,length);
            textStack.set(textStack.size()-1,text+more);
        }
    }

    /** Parses markup held in a String. */
    public Object parse(String s) throws SAXException, IOException{
        return parse(new InputSource(new StringReader(s)));
    }

    /** Parses markup read from a stream. */
    public Object parse(InputStream is) throws SAXException, IOException {
        return parse(new InputSource(is));
    }

    /**
     * Parses the input and returns the structure found in it.
     *
     * Single-item Lists are unwrapped, so a document holding one Map yields
     * that Map. The parser state is cleared afterwards, also on failure.
     *
     * @throws SAXException if the input is not well-formed XML
     * @throws IOException  if the input cannot be read
     */
    public Object parse(InputSource in) throws SAXException, IOException {
        try{
            XMLReader parser = XMLReaderFactory.createXMLReader();
            parser.setContentHandler(this);
            parser.parse(in);
            List returnList = new ArrayList();
            for(Iterator i = this.structs.iterator(); i.hasNext();){
                Object thing = i.next();
                if(thing != null){
                    returnList.add(thing);
                }
            }
            while((returnList.size()==1)
                  && (returnList.get(0) instanceof List)){
                returnList = (List) returnList.get(0);
            }
            if(returnList.size()==1){
                return returnList.get(0);
            }
            return returnList;
        }finally{
            // leave the instance reusable even when parsing threw
            reset();
        }
    }

    /** Clears all parsing state. */
    protected void reset(){
        elStack = new ArrayList();
        listEls = new HashMap();
        structs = new ArrayList();
        xoStack = new ArrayList();
        textStack = new ArrayList();
        listEls.put("ol","ol");
        listEls.put("ul","ul");
        listEls.put("dl","dl");
    }
}
XOXOTest.java
/* * Copyright 2005 Robert Sayre * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. * * Portions of this code are derived from the Apache-licensed Python XOXO * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code> */ package org.atompub.draft.xoxo.tests; import junit.framework.TestSuite; import junit.framework.TestCase; import junit.textui.TestRunner; import org.atompub.draft.xoxo.XOXOWriter; import org.atompub.draft.xoxo.XOXOParser; import java.util.*; public class XOXOTest extends TestCase { public static void main(String[] args) { new TestRunner().doRun(new TestSuite(XOXOTest.class)); } String XHTML_DEC = "xmlns=\"http://www.w3.org/1999/xhtml\" "; public String simpleListHTML = "<ol " + XHTML_DEC + "class=\"xoxo\">" + "<li>1</li><li>2</li><li>3</li></ol>"; public void testSimpleList(){ String [] numbers = {"1","2","3"}; XOXOWriter xoxo = new XOXOWriter(); assertEquals(simpleListHTML, xoxo.toXOXO(Arrays.asList(numbers))); } public void testStringIntegerList(){ Object[] numbers = {new Integer(1),"2","3"}; XOXOWriter xoxo = new XOXOWriter(); assertEquals(simpleListHTML, xoxo.toXOXO(Arrays.asList(numbers))); } public String nestedListHTML = "<ol " + XHTML_DEC + "class=\"xoxo\"><li>1</li><li>" + "<ol><li>2</li><li>3</li></ol></li></ol>"; public void testNestedList(){ Object[] arr = {"2","3"}; Object[] nested = {"1",Arrays.asList(arr)}; XOXOWriter xoxo = new XOXOWriter(); assertEquals(nestedListHTML, xoxo.toXOXO(Arrays.asList(nested))); } public void testNestedArray(){ 
Object[] arr = {"2","3"}; Object[] nested = {"1",arr}; XOXOWriter xoxo = new XOXOWriter(); assertEquals(nestedListHTML, xoxo.toXOXO(Arrays.asList(nested))); } public String dictHTML = "<ol " + XHTML_DEC + "class=\"xoxo\">" + "<li><dl><dt>test</dt><dd>1</dd><dt>name</dt>" + "<dd>Kevin</dd></dl></li></ol>"; public void testDictionary(){ Map dict = new LinkedHashMap(); dict.put("test", new Integer(1)); dict.put("name", "Kevin"); XOXOWriter xoxo = new XOXOWriter(); assertEquals(dictHTML, xoxo.toXOXO(dict)); } public String singleHTML = "<ol " + XHTML_DEC + "class=\"xoxo\">" + "<li>test</li></ol>"; public void testSingleItem(){ String item = "test"; XOXOWriter xoxo = new XOXOWriter(); assertEquals(singleHTML, xoxo.toXOXO(item)); } public void testWrapDiffers(){ String item = "test"; XOXOWriter xoxo = new XOXOWriter(); String nowrap = xoxo.toXOXO(item); Object[] itemArr = {item}; String wrap = xoxo.toXOXO(Arrays.asList(itemArr),true,null); assertFalse(wrap.equals(nowrap)); } String startHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD" + "XHTML 1.0 Transitional//EN\n" + "http://www.w3.org/TR/xhtml1/DTD/" + "xhtml1-transitional.dtd\">" + "<html xmlns=\"http://www.w3.org/1999/xhtml\">" + "<head></head><body>"; public String singleWrapHTML = "<ol " + "class=\"xoxo\">" + "<li>test</li></ol>"; public String endHTML = "</body></html>"; public void testWrapSingleItem(){ String item = "test"; XOXOWriter xoxo = new XOXOWriter(); assertEquals(startHTML + singleWrapHTML + endHTML, xoxo.toXOXO(item,true,null)); } public void testXOXOParser(){ XOXOParser parser = new XOXOParser(); try{ parser.parse(dictHTML); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testDictRoundTrip(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("test", "1"); dict.put("name", "Kevin"); String html = xoxo.toXOXO(dict); try{ Object newDict = parser.parse(html); assertEquals(dict,newDict); }catch (Exception e){ 
fail(e.getMessage()); e.printStackTrace(); } } public void testListRoundTrip(){ Object[] obj = {"1","2","3"}; List testList = Arrays.asList(obj); XOXOWriter xoxo = new XOXOWriter(); String html = xoxo.toXOXO(testList); XOXOParser parser = new XOXOParser(); try{ Object newList = parser.parse(html); assertEquals(testList,newList); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testListOfDictsRoundTrip(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("test", "1"); dict.put("name", "Kevin"); Map dict2 = new LinkedHashMap(); dict2.put("one", "two"); dict2.put("three", "four"); dict2.put("five", "six"); Object[] obj = {"1",dict,dict2}; List testList = Arrays.asList(obj); String html = xoxo.toXOXO(testList); try{ Object newList = parser.parse(html); assertEquals(testList,newList); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testListOfListsRoundTrip(){ Object[] list1 = {"1","2","3"}; Object[] list2 = {"4","5","6", Arrays.asList(list1)}; Object[] list3 = {"7", Arrays.asList(list2)}; Object[] list4 = {"8", Arrays.asList(list3)}; List testList = Arrays.asList(list4); XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); String html = xoxo.toXOXO(testList); try{ Object newList = parser.parse(html); assertEquals(testList,newList); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testDictOfListsRoundTrip(){ Object[] list1 = {"1","2","3"}; Object[] list2 = {"4","5","6"}; Object[] list3 = {"7"}; Object[] list4 = {"8", "9"}; Map dict = new LinkedHashMap(); dict.put("foo", Arrays.asList(list1)); dict.put("bar", Arrays.asList(list2)); dict.put("baz", Arrays.asList(list3)); dict.put("qux", Arrays.asList(list4)); XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); String html = xoxo.toXOXO(dict); try{ Object newDict = parser.parse(html); assertEquals(dict,newDict); }catch 
(Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public String junkXOXO = "<ol " + XHTML_DEC + "class=\"xoxo\">" + "bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>"; public void testXOXOJunkInContainers(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("good","buy"); try{ Object newDict = parser.parse(junkXOXO); assertEquals(dict,newDict); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public String junkElementXOXO = "<ol " + XHTML_DEC + "><li>bad<dl><dt>good</dt><dd>buy</dd></dl>" + "worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>"; public void testXOXOjunkInElements(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("good","buy"); Object[] ok = {"OK"}; Object[] obj ={dict, Arrays.asList(ok)}; List testList = Arrays.asList(obj); try{ Object newList = parser.parse(junkElementXOXO); assertEquals(testList,newList); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public String xoxoSpacesNewlines = "<ol " + XHTML_DEC + " class='xoxo'> \n" + " <li>\n" + " <dl>\n" + " <dt>text</dt>\n" + " <dd>item 1</dd>\n" + " <dt>description</dt>\n" + " <dd> This item represents the main" + " point we're trying to make.</dd>\n" + " <dt>url</dt>\n" + " <dd>http://example.com/more.xoxo</dd>\n" + " <dt>title</dt>\n" + " <dd>title of item 1</dd>\n" + " <dt>type</dt>\n" + " <dd>text/xml</dd>\n" + " <dt>rel</dt>\n" + " <dd>help</dd>\n" + " </dl>\n" + " </li>\n" + "</ol>"; public void testXOXOWithSpacesAndNewlines(){ XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("text","item 1"); dict.put("description"," This item represents the main" + " point we're trying to make."); dict.put("url","http://example.com/more.xoxo"); dict.put("title","title of item 1"); dict.put("type","text/xml"); dict.put("rel","help"); try{ Object newDict = 
parser.parse(xoxoSpacesNewlines); assertEquals(dict,newDict); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public String xoxoSample = "<ol " + XHTML_DEC + " class='xoxo'> \n" + " <li>\n" + " <dl>\n" + " <dt>text</dt>\n" + " <dd>item 1</dd>\n" + " <dt>url</dt>\n" + " <dd>http://example.com/more.xoxo</dd>\n" + " <dt>title</dt>\n" + " <dd>title of item 1</dd>\n" + " <dt>type</dt>\n" + " <dd>text/xml</dd>\n" + " <dt>rel</dt>\n" + " <dd>help</dd>\n" + " </dl>\n" + " </li>\n" + "</ol>"; public String smartXOXOSample = "<ol " + XHTML_DEC + "class=\"xoxo\"> \n" + " <li><a href=\"http://example.com/more.xoxo\"\n" + " title=\"title of item 1\"\n" + " type=\"text/xml\"\n" + " rel=\"help\">item 1</a> \n" + "<!-- note how the \"text\" property is simply" + " the contents of the <a> element -->\n" + " </li>\n" + "</ol>"; public void testSpecialAttributeDecoding(){ XOXOParser parser = new XOXOParser(); try{ Object xoxoDict = parser.parse(xoxoSample); Object xoxoDict2 = parser.parse(smartXOXOSample); assertEquals(xoxoDict,xoxoDict2); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public String specialAttrHTML = "<ol " + XHTML_DEC + "class=\"xoxo\">" + "<li><a href=\"http://example.com/more.xoxo\" title=\"sample url\" " + "rel=\"help\" type=\"text/xml\" >an example</a></li></ol>"; public void testSpecialAttributeEncode(){ XOXOWriter xoxo = new XOXOWriter(); Map dict = new LinkedHashMap(); dict.put("url","http://example.com/more.xoxo"); dict.put("title","sample url"); dict.put("type","text/xml"); dict.put("rel","help"); dict.put("text","an example"); String html = xoxo.toXOXO(dict); assertEquals(specialAttrHTML,html); } public void testSpecialAttributeRoundTripFull(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("url","http://example.com/more.xoxo"); dict.put("title","sample url"); dict.put("type","text/xml"); dict.put("rel","help"); dict.put("text","an example"); 
String html = xoxo.toXOXO(dict); try{ Object newDict = parser.parse(html); assertEquals(dict,newDict); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testSpecialAttributeRoundTripNoText(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("url","http://example.com/more.xoxo"); dict.put("title","sample url"); dict.put("type","text/xml"); dict.put("rel","help"); String html = xoxo.toXOXO(dict); try{ Object newDict = parser.parse(html); assertEquals(dict,newDict); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testSpecialAttributeRoundTripNoTextOrTitle(){ XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); Map dict = new LinkedHashMap(); dict.put("url","http://example.com/more.xoxo"); dict.put("type","text/xml"); dict.put("rel","help"); String html = xoxo.toXOXO(dict); try{ Object newDict = parser.parse(html); assertEquals(dict,newDict); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } public void testUnicodeRoundTrip(){ String s = "Tantek Çelik and a snowman ?"; XOXOWriter xoxo = new XOXOWriter(); XOXOParser parser = new XOXOParser(); String html = xoxo.toXOXO(s); try{ Object newString = parser.parse(html); assertEquals(s,newString); }catch (Exception e){ fail(e.getMessage()); e.printStackTrace(); } } }