xoxo-sample-code: Difference between revisions

From Microformats Wiki
Jump to navigation Jump to search
Line 1,364: Line 1,364:


= PHP =
= PHP =
'''xoxolib.php'''
== xoxolib.php ==
<code><pre><nowiki>
<code><pre><nowiki>
<?
<?
Line 1,570: Line 1,570:
</nowiki></pre></code>
</nowiki></pre></code>


'''xoxotest.php'''
== xoxotest.php ==
<code><pre><nowiki>
<code><pre><nowiki>
<?
<?
Line 1,814: Line 1,814:
?>
?>
</nowiki></pre></code>
</nowiki></pre></code>
== other implementations ==
* [http://boxtheweb.mihopa.net/code/apis/xoxo2array.php.txt xoxo2array.php]
* [http://boxtheweb.mihopa.net/code/apis/xoxo2array.php.txt xoxo2array.php]
* [http://boxtheweb.mihopa.net/code/apis/array2xoxo.php.txt array2xoxo.php]
* [http://boxtheweb.mihopa.net/code/apis/array2xoxo.php.txt array2xoxo.php]
* [http://boxtheweb.mihopa.net/code/apis/#outlineclasses Outline Classes]
* [http://boxtheweb.mihopa.net/code/apis/#outlineclasses Outline Classes]

Revision as of 09:34, 25 October 2006

XOXO Sample Code

A collection of open source (CC-by-2.0, Apache 2.0) sample code for reading and writing XOXO files in Python and Java (with Perl, PHP, and other languages to follow).

Python

xoxo.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""xoxo.py - a utility module for transforming to and from the XHTMLOutlines format XOXO http://microformats.org/wiki/xoxo
toXOXO takes a Python datastructure (tuples, lists or dictionaries, arbitrarily nested) and returns a XOXO representation of it.
fromXOXO parses an XHTML file for a xoxo list and returns the structure
"""
__version__ = "0.9"
__date__ = "2005-11-02"
__author__ = "Kevin Marks <kmarks@technorati.com>"
__copyright__ = "Copyright 2004-2006, Kevin Marks & Technorati"
__license__ = "http://creativecommons.org/licenses/by/2.0/ CC-by-2.0], [http://www.apache.org/licenses/LICENSE-2.0 Apache 2.0"
__credits__ = """Tantek Çelik and Mark Pilgrim for data structure"""
__history__ = """
TODO: add <title> tag
TODO: add a proper profile link
0.9 smarter parsing for encoding and partial markup; fix dangling dictionary case
0.8 work in unicode then render to utf-8
0.7 initial encoding support - just utf-8 for now
0.6 support the special behaviour for url properties  to/from <a>
0.5 fix some awkward side effects of whitespace and text outside our expected tags; simplify writing code
0.4 add correct XHTML headers so it validates
0.3 read/write version; fixed invalid nested list generation;
0.1 first write-only version
"""

# Compatibility shim: if the interpreter does not define the names True and
# False, build them from integer truth values.
try:
    True, False
except NameError:
    True, False = not not 1, not 1
# Tags looked up by xoxoParser.handleUnicodeData: a False entry means text
# found directly inside that tag is skipped rather than collected.
containerTags={'ol':False,'ul':False,'dl':False}
import sgmllib, urllib, urlparse, re,codecs

def toUnicode(key):
    """Return *key* as a unicode string.

    Unicode input is returned unchanged. Byte strings are decoded as
    utf-8, falling back to windows-1252 (with replacement of undefined
    bytes) when they are not valid utf-8. Any other object is converted
    with str() first, so numeric dictionary keys can be rendered too.
    """
    uType = type(u'')
    if not isinstance(key, (str, uType)):
        key = str(key)
    if isinstance(key, uType):
        return key
    try:
        return key.decode('utf-8')
    except UnicodeError:
        # not utf-8: windows-1252 is the likely real encoding; never raise
        return key.decode('windows_1252', 'replace')

def makeXOXO(struct,className=None):
    """Render *struct* as a XOXO markup fragment and return it as unicode.

    Lists and tuples become <ol> (carrying class=className when given)
    with one <li> per item. Dictionaries with a 'url' key become an <a>
    element whose title/rel/type attributes come from the matching keys;
    whatever keys are left are written as a <dl>. Unicode strings are
    returned as they are and everything else goes through toUnicode().
    """
    if isinstance(struct,(list,tuple)):
        if className:
            pieces=[u'<ol class="%s">' % className]
        else:
            pieces=[u"<ol>"]
        for entry in struct:
            pieces.append(u"<li>" + makeXOXO(entry,None)+"</li>")
        pieces.append(u"</ol>")
        return u''.join(pieces)

    if isinstance(struct,dict):
        remaining=struct.copy() # work on a copy: keys are consumed below
        pieces=[]
        if 'url' in remaining:
            href=toUnicode(remaining['url'])
            pieces.append(u'<a href="%s" ' % href)
            # link text: explicit text, else the title, else the url itself
            label=remaining.get('text',remaining.get('title',href))
            for attr in ('title','rel','type'):
                if attr in remaining:
                    rendered=makeXOXO(remaining[attr],None)
                    pieces.append(u'%s="%s" ' % (attr,rendered))
                    del remaining[attr]
            pieces.append(u'>%s</a>' % makeXOXO(label,None))
            remaining.pop('text',None)
            del remaining['url']
        if remaining:
            pieces.append(u"<dl>")
            for key,value in remaining.items():
                rendered=makeXOXO(value,None)
                term=toUnicode(key)
                pieces.append(u'<dt>%s</dt><dd>%s</dd>' % (term, rendered))
            pieces.append(u"</dl>")
        return u''.join(pieces)

    if type(struct) == type(u'unicode'):
        return struct

    if type(struct) != type(' '):
        struct=str(struct)
    return toUnicode(struct)
class AttrParser(sgmllib.SGMLParser):
    def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        self.text=[]
        self.encoding='utf-8'
    def cleanText(self,inText):
        if type(inText) == type(u'unicode'):
            inText = inText.encode(self.encoding,'replace')
        self.text=[]
        self.reset()
        self.feed(inText)
        return ''.join(self.text)
    def setEncoding(self,encoding):
        if 'ascii' in encoding:
            encoding='windows_1252' # so we don't throw an exception on high-bit set chars in there by mistake
        if encoding and not encoding =='text/html':
            try:
                canDecode = codecs.getdecoder(encoding)
                self.encoding = encoding
            except:
                try:
                    encoding='japanese.' +encoding
                    canDecode = codecs.getdecoder(encoding)
                    self.encoding = encoding
                except:
                    print "can't deal with encoding %s" % encoding
                    
    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for "©", ref will be "copy"
        # map through to unicode where we can
        try:
            entity =htmlentitydefs.name2codepoint[ref]
            self.handleUnicodeData(unichr(entity))
        except:
            try:
                handle_charref(ref) # deal with char-ref's missing the '#' (see Akma)
            except:
                self.handle_data("&%s" % ref)

    def handle_charref(self, ref):
        # called for each character reference, e.g. for " ", ref will be "160"
        # Reconstruct the original character reference.
        try:
            if ref[0]=='x':
                self.handleUnicodeData(unichr(int(ref[1:],16)))
            else:
                self.handleUnicodeData(unichr(int(ref)))
        except:
            self.handle_data("&#%s" % ref)

# called for each block of plain text, i.e. outside of any tag and
# not containing any character or entity references
    def handle_data(self, text):
        if type(text)==type(u' '):
            self.handleUnicodeData(text)
        if self.encoding== 'utf-8':
            try:
                uText = unicode(text,self.encoding) #utf-8 is pretty clear when it is wrong
            except:
                uText = unicode(text,'windows_1252','ignore') # and this is the likely wrongness
        else:
            uText = unicode(text,self.encoding,'replace') # if they have really broken encoding, (eg lots of shift-JIS blogs)
        self.handleUnicodeData(uText)
    def handleUnicodeData(self, uText):
        self.text.append(uText)
        
class xoxoParser(AttrParser):
    """Parser that rebuilds Python structures from XOXO markup.

    <ol>/<ul> become lists, <dl> becomes a dictionary and <a> becomes a
    dictionary holding its attributes (href stored as 'url') plus its
    text. Finished top-level structures are left in self.structs.

    self.structs   structures created so far that are not yet attached
                   to a parent
    self.xostack   containers that are currently open
    self.textstack text being collected for the open <li>/<dt>/<dd>/<a>
    """
    def __init__(self):
        AttrParser.__init__(self)
        self.structs=[]
        self.xostack=[]
        self.textstack=['']
        self.attrparse = AttrParser()

    def normalize_attrs(self, attrs):
        """Lower-case attribute names, decode values, lower-case rel/type values."""
        cleaned = []
        for k, v in attrs:
            k = k.lower()
            v = self.attrparse.cleanText(v)
            if k in ('rel','type'):
                v = v.lower()
            cleaned.append((k, v))
        return cleaned

    def setEncoding(self,encoding):
        AttrParser.setEncoding(self,encoding)
        self.attrparse.setEncoding(encoding)

    def pushStruct(self,struct):
        """Open *struct* as the current container.

        An empty dictionary (a <dl>) that directly follows an <a>-made
        dictionary re-opens that dictionary instead, so the definitions
        are merged into the link's entry.
        """
        lastStruct = None
        if self.structs:
            lastStruct = self.structs[-1]
        openStruct = None
        if self.xostack:
            openStruct = self.xostack[-1]
        # identity, not equality: two distinct containers may compare equal
        if (isinstance(struct, dict) and not struct
                and isinstance(lastStruct, dict) and 'url' in lastStruct
                and lastStruct is not openStruct):
            self.xostack.append(lastStruct) # put back the <a>-made one for extra def's
        else:
            self.structs.append(struct)
            self.xostack.append(struct)

    def do_meta(self, attributes):
        """Pick up the charset from <meta http-equiv="Content-Type" content=...>."""
        atts = dict(self.normalize_attrs(attributes))
        if atts.get('http-equiv','').lower() == "content-type" and 'content' in atts:
            encoding = atts['content'].split('charset=')[-1]
            self.setEncoding(encoding.strip())

    def start_a(self,attrs):
        attrsD = dict(self.normalize_attrs(attrs))
        attrsD['url']= attrsD.pop('href','')
        self.pushStruct(attrsD)
        self.textstack.append('')

    def end_a(self):
        val = self.textstack.pop()
        if val:
            # text that only repeats the title or the url carries no information
            if self.xostack[-1].get('title','') == val:
                val=''
            if self.xostack[-1]['url'] == val:
                val=''
            if val:
                self.xostack[-1]['text']=val
        self.xostack.pop()

    def start_dl(self,attrs):
        self.pushStruct({})

    def end_dl(self):
        self.xostack.pop()

    def start_ol(self,attrs):
        self.pushStruct([])

    def end_ol(self):
        self.xostack.pop()

    def start_ul(self,attrs):
        self.pushStruct([])

    def end_ul(self):
        self.xostack.pop()

    def start_li(self,attrs):
        self.textstack.append('')

    def end_li(self):
        val = self.textstack.pop()
        if not self.xostack:
            return # <li> outside any list: nothing to attach it to
        # move every structure built inside this item into the open list
        while self.structs[-1] is not self.xostack[-1]:
            val = self.structs.pop()
            self.xostack[-1].append(val)
        # plain text item (no nested structure replaced val)
        if isinstance(val, (str, type(u''))):
            self.xostack[-1].append(val)

    def start_dt(self,attrs):
        self.textstack.append('')

    def end_dt(self):
        # the key text stays on textstack until the matching </dd>
        pass

    def start_dd(self,attrs):
        self.textstack.append('')

    def end_dd(self):
        val = self.textstack.pop()
        key = self.textstack.pop()
        if not self.xostack:
            return # <dd> outside any <dl>: nothing to attach it to
        # a structure built inside the <dd> wins over its text
        if self.structs[-1] is not self.xostack[-1]:
            val = self.structs.pop()
        self.xostack[-1][key]=val

    def handleUnicodeData(self, text):
        if self.stack and containerTags.get(self.stack[-1],True): #skip text not within an element
            self.textstack[-1] += text
def toXOXO(struct,addHTMLWrapper=False,cssUrl=''):
    """Return *struct* rendered as utf-8 encoded XOXO markup.

    Anything that is not exactly a list or tuple is wrapped in a
    one-item list first. With addHTMLWrapper the <ol class="xoxo"> is
    placed inside a complete XHTML page, which imports cssUrl as a
    stylesheet when that is given.
    """
    if type(struct) in (type((1,)), type([1,])):
        inStruct = struct
    else:
        inStruct = [struct]

    if not addHTMLWrapper:
        return makeXOXO(inStruct,'xoxo').encode('utf-8')

    # NOTE(review): the DOCTYPE below has no quotes between the public and
    # system identifiers; it is reproduced unchanged because testxoxo.py
    # compares against this exact output.
    page = [u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />''']
    if cssUrl:
        page.append(u'<style type="text/css" >@import "%s";</style>' % cssUrl)
    page.append(u"</head><body>%s</body></html>" % makeXOXO(inStruct,'xoxo'))
    return u''.join(page).encode('utf-8')
    
def fromXOXO(html):
    """Parse *html* and return the structure(s) found in it.

    Empty structures are dropped, and a list holding a single item is
    unwrapped repeatedly, so a document with one dictionary yields that
    dictionary rather than a list around it.
    """
    reader = xoxoParser()
    reader.feed(html)
    found = [item for item in reader.structs if item]
    while type(found) == type([1,]) and len(found) == 1:
        found = found[0]
    return found

# Allow direct invocation
# Read HTML from URL, parse into data structures, then re-output

import sys

if __name__ == "__main__":
  # Fetch the page named on the command line, parse it and print it
  # again as a complete XOXO page.
  if len(sys.argv) < 2: raise SystemExit("Usage: "+sys.argv[0]+" url\n"+__doc__)
  url=sys.argv[1]
  stream = urllib.urlopen(url)
  try:
    html=stream.read(-1)
  finally:
    # really close the connection (a bare attribute reference does nothing)
    stream.close()
  s=fromXOXO(html)
  p=toXOXO(s,True)
  print(p)

testxoxo.py

# -*- coding: utf-8 -*-
"""testxoxo.py 
Unit tests for xoxo.py
This file tests the functions in xoxo.py 
The underlying model here is http://diveintopython.org/unit_testing/index.html 

run from command line with
python testxoxo.py -v
"""
import xoxo
reload(xoxo)
import unittest

class xoxoTestCases(unittest.TestCase):
    '''Unit tests for xoxo.toXOXO and xoxo.fromXOXO: fixed output, round trips and tolerant parsing'''

    def testSimpleList(self):
        '''make a xoxo file from a list'''
        l = ['1','2','3']
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>1</li><li>2</li><li>3</li></ol>')
    def testNestedList(self):
        '''make a xoxo file from a list with a list in'''
        l = ['1',['2','3']]
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>1</li><li><ol><li>2</li><li>3</li></ol></li></ol>')

    def testDictionary(self):
        '''make a xoxo file from a dictionary'''
        d = {'test':'1','name':'Kevin'}
        html = xoxo.toXOXO(d)
        # dictionary iteration order is not defined, so accept either order
        testFirst = '<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd><dt>name</dt><dd>Kevin</dd></dl></li></ol>'
        nameFirst = '<ol class="xoxo"><li><dl><dt>name</dt><dd>Kevin</dd><dt>test</dt><dd>1</dd></dl></li></ol>'
        self.assertTrue(html in (testFirst, nameFirst))

    def testSingleItem(self):
        '''make a xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>test</li></ol>')

    def testWrapDiffers(self):
        '''make a xoxo file from a string with and without html wrapper and check they are different'''
        l = "test"
        html = xoxo.toXOXO(l)
        htmlwrap =  xoxo.toXOXO(l,addHTMLWrapper=True)
        self.assertNotEqual(html,htmlwrap)

    def testWrapSingleItem(self):
        '''make a wrapped xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l,addHTMLWrapper=True)
        self.assertEqual(html,'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body><ol class="xoxo"><li>test</li></ol></body></html>''')

    def testWrapItemWithCSS(self):
        '''make a wrapped xoxo file from a string with a css import'''
        l = "test"
        html = xoxo.toXOXO(l,addHTMLWrapper=True,cssUrl='reaptest.css')
        self.assertEqual(html,'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><style type="text/css" >@import "reaptest.css";</style></head><body><ol class="xoxo"><li>test</li></ol></body></html>''')

    def testDictionaryRoundTrip(self):
        ''' make a dictionary into a xoxo file and back again; check it is the same'''
        d = {'test':'1','name':'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)

    def testDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url in into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','name':'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testNestedDictionaryRoundTrip(self):
        ''' make a dictionary with a dict in into a xoxo file and back again; check it is the same'''
        d = {'test':'1','inner':{'name':'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testNestedDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url and a dict into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','inner':{'name':'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testNestedDictionariesWithURLsRoundTrip(self):
        ''' make a dictionary with an url and a dict with an url into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','inner':{'name':'Kevin','url':'http://slashdot.org'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testListRoundTrip(self):
        ''' make a list into a xoxo file and back again; check it is the same'''
        l = ['3','2','1']
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testListofDictsRoundTrip(self):
        ''' make a list of Dicts into a xoxo file and back again; check it is the same'''
        l = ['3',{'a':'2'},{'b':'1','c':'4'}]
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testListofListsRoundTrip(self):
        ''' make a list of Lists into a xoxo file and back again; check it is the same'''
        l = ['3',['a','2'],['b',['1',['c','4']]]]
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testDictofListsRoundTrip(self):
        ''' make a dict with lists in into a xoxo file and back again; check it is the same'''
        d = {'test':['1','2'],
        'name':'Kevin',
        'nestlist':['a',['b','c']],
        'nestdict':{'e':'6','f':'7'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)

    def testXOXOjunkInContainers(self):
        '''make sure text outside <li> etc is ignored'''
        d=xoxo.fromXOXO('<ol>bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>')
        self.assertEqual(d,{'good': 'buy'})

    def testXOXOjunkInElements(self):
        '''make sure text within <li> but outside a subcontainer is ignored'''
        l=xoxo.fromXOXO('<ol><li>bad<dl><dt>good</dt><dd>buy</dd></dl>worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>')
        self.assertEqual(l,[{'good': 'buy'},['OK']])

    def testXOXOWithSpacesAndNewlines(self):
        '''unmung some xoxo with spaces in and check result is right'''
        xoxoSample= '''<ol class='xoxo'> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        d2={'text':'item 1',
            'description':" This item represents the main point we're trying to make.",
            'url':'http://example.com/more.xoxo',
            'title':'title of item 1',
            'type':'text/xml',
            'rel':'help'
            }
        xoxoAgain = xoxo.toXOXO(d)
        self.assertEqual(d,d2)
        #this needs a smarter whitespace-sensitive comparison
        #self.assertEqual(xoxoSample,xoxoAgain)

    def testSpecialAttributeDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in and check result is right'''
        xoxoSample= '''<ol class='xoxo'> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample= '''<ol class='xoxo'> 
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a> 
<!-- note how the "text" property is simply the contents of the <a> element -->
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d,d2)
    def testSpecialAttributeAndDLDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in plus a <dl> in the same item and check result is right'''
        xoxoSample= '''<ol class="xoxo"> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample= '''<ol class="xoxo"> 
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a> 
<!-- note how the "text" property is simply the contents of the <a> element -->
      <dl>
        <dt>description</dt>
          <dd> This item represents the main point we're trying to make.</dd>
      </dl>
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d,d2)
    def testSpecialAttributeEncode(self):
        '''check it makes an <a href with a url parameter'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help','text':'an example'}
        html=xoxo.toXOXO(d)
        expectedHTML= '<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a></li></ol>' 
        self.assertEqual(html,expectedHTML)
        
    def testSpecialAttributeRoundTripFull(self):
        '''check an <a href with url, title, type, rel and text survives a round trip'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help','text':'an example'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testSpecialAttributeRoundTripNoText(self):
        '''check it makes an <a href with a url parameter and no text attribute'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testSpecialAttributeRoundTripNoTextOrTitle(self):
        '''check it makes an <a href with a url parameter and no text or title attribute'''
        d={'url':'http://example.com/more.xoxo'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testAttentionRoundTrip(self):
        '''check nested <a> and <dl> and <a> are preserved'''
        kmattn='''<ol class="xoxo"><li><a href="http://www.boingboing.net/" title="Boing Boing Blog" >Boing Boing Blog</a><dl><dt>alturls</dt><dd><ol><li><a href="http://boingboing.net/rss.xml" >xmlurl</a></li></ol></dd><dt>description</dt><dd>Boing Boing Blog</dd></dl></li><li><a href="http://www.financialcryptography.com/" title="Financial Cryptography" >Financial Cryptography</a><dl><dt>alturls</dt><dd><ol><li><a href="http://www.financialcryptography.com/mt/index.rdf" >xmlurl</a></li></ol></dd><dt>description</dt><dd>Financial Cryptography</dd></dl></li><li><a href="http://hublog.hubmed.org/" title="HubLog" >HubLog</a><dl><dt>alturls</dt><dd><ol><li><a href="http://hublog.hubmed.org/index.xml" >xmlurl</a></li><li><a href="http://hublog.hubmed.org/foaf.rdf" >foafurl</a></li></ol></dd><dt>description</dt><dd>HubLog</dd></dl></li></ol>'''
        d = xoxo.fromXOXO(kmattn)
        newattn = xoxo.toXOXO(d)
        d2 = xoxo.fromXOXO(newattn)
        self.assertEqual(newattn,xoxo.toXOXO(d2))
        self.assertEqual(d,d2)
        # NOTE(review): this last comparison relies on the dictionaries
        # yielding 'alturls' before 'description' when written back out
        self.assertEqual(kmattn,newattn)
        
    def testUnicodeRoundtrip(self):
        '''check unicode characters can go to xoxo and back'''
        src=unicode('Tantek \xc3\x87elik and a snowman \xe2\x98\x83','utf-8')
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html))
    def testUtf8Roundtrip(self):
        '''check utf8 characters can go to xoxo and back'''
        src='Tantek \xc3\x87elik and a snowman \xe2\x98\x83'
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html).encode('utf-8'))
    def testWindows1252Roundtrip(self):
        '''check 1252 characters can go to xoxo and back'''
        src='This is an evil\xa0space'
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html).encode('windows-1252'))
# Executed as a script: let unittest handle the command line.
# Imported (for example after reload()): run the whole suite at once
# with a plain text runner.
if __name__ != "__main__":
    suite = unittest.makeSuite(xoxoTestCases,'test')
    runner = unittest.TextTestRunner()
    runner.run(suite)
else:
    unittest.main()

Java

XOXOWriter.java

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo;

import java.util.*;

/**
 * Serializes Lists, Object arrays and Maps (arbitrarily nested) to XOXO
 * markup.
 *
 * Lists and arrays become an ol element with one li per item. A Map
 * with a "url" key becomes an a element whose title/rel/type attributes
 * come from the matching keys; remaining keys are written as a dl.
 * Any other object is written with its string form.
 */
public class XOXOWriter {

  /** Map keys that are written as attributes of the a element. */
  public String[] attrs = {"title","rel","type"};

  /** Writes a list with the given class and the XHTML namespace declared. */
  public String makeXOXO(List struct, String className){
    return makeXOXO(struct, className, 0, true);
  }

  /** Writes a list with the given class, optionally declaring the namespace. */
  public String makeXOXO(List struct, String className,
                         boolean doNSDeclaration){
    return makeXOXO(struct, className, 0, doNSDeclaration);
  }

  /** Writes a list with class "xoxo" and the XHTML namespace declared. */
  public String makeXOXO(List struct){
    return makeXOXO(struct, "xoxo", 0, true);
  }

  /** Writes a nested value: no class attribute, no namespace declaration. */
  public String makeXOXO(Object struct, int depth){
    return makeXOXO(struct, null, depth, false);
  }

  /**
   * Writes any supported value.
   *
   * @param struct          value to write; null yields an empty string
   * @param className       class attribute for a list, or null for none
   * @param depth           nesting level, passed on to nested values
   * @param doNSDeclaration whether a list declares the XHTML namespace
   * @return the markup for struct
   */
  public String makeXOXO(Object struct, String className,
                         int depth, boolean doNSDeclaration){
    if(struct == null) return "";
    StringBuffer sb = new StringBuffer();
    if(struct instanceof Object[]){
      struct = Arrays.asList((Object[]) struct);
    }
    if(struct instanceof Map){
      // work on a copy: keys are removed as they are written
      Map d = new LinkedHashMap((Map) struct);
      if(d.containsKey("url")){
        sb.append("<a href=\"" + d.get("url") + "\" ");
        // link text: explicit text, else the title, else the url itself
        Object text;
        if(d.containsKey("text")){
          text = d.get("text");
        }else if(d.containsKey("title")){
          text = d.get("title");
        }else{
          text = d.get("url");
        }
        for(int i=0; i<attrs.length; i++){
          String xVal = makeXOXO(d.get(attrs[i]),depth+1);
          if(xVal != null && !xVal.equals("")){
            sb.append(attrs[i] + "=\"" + xVal + "\" ");
          }
          d.remove(attrs[i]);
        }
        sb.append(">" + makeXOXO(text, depth+1) + "</a>");
        d.remove("text");
        d.remove("url");
      }
      if(d.size() > 0){
        sb.append("<dl>");
        for(Iterator i = d.keySet().iterator(); i.hasNext();){
          Object k = i.next();
          String ddVal = makeXOXO(d.get(k),depth+1);
          sb.append("<dt>" + k + "</dt>");
          sb.append("<dd>" + ddVal + "</dd>");
        }
        sb.append("</dl>");
      }
    }else if(struct instanceof List){
      sb.append("<ol");
      if(doNSDeclaration)
        sb.append(" xmlns=\"http://www.w3.org/1999/xhtml\"");
      if(className != null){
        sb.append(" class=\"");
        sb.append(className);
        sb.append("\"");
      }
      sb.append(">");
      List l = (List) struct;
      for(Iterator i = l.iterator(); i.hasNext();){
        Object item = i.next();
        sb.append("<li>" + makeXOXO(item,depth+1) + "</li>");
      }
      sb.append("</ol>");
    }else{
      sb.append(struct);
    }
    return sb.toString();
  }

  /** Writes a list as a bare xoxo fragment. */
  public String toXOXO(List struct){
    return toXOXO(struct, false, null);
  }

  /** Wraps a single value in a list and writes it as a bare xoxo fragment. */
  public String toXOXO(Object struct){
    List alist = new ArrayList();
    alist.add(struct);
    return toXOXO(alist);
  }

  /** Wraps a single value in a list and writes it, optionally as a full page. */
  public String toXOXO(Object struct,
                       boolean addHTMLWrapper,
                       String cssUrl){
    List alist = new ArrayList();
    alist.add(struct);
    return toXOXO(alist, addHTMLWrapper, cssUrl);
  }

  /**
   * Writes a list as xoxo.
   *
   * @param struct         list to write
   * @param addHTMLWrapper when true, wrap the list in a complete XHTML page
   * @param cssUrl         stylesheet imported by the page, or null for none
   * @return the fragment, or the whole page when addHTMLWrapper is set
   */
  public String toXOXO(List struct,
                       boolean addHTMLWrapper,
                       String cssUrl){
    // public and system identifiers are two separately quoted strings
    String startHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD "
        + "XHTML 1.0 Transitional//EN\"\n"
        + "\"http://www.w3.org/TR/xhtml1/DTD/"
        + "xhtml1-transitional.dtd\">"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
        + "<head>";
    if(addHTMLWrapper){
      String s = startHTML;
      if(cssUrl != null){
        s += "<style type=\"text/css\">@import \""
            + cssUrl + "\";</style>";
      }
      // the html element already declares the namespace
      s += "</head><body>" + makeXOXO(struct, "xoxo", false)
          + "</body></html>";
      return s;
    }else{
      return makeXOXO(struct, "xoxo");
    }
  }
}

XOXOParser.java

This needs some small additions to handle the XHTML DTD and named character entities.

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

import java.util.*;
import java.io.InputStream;
import java.io.StringReader;
import java.io.IOException;

/**
 * SAX handler that turns XOXO markup (XHTML ol/ul/dl/a outlines) into nested
 * java.util structures: lists become {@link ArrayList}s, definition lists
 * and links become {@link LinkedHashMap}s, and leaf items become Strings.
 *
 * Only elements in the XHTML namespace are interpreted; everything else is
 * tracked on the element stack but otherwise ignored.
 */
public class XOXOParser extends DefaultHandler {

  protected String XHTML_NS = "http://www.w3.org/1999/xhtml";
  // Local names of the elements currently open ("foo" for non-XHTML ones).
  protected List elStack;
  // Container element names; text directly inside them is discarded.
  protected Map listEls;
  // Structures built so far that have not yet been attached to a parent.
  public List structs;
  // The containers (List or Map) that are currently open.
  public List xoStack;
  // Text accumulated for the li/dt/dd/a elements currently open.
  public List textStack;

  public XOXOParser() {
    reset();
  }

  /**
   * Opens a new container.
   *
   * When an empty map (a freshly opened dl) directly follows a dictionary
   * that was created from an a element, the existing dictionary is re-opened
   * instead so the dl entries become extra properties of the link. This
   * mirrors the condition used by the PHP pushStruct on this page.
   *
   * @param struct the new, still empty or attribute-filled container
   */
  protected void pushStruct(Object struct){
    // Guard both stacks: an <a> may appear before any list was opened.
    Object lastStruct =
        structs.isEmpty() ? null : structs.get(structs.size()-1);
    Object openStruct =
        xoStack.isEmpty() ? null : xoStack.get(xoStack.size()-1);
    if((struct instanceof Map) && ((Map) struct).isEmpty()
        && (lastStruct instanceof Map)
        && (((Map) lastStruct).containsKey("url"))
        && (lastStruct != openStruct)){
      // put back the <a>-made one for extra defs
      xoStack.add(lastStruct);
    }else{
      structs.add(struct);
      xoStack.add(struct);
    }
  }

  /**
   * Opens a container for ol/ul/dl/a and starts a text buffer for
   * a/li/dt/dd. For a elements the attributes are copied into the new map,
   * with href stored under the key "url".
   */
  public void startElement(String nsUri, String localName,
                           String qName, Attributes atts){
    // bounce non-XHTML elements
    if(XHTML_NS.equals(nsUri)){
      elStack.add(localName);
    }else{
      // placeholder keeps elStack balanced with endElement
      elStack.add("foo");
      return;
    }

    if(localName.equals("a")){
      Map attmap = new LinkedHashMap();
      int len = atts.getLength();
      for(int i=0; i<len; i++){
        attmap.put(atts.getQName(i),atts.getValue(i));
      }
      if(attmap.containsKey("href")){
        attmap.put("url",attmap.get("href"));
        attmap.remove("href");
      }
      pushStruct(attmap);
      textStack.add("");
    }else if(localName.equals("dl")){
      pushStruct(new LinkedHashMap());
    }else if(localName.equals("ol") || localName.equals("ul")){
      pushStruct(new ArrayList());
    }else if(localName.equals("li") || localName.equals("dt")
        || localName.equals("dd")){
      textStack.add("");
    }
  }

  /**
   * Closes the current element: containers are popped from xoStack, li
   * values are appended to the open list and dd values are stored in the
   * open map under the text of the preceding dt.
   */
  public void endElement(String nsUri, String localName,
                         String qName){
    elStack.remove(elStack.size()-1);
    // bounce non-XHTML elements; compare by value, since SAX does not
    // promise that namespace URI strings are interned
    if(!XHTML_NS.equals(nsUri)){
      return;
    }

    if(localName.equals("a")){
      String val = (String) textStack.remove(textStack.size()-1);
      if (val.length() > 0){
        Map defs = (Map) xoStack.get(xoStack.size()-1);
        // Link text that merely repeats the title or the url carries no
        // extra information and is not stored.
        String defVal = (String) defs.get("title");
        if((defVal != null) && (val.equals(defVal))){
          val = "";
        }
        defVal = (String) defs.get("url");
        if((defVal != null) && (val.equals(defVal))){
          val = "";
        }
        if(val.length() > 0){
          defs.put("text",val);
        }
      }
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("dl") || localName.equals("ol")
        || localName.equals("ul")){
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("li")){
      Object val = textStack.remove(textStack.size()-1);
      List last = (List) xoStack.get(xoStack.size()-1);
      // A structure built inside this li replaces its plain text.
      if(structs.get(structs.size()-1) != last){
        val = structs.remove(structs.size()-1);
      }
      last.add(val);
    }else if(localName.equals("dd")){
      Object val = textStack.remove(textStack.size()-1);
      // The dt text was left on the stack by the preceding dt element.
      Object key = textStack.remove(textStack.size()-1);
      Map last = (Map) xoStack.get(xoStack.size()-1);
      // A structure built inside this dd replaces its plain text.
      if(structs.get(structs.size()-1) != last){
        val = structs.remove(structs.size()-1);
      }
      last.put(key,val);
    }
  }

  /**
   * Appends character data to the innermost open text buffer. Text directly
   * inside ol/ul/dl, outside any container, or arriving while no text buffer
   * is open is dropped.
   */
  public void characters(char[] ch, int start, int length){
    if((xoStack.size() > 0)
        && (textStack.size() > 0)
        && (!listEls.containsKey(elStack.get(elStack.size()-1)))){
      String text = (String) textStack.get(textStack.size()-1);
      String more = new String(ch,start,length);
      textStack.set(textStack.size()-1,text+more);
    }
  }

  /** Parses XOXO markup held in a string. */
  public Object parse(String s) throws SAXException, IOException{
    return parse(new InputSource(new StringReader(s)));
  }

  /** Parses XOXO markup read from a stream. */
  public Object parse(InputStream is) throws SAXException, IOException {
    return parse(new InputSource(is));
  }

  /**
   * Parses XOXO markup and returns the decoded structure.
   *
   * Single-element wrapper lists are peeled off, so a document holding one
   * dictionary yields that Map and a one-item list yields the item itself.
   * The parser state is cleared afterwards, also when parsing fails, so the
   * instance can be reused.
   *
   * @return a List, a Map or a String, depending on the document
   * @throws SAXException if the input is not well-formed XML
   * @throws IOException  if the input cannot be read
   */
  public Object parse(InputSource in) throws SAXException, IOException {
    try{
      XMLReader parser = XMLReaderFactory.createXMLReader();
      parser.setContentHandler(this);
      parser.parse(in);
      List returnList = new ArrayList();
      for(Iterator i = this.structs.iterator(); i.hasNext();){
        Object thing = i.next();
        if(thing != null){
          returnList.add(thing);
        }
      }
      while(returnList.size()==1){
        Object only = returnList.get(0);
        if(only instanceof List){
          returnList = (List) only;
        }else{
          return only;
        }
      }
      return returnList;
    }finally{
      // never leak half-built stacks into the next parse
      reset();
    }
  }

  /** Discards all parser state and re-creates the container-name table. */
  protected void reset(){
    elStack = new ArrayList();
    listEls = new HashMap();
    structs = new ArrayList();
    xoStack = new ArrayList();
    textStack = new ArrayList();
    listEls.put("ol","ol");
    listEls.put("ul","ul");
    listEls.put("dl","dl");
  }
}

XOXOTest.java

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo.tests;

import junit.framework.TestSuite;
import junit.framework.TestCase;
import junit.textui.TestRunner;
import org.atompub.draft.xoxo.XOXOWriter;
import org.atompub.draft.xoxo.XOXOParser;

import java.util.*;

/**
 * JUnit 3 tests for XOXOWriter and XOXOParser: exact serializer output,
 * writer/parser round trips, tolerance of stray text, and the special
 * handling of url/title/rel/type via a elements.
 */
public class XOXOTest extends TestCase {

  public static void main(String[] args) {
    new TestRunner().doRun(new TestSuite(XOXOTest.class));
  }

  /**
   * Parses html with the given parser. Any exception is turned into a test
   * failure; the stack trace is printed first because fail() always throws
   * and nothing after it would run.
   */
  private Object parseOrFail(XOXOParser parser, String html){
    try{
      return parser.parse(html);
    }catch (Exception e){
      e.printStackTrace();
      fail(e.getMessage());
      return null; // not reached
    }
  }

  String XHTML_DEC = "xmlns=\"http://www.w3.org/1999/xhtml\" ";
  public String simpleListHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li>1</li><li>2</li><li>3</li></ol>";

  public void testSimpleList(){
    String [] numbers = {"1","2","3"};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(simpleListHTML,
                 xoxo.toXOXO(Arrays.asList(numbers)));
  }

  public void testStringIntegerList(){
    Object[] numbers = {new Integer(1),"2","3"};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(simpleListHTML,
                 xoxo.toXOXO(Arrays.asList(numbers)));
  }

  public String nestedListHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\"><li>1</li><li>"
  + "<ol><li>2</li><li>3</li></ol></li></ol>";

  public void testNestedList(){
    Object[] arr = {"2","3"};
    Object[] nested = {"1",Arrays.asList(arr)};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(nestedListHTML,
                 xoxo.toXOXO(Arrays.asList(nested)));
  }

  public void testNestedArray(){
    Object[] arr = {"2","3"};
    Object[] nested = {"1",arr};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(nestedListHTML,
                 xoxo.toXOXO(Arrays.asList(nested)));
  }

  public String dictHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li><dl><dt>test</dt><dd>1</dd><dt>name</dt>"
  + "<dd>Kevin</dd></dl></li></ol>";

  public void testDictionary(){
    Map dict = new LinkedHashMap();
    dict.put("test", new Integer(1));
    dict.put("name", "Kevin");
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(dictHTML,
                 xoxo.toXOXO(dict));
  }

  public String singleHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li>test</li></ol>";

  public void testSingleItem(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(singleHTML,
                 xoxo.toXOXO(item));
  }

  public void testWrapDiffers(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    String nowrap = xoxo.toXOXO(item);
    Object[] itemArr = {item};
    String wrap = xoxo.toXOXO(Arrays.asList(itemArr),true,null);
    assertFalse(wrap.equals(nowrap));
  }

  // Must match the page prefix produced by XOXOWriter.toXOXO exactly.
  String startHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD"
        + "XHTML 1.0 Transitional//EN\n"
        + "http://www.w3.org/TR/xhtml1/DTD/"
        + "xhtml1-transitional.dtd\">"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
        + "<head></head><body>";
  public String singleWrapHTML = "<ol "
  + "class=\"xoxo\">"
  + "<li>test</li></ol>";
  public String endHTML = "</body></html>";

  public void testWrapSingleItem(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(startHTML + singleWrapHTML + endHTML,
                 xoxo.toXOXO(item,true,null));
  }

  public void testXOXOParser(){
    // Smoke test: the serialized dictionary must parse to something.
    assertNotNull(parseOrFail(new XOXOParser(), dictHTML));
  }

  public void testDictRoundTrip(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("test", "1");
    dict.put("name", "Kevin");
    String html = xoxo.toXOXO(dict);
    assertEquals(dict, parseOrFail(new XOXOParser(), html));
  }

  public void testListRoundTrip(){
    Object[] obj = {"1","2","3"};
    List testList = Arrays.asList(obj);
    XOXOWriter xoxo = new XOXOWriter();
    String html = xoxo.toXOXO(testList);
    assertEquals(testList, parseOrFail(new XOXOParser(), html));
  }

  public void testListOfDictsRoundTrip(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("test", "1");
    dict.put("name", "Kevin");
    Map dict2 = new LinkedHashMap();
    dict2.put("one", "two");
    dict2.put("three", "four");
    dict2.put("five", "six");
    Object[] obj = {"1",dict,dict2};
    List testList = Arrays.asList(obj);
    String html = xoxo.toXOXO(testList);
    assertEquals(testList, parseOrFail(new XOXOParser(), html));
  }

  public void testListOfListsRoundTrip(){
    Object[] list1 = {"1","2","3"};
    Object[] list2 = {"4","5","6", Arrays.asList(list1)};
    Object[] list3 = {"7", Arrays.asList(list2)};
    Object[] list4 = {"8", Arrays.asList(list3)};
    List testList = Arrays.asList(list4);
    XOXOWriter xoxo = new XOXOWriter();
    String html = xoxo.toXOXO(testList);
    assertEquals(testList, parseOrFail(new XOXOParser(), html));
  }

  public void testDictOfListsRoundTrip(){
    Object[] list1 = {"1","2","3"};
    Object[] list2 = {"4","5","6"};
    Object[] list3 = {"7"};
    Object[] list4 = {"8", "9"};
    Map dict = new LinkedHashMap();
    dict.put("foo", Arrays.asList(list1));
    dict.put("bar", Arrays.asList(list2));
    dict.put("baz", Arrays.asList(list3));
    dict.put("qux", Arrays.asList(list4));
    XOXOWriter xoxo = new XOXOWriter();
    String html = xoxo.toXOXO(dict);
    assertEquals(dict, parseOrFail(new XOXOParser(), html));
  }

  public String junkXOXO = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>";

  public void testXOXOJunkInContainers(){
    // Text directly inside ol/dl must be ignored.
    Map dict = new LinkedHashMap();
    dict.put("good","buy");
    assertEquals(dict, parseOrFail(new XOXOParser(), junkXOXO));
  }

  public String junkElementXOXO = "<ol "
  + XHTML_DEC
  + "><li>bad<dl><dt>good</dt><dd>buy</dd></dl>"
  + "worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>";

  public void testXOXOjunkInElements(){
    // Text inside an li that also holds a sub-container must be ignored.
    Map dict = new LinkedHashMap();
    dict.put("good","buy");
    Object[] ok = {"OK"};
    Object[] obj ={dict, Arrays.asList(ok)};
    List testList = Arrays.asList(obj);
    assertEquals(testList, parseOrFail(new XOXOParser(), junkElementXOXO));
  }

  public String xoxoSpacesNewlines = "<ol " +  XHTML_DEC +
      " class='xoxo'> \n" +
      "  <li>\n" +
      "    <dl>\n" +
      "        <dt>text</dt>\n" +
      "        <dd>item 1</dd>\n" +
      "        <dt>description</dt>\n" +
      "        <dd> This item represents the main" +
      " point we're trying to make.</dd>\n" +
      "        <dt>url</dt>\n" +
      "        <dd>http://example.com/more.xoxo</dd>\n" +
      "        <dt>title</dt>\n" +
      "        <dd>title of item 1</dd>\n" +
      "        <dt>type</dt>\n" +
      "        <dd>text/xml</dd>\n" +
      "        <dt>rel</dt>\n" +
      "        <dd>help</dd>\n" +
      "    </dl>\n" +
      "  </li>\n" +
      "</ol>";

  public void testXOXOWithSpacesAndNewlines(){
    Map dict = new LinkedHashMap();
    dict.put("text","item 1");
    dict.put("description"," This item represents the main" +
        " point we're trying to make.");
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","title of item 1");
    dict.put("type","text/xml");
    dict.put("rel","help");
    assertEquals(dict, parseOrFail(new XOXOParser(), xoxoSpacesNewlines));
  }

  public String xoxoSample = "<ol " +  XHTML_DEC +
      " class='xoxo'> \n" +
      "  <li>\n" +
      "    <dl>\n" +
      "        <dt>text</dt>\n" +
      "        <dd>item 1</dd>\n" +
      "        <dt>url</dt>\n" +
      "        <dd>http://example.com/more.xoxo</dd>\n" +
      "        <dt>title</dt>\n" +
      "        <dd>title of item 1</dd>\n" +
      "        <dt>type</dt>\n" +
      "        <dd>text/xml</dd>\n" +
      "        <dt>rel</dt>\n" +
      "        <dd>help</dd>\n" +
      "    </dl>\n" +
      "  </li>\n" +
      "</ol>";

  public String smartXOXOSample = "<ol " + XHTML_DEC +
      "class=\"xoxo\"> \n" +
      "  <li><a href=\"http://example.com/more.xoxo\"\n" +
      "         title=\"title of item 1\"\n" +
      "         type=\"text/xml\"\n" +
      "         rel=\"help\">item 1</a> \n" +
      "<!-- note how the \"text\" property is simply" +
      " the contents of the <a> element -->\n" +
      "  </li>\n" +
      "</ol>";

  public void testSpecialAttributeDecoding(){
    // One parser instance is reused on purpose: it must reset itself
    // between documents.
    XOXOParser parser = new XOXOParser();
    Object xoxoDict = parseOrFail(parser, xoxoSample);
    Object xoxoDict2 = parseOrFail(parser, smartXOXOSample);
    assertEquals(xoxoDict,xoxoDict2);
  }

  public String specialAttrHTML =  "<ol " + XHTML_DEC +
      "class=\"xoxo\">" +
      "<li><a href=\"http://example.com/more.xoxo\" title=\"sample url\" " +
      "rel=\"help\" type=\"text/xml\" >an example</a></li></ol>";

  public void testSpecialAttributeEncode(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    dict.put("text","an example");
    String html = xoxo.toXOXO(dict);
    assertEquals(specialAttrHTML,html);
  }

  public void testSpecialAttributeRoundTripFull(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    dict.put("text","an example");
    String html = xoxo.toXOXO(dict);
    assertEquals(dict, parseOrFail(new XOXOParser(), html));
  }

  public void testSpecialAttributeRoundTripNoText(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    String html = xoxo.toXOXO(dict);
    assertEquals(dict, parseOrFail(new XOXOParser(), html));
  }

  public void testSpecialAttributeRoundTripNoTextOrTitle(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("type","text/xml");
    dict.put("rel","help");
    String html = xoxo.toXOXO(dict);
    assertEquals(dict, parseOrFail(new XOXOParser(), html));
  }

  public void testUnicodeRoundTrip(){
    // Unicode escapes keep the test independent of the source encoding:
    // \u00C7 is C-cedilla, \u2603 is the snowman that an earlier paste
    // had degraded to '?'.
    String s = "Tantek \u00C7elik and a snowman \u2603";
    XOXOWriter xoxo = new XOXOWriter();
    String html = xoxo.toXOXO(s);
    assertEquals(s, parseOrFail(new XOXOParser(), html));
  }

}

PHP

xoxolib.php

<nowiki>
<?
# Classify a value for serialization.
# Returns 'string' for anything that is not an array, 'list' for an array
# whose keys are exactly 0..n-1 in order (and whose first element is set),
# and 'dictionary' for every other array, including the empty one.
function getKind($struct)
{
    if (!is_array($struct))
        return 'string';
    $sequentialKeys = isset($struct[0])
        && array_keys($struct)==range(0,count($struct)-1);
    return $sequentialKeys ? 'list' : 'dictionary';
}
# Recursively serialize a value as XOXO markup.
#   $struct    string, list array or dictionary array (see getKind)
#   $className optional class attribute for the <ol> of a list
# Lists become <ol><li>..</li></ol>. Dictionaries become <dl><dt>/<dd>
# pairs, except that a dictionary with a 'url' key is written as an
# <a href> element: 'title', 'rel' and 'type' become attributes, 'text'
# (falling back to title, then url) becomes the link text, and any
# remaining keys follow in a <dl>.
# All text, keys and attribute values are HTML-escaped so that the output
# stays well-formed and can be read back by fromXOXO.
function makeXOXO($struct,$className='')
{
    $s='';
    $kind = getKind($struct);
    if ($kind=='list')
        {
        if ($className)
            $s .= '<ol class="' . htmlspecialchars($className, ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8') . '">';
        else
            $s .= "<ol>";
        foreach ($struct as $value)
            $s .= "<li>" . makeXOXO($value) ."</li>";
        $s .="</ol>";
        }
    else if ($kind=='dictionary')
        {
        if (isset($struct['url']))
            {
            $s .='<a href="' . htmlspecialchars($struct['url'], ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8') . '" ';
            # choose the link text before the attributes are consumed below
            if (isset($struct['text']))
                $text= $struct['text'];
            else if (isset($struct['title']))
                $text= $struct['title'];
            else
                $text= $struct['url'];
            foreach (array('title','rel','type') as $attr)
                if (isset($struct[$attr]))
                    {
                    $s .= "$attr=\"" . htmlspecialchars($struct[$attr], ENT_COMPAT | ENT_SUBSTITUTE, 'UTF-8') .'" ';
                    unset($struct[$attr]);
                    }
            $s .= ">" . makeXOXO($text) ."</a>";
            unset($struct['url'],$struct['text']);
            }
        # whatever was not expressed through the <a> goes into a <dl>
        if (count($struct))
            {
            $s .="<dl>";
            foreach ($struct as $key => $value)
                $s .= "<dt>" . htmlspecialchars("$key", ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</dt><dd>". makeXOXO($value) . "</dd>";
            $s .= "</dl>";
            }
        }
    else
        $s .= htmlspecialchars("$struct", ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
    return $s;
}
# Serialize a value as a XOXO outline (<ol class="xoxo">).
#   $struct         any value; anything that is not a list is first
#                   wrapped in a one-element list
#   $addHTMLWrapper when true, embed the outline in a complete XHTML page
#   $cssUrl         optional stylesheet to @import from the page head
#                   (only used together with $addHTMLWrapper)
# Returns the markup as a string.
function toXOXO($struct,$addHTMLWrapper=FALSE,$cssUrl='')
{
    $items = (getKind($struct) != 'list') ? array($struct) : $struct;
    $xoxo = makeXOXO($items,'xoxo');
    if (!$addHTMLWrapper)
        return $xoxo;

    $page = '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />';
    if ($cssUrl)
        $page .= "<style type=\"text/css\" >@import \"$cssUrl\";</style>";
    return $page . "</head><body>$xoxo</body></html>";
}


# Open a new container while parsing.
#   $struct      the new (usually empty) array
#   $structstack structures built so far, passed by reference
#   $xostack     references to the currently open containers
#   $structType  'dict' or 'list'
# When a dictionary is opened right after a dictionary that carries a
# 'url' key and is not the currently open container, that existing
# dictionary is re-opened instead, so the following definitions extend the
# <a>-made one. Otherwise $struct is appended to $structstack. In both
# cases a reference to the top of $structstack is pushed on $xostack.
function pushStruct($struct,&$structstack,&$xostack,$structType)
{
    $reopenLinkDict = is_array($struct)
        && $structType=='dict'
        && count($structstack)
        && is_array(end($structstack))
        && isset($structstack[count($structstack)-1]['url'])
        && end($structstack) != end($xostack);
    if (!$reopenLinkDict)
        $structstack[]=$struct;
    # by reference, so later writes through $xostack land in $structstack
    $xostack[] = &$structstack[count($structstack)-1];
}

# Parse XOXO markup back into nested PHP arrays.
#   $html  well-formed XHTML containing <ol>/<ul>/<dl>/<li>/<dt>/<dd>/<a>
# <ol>/<ul> become lists, <dl> becomes a dictionary and <a> becomes a
# dictionary holding its attributes (href stored as 'url') plus 'text' when
# the link text differs from both title and url. Single-element wrapper
# lists are peeled off before returning, so a document holding one item
# yields that item. Tag names are compared in upper case because the parser
# is used with its default case folding.
function fromXOXO($html)
{
    $structs=array();       # structures not yet attached to a parent
    $xostack=array();       # references to the containers currently open
    $textstack=array('');   # <dt> texts waiting for their <dd>
    $dumpStacks=0;          # set to 1 to trace both stacks while parsing
    $p = xml_parser_create();
    xml_parse_into_struct($p, $html, $xoxoVals, $xoxoIndex);
    xml_parser_free($p);

    if ($dumpStacks)
        {
        echo "<pre>";
        var_dump($xoxoVals);
        var_dump($xoxoIndex);
        echo "</pre>";      # close the <pre> opened above
        }

    foreach ($xoxoVals as $node)
        {
        $tag = $node['tag'];
        $type = $node['type'];
        # elements without character data have no 'value' entry
        $value = isset($node['value']) ? $node['value'] : null;

        if ($tag == 'OL' || $tag == 'DL' || $tag == 'UL')
            {
            $structType = ($tag == 'DL') ? 'dict' : 'list';
            if ($type == 'open')
                pushStruct(array(),$structs,$xostack,$structType);
            if ($type == 'close')
                array_pop($xostack);
            }
        else if ($tag == 'LI')
            {
            # 'complete' = plain text item, 'close' = item holding a structure
            if ($type == 'complete')
                array_push($xostack[count($xostack)-1],$value);
            if ($type == 'close')
                array_push($xostack[count($xostack)-1],array_pop($structs));
            }
        else if ($tag == 'DT')
            {
            if ($type == 'complete')
                array_push($textstack,$value);
            }
        else if ($tag == 'DD')
            {
            if ($type == 'complete')
                {
                $key = array_pop($textstack);
                $xostack[count($xostack)-1][$key] = $value;
                }
            if ($type == 'close')
                {
                $key = array_pop($textstack);
                $xostack[count($xostack)-1][$key] = array_pop($structs);
                }
            }
        else if ($tag == 'A')
            {
            if ($type == 'complete')
                {
                $attrs = isset($node['attributes']) ? $node['attributes'] : array();
                $dict=array();
                foreach ($attrs as $key=> $attrValue)
                    {
                    if ($key=='HREF')
                        $dict['url'] = $attrValue;
                    else
                        $dict[strtolower($key)] = $attrValue;
                    }
                $title = isset($dict['title']) ? $dict['title'] : null;
                $url = isset($dict['url']) ? $dict['url'] : null;
                # link text that only repeats title or url is not stored
                if ($value !== null && ($value != $title) && ($value != $url))
                    $dict['text'] = $value;
                # the link is complete already: open it and close it at once
                pushStruct($dict,$structs,$xostack,'dict');
                array_pop($xostack);
                }
            }

        if ($dumpStacks && in_array($tag, array('OL','UL','DL','LI','DD','A')))
            {
            echo $type .' ' . $tag .":\n";
            var_dump($structs);
            var_dump($xostack);
            }
        }

    # is_array() guard: after unwrapping a one-item list $structs may be a
    # plain string, which count() must not be called on
    while (is_array($structs) && count($structs) == 1 && getKind($structs) == 'list')
        $structs = $structs[0];
    return $structs;

} ?>

</nowiki>

xoxotest.php

<nowiki>
<?
include("xoxolib.php");
# Report whether two values are loosely equal (==).
#   $testname label printed in the heading
#   $str1     expected value
#   $str2     value actually returned
# Prints a check-marked <h3> on success; on failure prints a heading and a
# definition list showing both values.
function assertEqual($testname,$str1,$str2)
{
if ($str1 == $str2)
    echo "<h3>√ $testname </h3>";
else
    {
    echo "<h3><big>☹</big> $testname failed</h3>";
    # the list is closed with </dl> so later output is not nested inside it
    echo "<dl><dt>expected</dt>\n<dd>$str1</dd>\n<dt>returned</dt>\n<dd>$str2</dd>\n</dl>";
    }
}

# Report whether two structures are loosely equal (==).
#   $testname label printed in the heading
#   $expected expected structure
#   $returned structure actually returned
# Prints a check-marked <h3> on success; on failure prints a heading and a
# definition list with a var_dump of each structure inside <pre>.
function assertArrayEqual($testname,$expected,$returned)
{
if ($expected == $returned)
    echo "<h3>√ $testname </h3>";
else
    {
    echo "<h3><big>☹</big> $testname failed</h3>";
    echo "<dl><dt>expected</dt>\n<dd><pre>";
    var_dump($expected);
    echo "</pre></dd>\n<dt>returned</dt>\n<dd><pre>";
    var_dump($returned);
    echo "</pre></dd>\n</dl>";
    }
}

# Report success when two values differ (!=) and failure when they are
# equal; on failure the shared value is printed.
# NOTE(review): the markup in these strings was reconstructed from a
# rendered copy of this page -- confirm against the original source.
function failIfEqual($testname,$str1,$str2)
{
if ($str1 != $str2)
    echo "<h3>√ $testname </h3>";
else
    {
    echo "<h3>$testname failed</h3>";
    echo "<dl><dt>both were</dt>\n<dd>$str1</dd>\n</dl>";
    }
}

# a flat list becomes an ordered list with the xoxo class
$l = array('1','2','3');
$html = toXOXO($l);
assertEqual('make xoxo from list','<ol class="xoxo"><li>1</li><li>2</li><li>3</li></ol>',$html);

# a single string is wrapped in a one-item list
$s = 'test';
$html = toXOXO($s);
assertEqual("make xoxo from a string",'<ol class="xoxo"><li>test</li></ol>',$html);

# compare the variables themselves; the bare words html / htmlwrap used
# before always differed, which made this check pass unconditionally
$htmlwrap = toXOXO($s,TRUE);
failIfEqual("make sure wrapped and unwrapped differ",$html,$htmlwrap);

assertEqual("make wrapped xoxo from a string",'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body><ol class="xoxo"><li>test</li></ol></body></html>',$htmlwrap);

$csswrap = toXOXO($s,TRUE,"reaptest.css");
assertEqual("make wrapped xoxo with css link from a string",'<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd"><html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><style type="text/css" >@import "reaptest.css";</style></head><body><ol class="xoxo"><li>test</li></ol></body></html>',$csswrap);

# Expected strings below are the exact output of makeXOXO for each input.
$l2 = array('1',array('2','3'));
$html = toXOXO($l2);
assertEqual('make xoxo from nested list','<ol class="xoxo"><li>1</li><li><ol><li>2</li><li>3</li></ol></li></ol>',$html);

# array keys and values are quoted; bare words are undefined constants
$d = array('test'=>'1');
$html = toXOXO($d);
assertEqual('make xoxo from 1-element dictionary','<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd></dl></li></ol>',$html);

$d = array('test'=>'1','name'=>'Kevin');
$html = toXOXO($d);
assertEqual('make xoxo from dictionary','<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd><dt>name</dt><dd>Kevin</dd></dl></li></ol>',$html);

$d = array('url'=>'http://example.com/more.xoxo','title'=>'sample url','type'=>"text/xml",'rel'=>'help','text'=>'an example');
$html = toXOXO($d);
assertEqual('make xoxo from dictionary with url','<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a></li></ol>',$html);

$d = array('url'=>'http://example.com/more.xoxo','title'=>'sample url','type'=>"text/xml",'rel'=>'help','text'=>'an example','thing'=>'and another thing...');
$html = toXOXO($d);
assertEqual('make xoxo from dictionary with url and thing','<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a><dl><dt>thing</dt><dd>and another thing...</dd></dl></li></ol>',$html);

$d = array('url'=>'http://example.com/more.xoxo','title'=>'sample url','type'=>"text/xml",'rel'=>'help','text'=>'an example','list'=>array('and', 'another','thing...'));
$html = toXOXO($d);
assertEqual('make xoxo from dictionary with url and list','<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a><dl><dt>list</dt><dd><ol><li>and</li><li>another</li><li>thing...</li></ol></dd></dl></li></ol>',$html);

$l = array('3',array('a'=>'2'));
$html = toXOXO($l);
assertEqual('make xoxo from dict in list','<ol class="xoxo"><li>3</li><li><dl><dt>a</dt><dd>2</dd></dl></li></ol>',$html);

# Round trips: serialize with toXOXO, parse with fromXOXO, compare.
$l = array('3','2','1');
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('list to xoxo and back',$l,$newdl);

$l = array('1',array('a','b'));
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('list of lists to xoxo and back',$l,$newdl);

$l= array('3',array('a','2'),array('b',array('1',array('c','4'))));
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('list of list of lists to xoxo and back',$l,$newdl);

# keys and values quoted; bare words are undefined constants
$d = array('test'=>'1','name'=>'Kevin');
$html = toXOXO($d);
$newd= fromXOXO($html);
assertArrayEqual('dictionary to xoxo and back',$d,$newd);

$l = array('3',array('a'=>'2'),array('b'=>'1','c'=>'4'));
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('list of dicts to xoxo and back',$l,$newdl);
assertEqual('list of dicts to xoxo and back',$html,toXOXO($newdl));

$l = array('one'=>array('a'=>'2','b'=>'3'),'two'=>array('c'=>'4'));
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('dict of dicts to xoxo and back',$l,$newdl);
assertEqual('dict of dicts to xoxo and back',$html,toXOXO($newdl));

$l = array('one'=>array('a'=>'2','b'=>'3'),'url'=>'http://example.com');
$html = toXOXO($l);
$newdl= fromXOXO($html);
assertArrayEqual('dict of dicts with url to xoxo and back',$l,$newdl);
assertEqual('dict of dicts with url to xoxo and back',$html,toXOXO($newdl));

$d = array('test'=> array('1','2'), 'name'=> 'Kevin','nestlist'=> array('a',array('b','c')), 'nestdict'=>array('e'=>'6','f'=>'7'));
$html = toXOXO($d);
$newd= fromXOXO($html);
assertArrayEqual('dictionary of lists to xoxo and back',$d,$newd);

# Stray text must not leak into the parsed structure.
# NOTE(review): the input markup was reconstructed from a rendered copy of
# this page, following the matching Java test constants -- confirm.
$d=fromXOXO('<ol class="xoxo">bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>');
assertArrayEqual('make sure text outside <li> etc is ignored',array('good'=>'buy'),$d);

$l=fromXOXO('<ol><li>bad<dl><dt>good</dt><dd>buy</dd></dl>worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>');
assertArrayEqual('make sure text within <li> but outside a subcontainer is ignored',array(array('good'=>'buy'),array('OK')),$l);

# Decoding hand-written XOXO with whitespace between the elements.
# NOTE(review): the markup in these samples was reconstructed from a
# rendered copy of this page, following the matching Java test constants;
# the exact whitespace of the original is not known -- confirm.
$xoxoSample= "<ol class='xoxo'>
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>";

$d = fromXOXO($xoxoSample);
$d2=array('text'=>'item 1',
   'description'=>" This item represents the main point we're trying to make.",
   'url'=>'http://example.com/more.xoxo',
   'title'=>'title of item 1',
   'type'=>'text/xml',
   'rel'=>'help');

assertArrayEqual('unmung some xoxo with spaces in and check result is right',$d2,$d);

# the <dl> form and the <a> form of the same item must decode identically
$xoxoSample= "<ol class='xoxo'>
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>";

$d = fromXOXO($xoxoSample);

$smartxoxoSample= "<ol class=\"xoxo\">
  <li><a href=\"http://example.com/more.xoxo\" title=\"title of item 1\" type=\"text/xml\" rel=\"help\">item 1</a>
  </li>
</ol>";

$d2 = fromXOXO($smartxoxoSample);
assertArrayEqual('unmung some xoxo with <a href= rel= etc in and check result is right',$d,$d2);

# same comparison with an extra property following the <a> in a <dl>
$xoxoSample= "<ol class='xoxo'>
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>";

$d = fromXOXO($xoxoSample);

$smartxoxoSample= "<ol class=\"xoxo\">
  <li><a href=\"http://example.com/more.xoxo\" title=\"title of item 1\" type=\"text/xml\" rel=\"help\">item 1</a>
    <dl>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
    </dl>
  </li>
</ol>";

$d2 = fromXOXO($smartxoxoSample);
assertArrayEqual('unmung some xoxo with <a href= rel= etc in and check result is right',$d,$d2);

# A dictionary with a url is written as <a href> and must read back intact.
$d=array('url'=>'http://example.com/more.xoxo','title'=>'sample url','type'=>"text/xml",'rel'=>'help','text'=>'an example');
$html=toXOXO($d);
assertArrayEqual('round trip url to href to url',$d,fromXOXO($html));

# same without an explicit text entry
$d=array('url'=>'http://example.com/more.xoxo','title'=>'sample url','type'=>"text/xml",'rel'=>'help');
$html=toXOXO($d);
assertArrayEqual('round trip url to href to url (no text)',$d,fromXOXO($html));

// Round trip a dictionary that holds nothing but a url.
$d=array('url'=>'http://example.com/more.xoxo');
$html=toXOXO($d);
assertArrayEqual('round trip url to href to url (just url)',$d,fromXOXO($html));

// Attention-list sample with three entries, each carrying alturls and a description.
// The heredoc terminator sits alone on its line: sharing that line with further
// statements is a parse error before PHP 7.3.
$kmattn=<<<ENDATTN

  1. <a href="http://www.boingboing.net/" title="Boing Boing Blog" >Boing Boing Blog</a>
    alturls
    1. <a href="http://boingboing.net/rss.xml" >xmlurl</a>
    description
    Boing Boing Blog
  2. <a href="http://www.financialcryptography.com/" title="Financial Cryptography" >Financial Cryptography</a>
    alturls
    1. <a href="http://www.financialcryptography.com/mt/index.rdf" >xmlurl</a>
    description
    Financial Cryptography
  3. <a href="http://hublog.hubmed.org/" title="HubLog" >HubLog</a>
    alturls
    1. <a href="http://hublog.hubmed.org/index.xml" >xmlurl</a>
    2. <a href="http://hublog.hubmed.org/foaf.rdf" >foafurl</a>
    description
    HubLog

ENDATTN;

// Parse the sample, write it back out, and parse that output again.
$d=fromXOXO($kmattn);
$newattn = toXOXO($d);
$d2=fromXOXO($newattn);
// The parsed structure must survive a second pass through toXOXO/fromXOXO.
assertArrayEqual('attention double round-trip',$d,$d2);
// Writing the re-parsed structure must give the same markup again.
assertEqual('attention triple round-trip',$newattn,toXOXO($d2));
// The regenerated markup must equal the original sample text.
assertEqual('attention one round-trip',$kmattn,$newattn);

// Hand-built structure for the first two entries of the attention list.
// All keys are quoted strings: bare words such as url/title are undefined
// constants, which PHP 8 rejects with an Error.
$d=array(
    array(
        'url'=>"http://www.boingboing.net/",
        'title'=>"Boing Boing Blog",
        "alturls"=>array(array("url"=>"http://boingboing.net/rss.xml","text"=> "xmlurl")),
        "description"=>"Boing Boing Blog"
    ),
    array(
        'url'=>"http://www.financialcryptography.com/",
        'title'=>"Financial Cryptography",
        "alturls"=>array(array("url"=>"http://www.financialcryptography.com/mt/index.rdf","text"=> "xmlurl")),
        "description"=>"Financial Cryptography"
    )
);

// Markup that toXOXO is expected to produce for the structure above.
$attn=<<<ENDATTN

  1. <a href="http://www.boingboing.net/" title="Boing Boing Blog" >Boing Boing Blog</a>
    alturls
    1. <a href="http://boingboing.net/rss.xml" >xmlurl</a>
    description
    Boing Boing Blog
  2. <a href="http://www.financialcryptography.com/" title="Financial Cryptography" >Financial Cryptography</a>
    alturls
    1. <a href="http://www.financialcryptography.com/mt/index.rdf" >xmlurl</a>
    description
    Financial Cryptography

ENDATTN;

assertEqual('attention encode',$attn,toXOXO($d));


?>

</nowiki>

other implementations