xoxo-sample-code

From Microformats Wiki
Revision as of 02:00, 9 August 2006 by Singpolyma (talk | contribs)
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

XOXO Sample Code

Open source (CC-by-2.0, Apache 2.0) sample code for reading and writing XOXO files in Python and Java (with Perl, PHP, ... to follow).

Python

xoxo.py

#!/usr/bin/python
# -*- coding: utf-8 -*-
"""xoxo.py - a utility module for transforming to and from the XHTMLOutlines format XOXO http://microformats.org/wiki/xoxo
toXOXO takes a Python datastructure (tuples, lists or dictionaries, arbitrarily nested) and returns a XOXO representation of it.
fromXOXO parses an XHTML file for a xoxo list and returns the structure
"""
__version__ = "0.9"
__date__ = "2005-11-02"
__author__ = "Kevin Marks <kmarks@technorati.com>"
__copyright__ = "Copyright 2004-2006, Kevin Marks & Technorati"
__license__ = "http://creativecommons.org/licenses/by/2.0/ CC-by-2.0], [http://www.apache.org/licenses/LICENSE-2.0 Apache 2.0"
__credits__ = """Tantek Çelik and Mark Pilgrim for data structure"""
__history__ = """
TODO: add <title> tag
TODO: add a proper profile link
0.9 smarter parsing for encoding and partial markup; fix dangling dictionary case
0.8 work in unicode then render to utf-8
0.7 initial encoding support - just utf-8 for now
0.6 support the special behaviour for url properties  to/from <a>
0.5 fix some awkward side effects of whitespace and text outside our expected tags; simplify writing code
0.4 add correct XHTML headers so it validates
0.3 read/write version; fixed invalid nested list generation;
0.1 first write-only version
"""

# Compatibility shim: define True and False on interpreters where the names
# do not exist yet.
try:
    True, False
except NameError:
    True, False = not not 1, not 1
# Tags that hold only child elements. xoxoParser.handleUnicodeData looks the
# innermost open tag up here and skips text when it maps to False.
containerTags={'ol':False,'ul':False,'dl':False}
import sgmllib, urllib, urlparse, re,codecs

def toUnicode(key):
    """Return key as a unicode string.

    Unicode input is returned unchanged. Byte strings are decoded as UTF-8;
    when that fails they are decoded as Windows-1252, with bytes that code
    page leaves unassigned turned into U+FFFD instead of raising. Anything
    that is not a string (numbers, None, ...) is converted with str() first.
    """
    unicodeType = type(u'unicode')
    if not isinstance(key, unicodeType) and not hasattr(key, 'decode'):
        # not a string at all: fall back to its string form
        key = str(key)
    if isinstance(key, unicodeType):
        return key
    try:
        return key.decode('utf-8')
    except UnicodeDecodeError:
        # 'replace' because 0x81, 0x8d, 0x8f, 0x90 and 0x9d have no
        # Windows-1252 mapping and would otherwise raise again
        return key.decode('windows_1252', 'replace')

def makeXOXO(struct,className=None):
    """Render struct as a unicode XOXO fragment.

    Lists and tuples become an <ol> (with class=className when one is given)
    holding one <li> per item. A dictionary with a 'url' key becomes an <a>
    whose title/rel/type attributes come from the matching keys and whose
    content is the 'text', else the 'title', else the url itself; any keys
    left over are written as a <dl>. Everything else is written as text.
    Nested values are rendered recursively without a class name.

    Values are inserted as they are; no HTML escaping is applied.
    """
    unicodeType = type(u'unicode')
    parts = []
    if isinstance(struct, (list, tuple)):
        if className:
            parts.append(u'<ol class="%s">' % className)
        else:
            parts.append(u"<ol>")
        for entry in struct:
            parts.append(u"<li>" + makeXOXO(entry, None) + "</li>")
        parts.append(u"</ol>")
    elif isinstance(struct, dict):
        # work on a copy: keys are deleted as they are consumed by the <a>
        remaining = struct.copy()
        if 'url' in remaining:
            href = toUnicode(remaining['url'])
            parts.append(u'<a href="%s" ' % href)
            linkText = remaining.get('text', remaining.get('title', href))
            for attrName in ('title', 'rel', 'type'):
                if attrName in remaining:
                    attrValue = makeXOXO(remaining[attrName], None)
                    parts.append(u'%s="%s" ' % (attrName, attrValue))
                    del remaining[attrName]
            parts.append(u'>%s</a>' % makeXOXO(linkText, None))
            if 'text' in remaining:
                del remaining['text']
            del remaining['url']
        if len(remaining):
            parts.append(u"<dl>")
            for name, value in remaining.items():
                rendered = makeXOXO(value, None)
                parts.append(u'<dt>%s</dt><dd>%s</dd>' % (toUnicode(name), rendered))
            parts.append(u"</dl>")
    elif type(struct) == unicodeType:
        parts.append(struct)
    else:
        # anything that is not already a byte string goes through str() first
        if type(struct) != type(' '):
            struct = str(struct)
        parts.append(toUnicode(struct))
    return u''.join(parts)
class AttrParser(sgmllib.SGMLParser):
    def __init__(self):
        sgmllib.SGMLParser.__init__(self)
        self.text=[]
        self.encoding='utf-8'
    def cleanText(self,inText):
        if type(inText) == type(u'unicode'):
            inText = inText.encode(self.encoding,'replace')
        self.text=[]
        self.reset()
        self.feed(inText)
        return ''.join(self.text)
    def setEncoding(self,encoding):
        if 'ascii' in encoding:
            encoding='windows_1252' # so we don't throw an exception on high-bit set chars in there by mistake
        if encoding and not encoding =='text/html':
            try:
                canDecode = codecs.getdecoder(encoding)
                self.encoding = encoding
            except:
                try:
                    encoding='japanese.' +encoding
                    canDecode = codecs.getdecoder(encoding)
                    self.encoding = encoding
                except:
                    print "can't deal with encoding %s" % encoding
                    
    def handle_entityref(self, ref):
        # called for each entity reference, e.g. for "©", ref will be "copy"
        # map through to unicode where we can
        try:
            entity =htmlentitydefs.name2codepoint[ref]
            self.handleUnicodeData(unichr(entity))
        except:
            try:
                handle_charref(ref) # deal with char-ref's missing the '#' (see Akma)
            except:
                self.handle_data("&%s" % ref)

    def handle_charref(self, ref):
        # called for each character reference, e.g. for " ", ref will be "160"
        # Reconstruct the original character reference.
        try:
            if ref[0]=='x':
                self.handleUnicodeData(unichr(int(ref[1:],16)))
            else:
                self.handleUnicodeData(unichr(int(ref)))
        except:
            self.handle_data("&#%s" % ref)

# called for each block of plain text, i.e. outside of any tag and
# not containing any character or entity references
    def handle_data(self, text):
        if type(text)==type(u' '):
            self.handleUnicodeData(text)
        if self.encoding== 'utf-8':
            try:
                uText = unicode(text,self.encoding) #utf-8 is pretty clear when it is wrong
            except:
                uText = unicode(text,'windows_1252','ignore') # and this is the likely wrongness
        else:
            uText = unicode(text,self.encoding,'replace') # if they have really broken encoding, (eg lots of shift-JIS blogs)
        self.handleUnicodeData(uText)
    def handleUnicodeData(self, uText):
        self.text.append(uText)
        
class xoxoParser(AttrParser):
    """Build Python lists and dictionaries from XOXO markup.

    <ol>/<ul> become lists, <dl> becomes a dictionary keyed by its <dt>
    texts, and <a> becomes a dictionary of its attributes with 'href' stored
    as 'url' and the link text as 'text'. The results are left in
    self.structs.
    """
    def __init__(self):
        AttrParser.__init__(self)
        self.structs=[]     # structures created so far and not yet attached to a parent
        self.xostack=[]     # structures whose element is currently open, innermost last
        self.textstack=[''] # one text accumulator per open <li>/<dt>/<dd>/<a>
        self.attrparse = AttrParser() # separate parser used to decode attribute values
    def normalize_attrs(self, attrs):
        """Return attrs with lower-cased names and decoded values ('rel'/'type' values lower-cased too)."""
        cleaned = []
        for name, value in attrs:
            name = name.lower()
            value = self.attrparse.cleanText(value)
            if name in ('rel','type'):
                value = value.lower()
            cleaned.append((name, value))
        return cleaned
    def setEncoding(self,encoding):
        """Apply encoding to this parser and to the attribute-value parser."""
        AttrParser.setEncoding(self,encoding)
        self.attrparse.setEncoding(encoding)
    def pushStruct(self,struct):
        """Open struct as the current container.

        An empty dictionary (a <dl>) that directly follows a dictionary made
        from an <a> does not start a new structure: the <a> dictionary is
        reopened so the definitions are added to it.
        """
        resumeLink = False
        if isinstance(struct, dict) and not struct and self.structs:
            last = self.structs[-1]
            # identity, not equality: two distinct structures may compare equal
            resumeLink = (isinstance(last, dict) and 'url' in last
                          and (not self.xostack or last is not self.xostack[-1]))
        if resumeLink:
            self.xostack.append(self.structs[-1]) # put back the <a>-made one for extra def's
        else:
            self.structs.append(struct)
            self.xostack.append(struct)
    def do_meta(self, attributes):
        """Pick up the charset from <meta http-equiv="Content-Type" content="...">."""
        atts = dict(self.normalize_attrs(attributes))
        if atts.get('http-equiv','').lower() == "content-type" and 'content' in atts:
            # without a charset this yields the whole content value, which
            # setEncoding ignores when it is just 'text/html'
            encoding = atts['content'].split('charset=')[-1].strip()
            self.setEncoding(encoding)
    def start_a(self,attrs):
        attrsD = dict(self.normalize_attrs(attrs))
        attrsD['url']= attrsD.get('href','')
        if 'href' in attrsD:
            del attrsD['href']
        self.pushStruct(attrsD)
        self.textstack.append('')
    def end_a(self):
        val = self.textstack.pop()
        if val: 
            # link text that merely repeats the title or the url carries no information
            if self.xostack[-1].get('title','') == val:
                val=''
            if self.xostack[-1]['url'] == val:
                val=''
            if val:
                self.xostack[-1]['text']=val
        self.xostack.pop()
    def start_dl(self,attrs):
        self.pushStruct({})
    def end_dl(self):
        self.xostack.pop()
    def start_ol(self,attrs):
        self.pushStruct([])
    def end_ol(self):
        self.xostack.pop()
    def start_ul(self,attrs):
        self.pushStruct([])
    def end_ul(self):
        self.xostack.pop()
    def start_li(self,attrs):
        self.textstack.append('')
    def end_li(self):
        """Add the item to the enclosing list: its nested structures if any, else its text."""
        text = self.textstack.pop()
        current = self.xostack[-1]
        pending = []
        # everything above the enclosing list in structs was built inside this item
        while self.structs[-1] is not current:
            pending.append(self.structs.pop())
        if pending:
            pending.reverse() # popped newest first; restore document order
            current.extend(pending)
        else:
            current.append(text)
    def start_dt(self,attrs):
        self.textstack.append('')
    def end_dt(self):
        # the key text stays on textstack until the matching </dd> collects it
        pass
    def start_dd(self,attrs):
        self.textstack.append('')
    def end_dd(self):
        """Store the definition under the preceding <dt> text: a nested structure if one was built, else the text."""
        val = self.textstack.pop()
        key = self.textstack.pop()
        if self.structs[-1] is not self.xostack[-1]:
            val = self.structs.pop()
        self.xostack[-1][key]=val
    def handleUnicodeData(self, text):
        """Append text to the innermost accumulator unless it sits directly inside ol/ul/dl."""
        if len(self.stack) and containerTags.get(self.stack[-1],True): #skip text not within an element
            self.textstack[-1] += text
def toXOXO(struct,addHTMLWrapper=False,cssUrl=''):
    """Return struct rendered as XOXO, encoded as UTF-8.

    A list or tuple is rendered directly; any other value is first wrapped
    in a one-item list. The outermost <ol> carries class="xoxo". With
    addHTMLWrapper the list is placed inside a complete XHTML page, which
    imports the stylesheet at cssUrl when one is given.

    NOTE(review): the DOCTYPE below keeps the public identifier and the DTD
    URL inside a single quoted string. The unit tests compare against this
    exact text, so it is left untouched here.
    """
    if type(struct) in (type((1,)), type([1,])):
        inStruct = struct
    else:
        inStruct = [struct]
    body = makeXOXO(inStruct,'xoxo')
    if not addHTMLWrapper:
        return body.encode('utf-8')
    pieces = [u'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />''']
    if cssUrl:
        pieces.append(u'<style type="text/css" >@import "%s";</style>' % cssUrl)
    pieces.append(u"</head><body>%s</body></html>" % body)
    return u''.join(pieces).encode('utf-8')
    
    
def fromXOXO(html):
    """Parse html and return the structure found in it.

    Empty structures are discarded. While the result is a list holding
    exactly one item it is replaced by that item, so a document containing a
    single dictionary yields the dictionary itself.
    """
    xParser = xoxoParser()
    xParser.feed(html)
    result = []
    for found in xParser.structs:
        if found:
            result.append(found)
    # peel off single-item list wrappers
    while type(result) == type([1,]) and len(result) == 1:
        result = result[0]
    return result

# Allow direct invocation
# Read HTML from URL, parse into data structures, then re-output

import sys

if __name__ == "__main__":
    if len(sys.argv) < 2:
        raise SystemExit("Usage: "+sys.argv[0]+" url\n"+__doc__)
    # Fetch the document, parse it, and print it back as a complete XOXO page.
    source = urllib.urlopen(sys.argv[1])
    try:
        html = source.read(-1)
    finally:
        # close() has to be called, not merely named, and also when read() fails
        source.close()
    print(toXOXO(fromXOXO(html), True))

testxoxo.py

# -*- coding: utf-8 -*-
"""testxoxo.py 
Unit tests for xoxo.py
This file tests the functions in xoxo.py 
The underlying model here is http://diveintopython.org/unit_testing/index.html 

run from command line with
python testxoxo.py -v
"""
import xoxo
reload(xoxo)
import unittest

class xoxoTestCases(unittest.TestCase):
    '''Tests for xoxo.toXOXO and xoxo.fromXOXO: exact output, parsing, and round trips'''
    
    def testSimpleList(self):
        '''make a xoxo file from a list'''
        l = ['1','2','3']
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>1</li><li>2</li><li>3</li></ol>')
    def testNestedList(self):
        '''make a xoxo file from a list with a list in'''
        l = ['1',['2','3']]
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>1</li><li><ol><li>2</li><li>3</li></ol></li></ol>')

    def testDictionary(self):
        '''make a xoxo file from a dictionary'''
        # NOTE(review): the expected text has 'test' before 'name', but makeXOXO
        # writes keys in d.items() order - confirm that order is dependable
        d = {'test':'1','name':'Kevin'}
        html = xoxo.toXOXO(d)
        self.assertEqual(html,'<ol class="xoxo"><li><dl><dt>test</dt><dd>1</dd><dt>name</dt><dd>Kevin</dd></dl></li></ol>')

    def testSingleItem(self):
        '''make a xoxo file from a string'''
        l = "test"
        html = xoxo.toXOXO(l)
        self.assertEqual(html,'<ol class="xoxo"><li>test</li></ol>')

    def testWrapDiffers(self):
        '''make a xoxo file from a string with and without html wrapper and check they are different'''
        l = "test"
        html = xoxo.toXOXO(l)
        htmlwrap =  xoxo.toXOXO(l,addHTMLWrapper=True)
        self.failIfEqual(html,htmlwrap)

    def testWrapSingleItem(self):
        '''make a wrapped xoxo file from a string'''
        # NOTE(review): the expected DOCTYPE keeps the public identifier and the
        # DTD URL in one quoted string, exactly as toXOXO emits it
        l = "test"
        html = xoxo.toXOXO(l,addHTMLWrapper=True)
        self.assertEqual(html,'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /></head><body><ol class="xoxo"><li>test</li></ol></body></html>''')

    def testWrapItemWithCSS(self):
        '''make a wrapped xoxo file from a string, importing a stylesheet'''
        l = "test"
        html = xoxo.toXOXO(l,addHTMLWrapper=True,cssUrl='reaptest.css')
        self.assertEqual(html,'''<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN
http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head profile=""><meta http-equiv="Content-Type" content="text/html; charset=utf-8" /><style type="text/css" >@import "reaptest.css";</style></head><body><ol class="xoxo"><li>test</li></ol></body></html>''')

    def testDictionaryRoundTrip(self):
        ''' make a dictionary into a xoxo file and back again; check it is the same'''
        d = {'test':'1','name':'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
        
    def testDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url in into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','name':'Kevin'}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)    
    def testNestedDictionaryRoundTrip(self):
        ''' make a dictionary with a dict in into a xoxo file and back again; check it is the same'''
        d = {'test':'1','inner':{'name':'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testNestedDictionaryWithURLRoundTrip(self):
        ''' make a dictionary with an url and a dict into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','inner':{'name':'Kevin'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testNestedDictionariesWithURLsRoundTrip(self):
        ''' make a dictionary with an url and a dict with an url into a xoxo file and back again; check it is the same'''
        d = {'url':'http://example.com','inner':{'name':'Kevin','url':'http://slashdot.org'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)
    def testListRoundTrip(self):
        ''' make a list into a xoxo file and back again; check it is the same'''
        l = ['3','2','1']
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testListofDictsRoundTrip(self):
        ''' make a list of Dicts into a xoxo file and back again; check it is the same'''
        l = ['3',{'a':'2'},{'b':'1','c':'4'}]
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testListofListsRoundTrip(self):
        ''' make a list of Lists into a xoxo file and back again; check it is the same'''
        l = ['3',['a','2'],['b',['1',['c','4']]]]
        html = xoxo.toXOXO(l)
        newdl= xoxo.fromXOXO(html)
        self.assertEqual(l,newdl)
    def testDictofListsRoundTrip(self):
        ''' make a dict with lists in into a xoxo file and back again; check it is the same'''
        d = {'test':['1','2'],
        'name':'Kevin',
        'nestlist':['a',['b','c']],
        'nestdict':{'e':'6','f':'7'}}
        html = xoxo.toXOXO(d)
        newd = xoxo.fromXOXO(html)
        self.assertEqual(d,newd)

    def testXOXOjunkInContainers(self):
        '''make sure text outside <li> etc is ignored'''
        d=xoxo.fromXOXO('<ol>bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>')
        self.assertEqual(d,{'good': 'buy'})

    def testXOXOjunkInElements(self):
        '''make sure text within <li> but outside a subcontainer is ignored'''
        l=xoxo.fromXOXO('<ol><li>bad<dl><dt>good</dt><dd>buy</dd></dl>worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>')
        self.assertEqual(l,[{'good': 'buy'},['OK']])

    def testXOXOWithSpacesAndNewlines(self):
        '''unmung some xoxo with spaces in and check result is right'''
        xoxoSample= '''<ol class='xoxo'> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        d2={'text':'item 1',
            'description':" This item represents the main point we're trying to make.",
            'url':'http://example.com/more.xoxo',
            'title':'title of item 1',
            'type':'text/xml',
            'rel':'help'
            }
        # xoxoAgain is only needed by the comparison that is disabled below
        xoxoAgain = xoxo.toXOXO(d)
        self.assertEqual(d,d2)
        #this needs a smarter whitespace-sensitive comparison
        #self.assertEqual(xoxoSample,xoxoAgain)

    def testSpecialAttributeDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in and check result is right'''
        xoxoSample= '''<ol class='xoxo'> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample= '''<ol class='xoxo'> 
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a> 
<!-- note how the "text" property is simply the contents of the <a> element -->
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d,d2)
    def testSpecialAttributeAndDLDecoding(self):
        '''unmung some xoxo with <a href=' rel= etc in plus a <dl> in the same item and check result is right'''
        xoxoSample= '''<ol class="xoxo"> 
  <li>
    <dl>
        <dt>text</dt>
        <dd>item 1</dd>
        <dt>description</dt>
        <dd> This item represents the main point we're trying to make.</dd>
        <dt>url</dt>
        <dd>http://example.com/more.xoxo</dd>
        <dt>title</dt>
        <dd>title of item 1</dd>
        <dt>type</dt>
        <dd>text/xml</dd>
        <dt>rel</dt>
        <dd>help</dd>
    </dl>
  </li>
</ol>'''
        d = xoxo.fromXOXO(xoxoSample)
        smartxoxoSample= '''<ol class="xoxo"> 
  <li><a href="http://example.com/more.xoxo"
         title="title of item 1"
         type="text/xml"
         rel="help">item 1</a> 
<!-- note how the "text" property is simply the contents of the <a> element -->
      <dl>
        <dt>description</dt>
          <dd> This item represents the main point we're trying to make.</dd>
      </dl>
  </li>
</ol>'''
        d2 = xoxo.fromXOXO(smartxoxoSample)
        self.assertEqual(d,d2)
    def testSpecialAttributeEncode(self):
        '''check it makes an <a href with a url parameter'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help','text':'an example'}
        html=xoxo.toXOXO(d)
        expectedHTML= '<ol class="xoxo"><li><a href="http://example.com/more.xoxo" title="sample url" rel="help" type="text/xml" >an example</a></li></ol>' 
        self.assertEqual(html,expectedHTML)
        
    def testSpecialAttributeRoundTripFull(self):
        '''check an <a href with title, type, rel and text survives a round trip'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help','text':'an example'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testSpecialAttributeRoundTripNoText(self):
        '''check it makes an <a href with a url parameter and no text attribute'''
        d={'url':'http://example.com/more.xoxo','title':'sample url','type':"text/xml",'rel':'help'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testSpecialAttributeRoundTripNoTextOrTitle(self):
        '''check it makes an <a href with a url parameter and no text or title attribute'''
        d={'url':'http://example.com/more.xoxo'}
        html=xoxo.toXOXO(d)
        self.assertEqual(d,xoxo.fromXOXO(html))
    def testAttentionRoundTrip(self):
        '''check nested <a> and <dl> and <a> are preserved'''
        kmattn='''<ol class="xoxo"><li><a href="http://www.boingboing.net/" title="Boing Boing Blog" >Boing Boing Blog</a><dl><dt>alturls</dt><dd><ol><li><a href="http://boingboing.net/rss.xml" >xmlurl</a></li></ol></dd><dt>description</dt><dd>Boing Boing Blog</dd></dl></li><li><a href="http://www.financialcryptography.com/" title="Financial Cryptography" >Financial Cryptography</a><dl><dt>alturls</dt><dd><ol><li><a href="http://www.financialcryptography.com/mt/index.rdf" >xmlurl</a></li></ol></dd><dt>description</dt><dd>Financial Cryptography</dd></dl></li><li><a href="http://hublog.hubmed.org/" title="HubLog" >HubLog</a><dl><dt>alturls</dt><dd><ol><li><a href="http://hublog.hubmed.org/index.xml" >xmlurl</a></li><li><a href="http://hublog.hubmed.org/foaf.rdf" >foafurl</a></li></ol></dd><dt>description</dt><dd>HubLog</dd></dl></li></ol>''';
        d = xoxo.fromXOXO(kmattn)
        newattn = xoxo.toXOXO(d)
        d2 = xoxo.fromXOXO(newattn)
        # three checks: the regenerated markup is stable, the parsed data is
        # stable, and the regenerated markup equals the original text
        self.assertEqual(newattn,xoxo.toXOXO(d2))
        self.assertEqual(d,d2)
        self.assertEqual(kmattn,newattn)
        
    def testUnicodeRoundtrip(self):
        '''check unicode characters can go to xoxo and back'''
        src=unicode('Tantek \xc3\x87elik and a snowman \xe2\x98\x83','utf-8')
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html))
    def testUtf8Roundtrip(self):
        '''check utf8 characters can go to xoxo and back'''
        src='Tantek \xc3\x87elik and a snowman \xe2\x98\x83'
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html).encode('utf-8'))
    def testWindows1252Roundtrip(self):
        '''check 1252 characters can go to xoxo and back'''
        # \xa0 on its own is not valid utf-8, so this goes through the windows-1252 fallback
        src='This is an evil\xa0space'
        html = xoxo.toXOXO(src)
        self.assertEqual(src,xoxo.fromXOXO(html).encode('windows-1252'))
if __name__ != "__main__":
    # imported rather than run as a script: run the whole suite straight away
    suite = unittest.makeSuite(xoxoTestCases,'test')
    unittest.TextTestRunner().run(suite)
else:
    unittest.main()

Java

XOXOWriter.java

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo;

import java.util.*;

public class XOXOWriter {

  public String[] attrs = {"title","rel","type"};

  public String makeXOXO(List struct, String className){
    return makeXOXO(struct, className, 0, true);
  }

  public String makeXOXO(List struct, String className,
                         boolean doNSDeclaration){
    return makeXOXO(struct, className, 0, doNSDeclaration);
  }

  public String makeXOXO(List struct){
    return makeXOXO(struct, "xoxo", 0, true);
  }

  public String makeXOXO(Object struct, int depth){
    return makeXOXO(struct, null, 0, false);
  }

  public String makeXOXO(Object struct, String className,
                         int depth, boolean doNSDeclaration){
    if(struct == null) return "";
    StringBuffer sb = new StringBuffer();
    if(struct instanceof Object[]){
      struct = Arrays.asList((Object[]) struct);
    }
    if(struct instanceof List){
      sb.append("<ol");
      if(doNSDeclaration)
        sb.append(" xmlns=\"http://www.w3.org/1999/xhtml\"");
      if(className != null){
        sb.append(" class=\"");
        sb.append(className);
        sb.append("\"");
      }
      sb.append(">");
    }
    if(struct instanceof Map){
      Map d = new LinkedHashMap((Map) struct);
      if(d.containsKey("url")){
        sb.append("<a href=\"" + d.get("url") + "\" ");
        Object text;
        if(d.containsKey("text")){
          text = d.get("text");
        }else if(d.containsKey("title")){
          text = d.get("title");
        }else{
          text = d.get("url");
        }
        for(int i=0; i<attrs.length; i++){
          String xVal = makeXOXO(d.get(attrs[i]),depth+1);
          if(xVal != null && !xVal.equals("")){
            sb.append(attrs[i] + "=\"" + xVal + "\" ");
          }
          d.remove(attrs[i]);
        }
        sb.append(">" + makeXOXO(text, depth+1) + "</a>");
        d.remove("text");
        d.remove("url");
      }
      if(d.size() > 0){
        sb.append("<dl>");
        for(Iterator i = d.keySet().iterator(); i.hasNext();){
          Object k = i.next();
          String ddVal = makeXOXO(d.get(k),depth+1);
          sb.append("<dt>" + k + "</dt>");
          sb.append("<dd>" + ddVal + "</dd>");
        }
        sb.append("</dl>");
      }
    }else if(struct instanceof List){
      List l = (List) struct;
      for(Iterator i = l.iterator(); i.hasNext();){
        Object item = i.next();
        sb.append("<li>" + makeXOXO(item,depth+1) + "</li>");
      }
      sb.append("</ol>");
    }else{
      sb.append(struct);
    }
    return sb.toString();
  }

  public String toXOXO(List struct){
    return toXOXO(struct, false, null);
  }

  public String toXOXO(Object struct){
    List alist = new ArrayList();
    alist.add(struct);
    return toXOXO(alist);
  }

  public String toXOXO(Object struct,
                       boolean addHTMLWrapper,
                       String cssUrl){
    List alist = new ArrayList();
    alist.add(struct);
    return toXOXO(alist, addHTMLWrapper, cssUrl);
  }

  public String toXOXO(List struct,
                       boolean addHTMLWrapper,
                       String cssUrl){
    String startHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD"
        + "XHTML 1.0 Transitional//EN\n"
        + "http://www.w3.org/TR/xhtml1/DTD/"
        + "xhtml1-transitional.dtd\">"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
        + "<head>";
    if(addHTMLWrapper){
      String s = startHTML;
      if(cssUrl != null){
        s += "<style type=\"text/css\">@import \""
            + cssUrl + "\";</style>";
      }
      s += "</head><body>" + makeXOXO(struct, "xoxo", false)
          + "</body></html>";
      return s;
    }else{
      return makeXOXO(struct, "xoxo");
    }
  }
}

XOXOParser.java

This needs some small additions to handle the XHTML DTD and named character entities.

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.Attributes;
import org.xml.sax.helpers.XMLReaderFactory;
import org.xml.sax.helpers.DefaultHandler;

import java.util.*;
import java.io.InputStream;
import java.io.StringReader;
import java.io.IOException;

public class XOXOParser extends DefaultHandler {

  protected String XHTML_NS = "http://www.w3.org/1999/xhtml";
  protected List elStack;
  protected Map listEls;
  public List structs;
  public List xoStack;
  public List textStack;

  public XOXOParser() {
    reset();
  }

  protected void pushStruct(Object struct){
    if((struct instanceof Map) && (((Map) struct).size() > 0)
        && (structs.get(structs.size()-1) instanceof Map)
        && (((Map) struct).containsKey("url"))){
      // put back the <a>-made one for extra defs
      xoStack.add(structs.get(structs.size()-1));
    }else{
      structs.add(struct);
      xoStack.add(struct);
    }
  }

  public void startElement(String nsUri, String localName,
                           String qName, Attributes atts){
    // bounce non-XHTML elements
    if(nsUri.equals(XHTML_NS)){
      elStack.add(localName);
    }else{
      elStack.add("foo");
      return;
    }

    if(localName.equals("a")){
      Map attmap = new LinkedHashMap();
      int len = atts.getLength();
      for(int i=0; i<len; i++){
        attmap.put(atts.getQName(i),atts.getValue(i));
      }
      if(attmap.containsKey("href")){
        attmap.put("url",attmap.get("href"));
        attmap.remove("href");
      }
      pushStruct(attmap);
      textStack.add("");
    }else if(localName.equals("dl")){
      pushStruct(new LinkedHashMap());
    }else if(localName.equals("ol")){
      pushStruct(new ArrayList());
    }else if(localName.equals("ul")){
      pushStruct(new ArrayList());
    }else if(localName.equals("li")){
      textStack.add("");
    }else if(localName.equals("dt")){
      textStack.add("");
    }else if(localName.equals("dd")){
      textStack.add("");
    }
  }

  public void endElement(String nsUri, String localName,
                         String qName){
    elStack.remove(elStack.size()-1);
    // bounce non-XHTML elements
    if(nsUri != XHTML_NS){
      return;
    }

    if(localName.equals("a")){
      String val = (String) textStack.remove(textStack.size()-1);
      if (val.length() > 0){
        Map defs = (Map) xoStack.get(xoStack.size()-1);
        String defVal = (String) defs.get("title");
        if((defVal != null) && (val.equals(defVal))){
          val = "";
        }
        defVal = (String) defs.get("url");
        if((defVal != null) && (val.equals(defVal))){
          val = "";
        }
        if(val.length() > 0){
          defs.put("text",val);
        }
      }
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("dl")){
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("ol")){
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("ul")){
      xoStack.remove(xoStack.size()-1);
    }else if(localName.equals("li")){
      Object val = textStack.remove(textStack.size()-1);
      List last = (List) xoStack.get(xoStack.size()-1);
      if(structs.get(structs.size()-1) != last){
        val = structs.remove(structs.size()-1);
      }
      last.add(val);
    }else if(localName.equals("dd")){
      Object val = textStack.remove(textStack.size()-1);
      Object key = textStack.remove(textStack.size()-1);
      Map last = (Map) xoStack.get(xoStack.size()-1);
      if(structs.get(structs.size()-1) != last){
        val = structs.remove(structs.size()-1);
      }
      last.put(key,val);
    }
  }

  public void characters(char[] ch, int start, int length){
    if((xoStack.size() > 0)
        && (!listEls.containsKey(elStack.get(elStack.size()-1)))){
      String text = (String) textStack.get(textStack.size()-1);
      String test = new String(ch,start,length);
      textStack.set(textStack.size()-1,text+test);
    }
  }

  public Object parse(String s) throws SAXException, IOException{
    return parse(new InputSource(new StringReader(s)));
  }

  public Object parse(InputStream is) throws SAXException, IOException {
    return parse(new InputSource(is));
  }

  public Object parse(InputSource in) throws SAXException, IOException {
    XMLReader parser = XMLReaderFactory.createXMLReader();
    parser.setContentHandler(this);
    parser.parse(in);
    List returnList = new ArrayList();
    for(Iterator i = this.structs.iterator(); i.hasNext();){
      Object thing = i.next();
      if(thing != null){
        returnList.add(thing);
      }
    }
    while((returnList.size()==1)){
      if(returnList.get(0) instanceof List){
        returnList = (List) returnList.get(0);
      }else{
        reset();
        return returnList.get(0);
      }
    }
    reset();
    return returnList;
  }

  protected void reset(){
    elStack = new ArrayList();
    listEls = new HashMap();
    structs = new ArrayList();
    xoStack = new ArrayList();
    textStack = new ArrayList();
    listEls.put("ol","ol");
    listEls.put("ul","ul");
    listEls.put("dl","dl");
  }
}

XOXOTest.java

/*
 * Copyright 2005 Robert Sayre
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * Portions of this code are derived from the Apache-licensed Python XOXO
 * module by Kevin Marks. <http://microformats.org/wiki/xoxo-sample-code>
 */

package org.atompub.draft.xoxo.tests;

import junit.framework.TestSuite;
import junit.framework.TestCase;
import junit.textui.TestRunner;
import org.atompub.draft.xoxo.XOXOWriter;
import org.atompub.draft.xoxo.XOXOParser;

import java.util.*;

/**
 * JUnit 3 tests for XOXOWriter and XOXOParser: exact-output checks for the
 * writer, parse checks for hand-written markup, and write-then-parse round
 * trips.
 *
 * Tests that parse declare {@code throws Exception} instead of catching and
 * calling fail(e.getMessage()): the runner then reports the original
 * exception with its stack trace, whereas a statement placed after fail()
 * can never run because fail() always throws.
 */
public class XOXOTest extends TestCase {

  public static void main(String[] args) {
    new TestRunner().doRun(new TestSuite(XOXOTest.class));
  }

  /**
   * Parses html with a fresh parser and asserts the result equals expected.
   */
  private static void assertParsesTo(Object expected, String html)
      throws Exception {
    assertEquals(expected, new XOXOParser().parse(html));
  }

  String XHTML_DEC = "xmlns=\"http://www.w3.org/1999/xhtml\" ";
  public String simpleListHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li>1</li><li>2</li><li>3</li></ol>";

  public void testSimpleList(){
    String [] numbers = {"1","2","3"};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(simpleListHTML,
                 xoxo.toXOXO(Arrays.asList(numbers)));
  }

  public void testStringIntegerList(){
    Object[] numbers = {new Integer(1),"2","3"};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(simpleListHTML,
                 xoxo.toXOXO(Arrays.asList(numbers)));
  }

  public String nestedListHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\"><li>1</li><li>"
  + "<ol><li>2</li><li>3</li></ol></li></ol>";

  public void testNestedList(){
    Object[] arr = {"2","3"};
    Object[] nested = {"1",Arrays.asList(arr)};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(nestedListHTML,
                 xoxo.toXOXO(Arrays.asList(nested)));
  }

  public void testNestedArray(){
    Object[] arr = {"2","3"};
    Object[] nested = {"1",arr};
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(nestedListHTML,
                 xoxo.toXOXO(Arrays.asList(nested)));
  }

  public String dictHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li><dl><dt>test</dt><dd>1</dd><dt>name</dt>"
  + "<dd>Kevin</dd></dl></li></ol>";

  public void testDictionary(){
    Map dict = new LinkedHashMap();
    dict.put("test", new Integer(1));
    dict.put("name", "Kevin");
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(dictHTML,
                 xoxo.toXOXO(dict));
  }

  public String singleHTML = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "<li>test</li></ol>";

  public void testSingleItem(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(singleHTML,
                 xoxo.toXOXO(item));
  }

  public void testWrapDiffers(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    String nowrap = xoxo.toXOXO(item);
    Object[] itemArr = {item};
    String wrap = xoxo.toXOXO(Arrays.asList(itemArr),true,null);
    assertFalse(wrap.equals(nowrap));
  }

  // NOTE(review): the pieces below concatenate to "DTDXHTML" with no space;
  // left untouched because it may mirror the writer's output - confirm.
  String startHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD"
        + "XHTML 1.0 Transitional//EN\n"
        + "http://www.w3.org/TR/xhtml1/DTD/"
        + "xhtml1-transitional.dtd\">"
        + "<html xmlns=\"http://www.w3.org/1999/xhtml\">"
        + "<head></head><body>";
  public String singleWrapHTML = "<ol "
  + "class=\"xoxo\">"
  + "<li>test</li></ol>";
  public String endHTML = "</body></html>";

  public void testWrapSingleItem(){
    String item = "test";
    XOXOWriter xoxo = new XOXOWriter();
    assertEquals(startHTML + singleWrapHTML + endHTML,
                 xoxo.toXOXO(item,true,null));
  }

  /** Smoke test: parsing the dictionary markup must not throw. */
  public void testXOXOParser() throws Exception {
    XOXOParser parser = new XOXOParser();
    parser.parse(dictHTML);
  }

  public void testDictRoundTrip() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("test", "1");
    dict.put("name", "Kevin");
    String html = new XOXOWriter().toXOXO(dict);
    assertParsesTo(dict, html);
  }

  public void testListRoundTrip() throws Exception {
    Object[] obj = {"1","2","3"};
    List testList = Arrays.asList(obj);
    String html = new XOXOWriter().toXOXO(testList);
    assertParsesTo(testList, html);
  }

  public void testListOfDictsRoundTrip() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("test", "1");
    dict.put("name", "Kevin");
    Map dict2 = new LinkedHashMap();
    dict2.put("one", "two");
    dict2.put("three", "four");
    dict2.put("five", "six");
    Object[] obj = {"1",dict,dict2};
    List testList = Arrays.asList(obj);
    String html = new XOXOWriter().toXOXO(testList);
    assertParsesTo(testList, html);
  }

  public void testListOfListsRoundTrip() throws Exception {
    Object[] list1 = {"1","2","3"};
    Object[] list2 = {"4","5","6", Arrays.asList(list1)};
    Object[] list3 = {"7", Arrays.asList(list2)};
    Object[] list4 = {"8", Arrays.asList(list3)};
    List testList = Arrays.asList(list4);
    String html = new XOXOWriter().toXOXO(testList);
    assertParsesTo(testList, html);
  }

  public void testDictOfListsRoundTrip() throws Exception {
    Object[] list1 = {"1","2","3"};
    Object[] list2 = {"4","5","6"};
    Object[] list3 = {"7"};
    Object[] list4 = {"8", "9"};
    Map dict = new LinkedHashMap();
    dict.put("foo", Arrays.asList(list1));
    dict.put("bar", Arrays.asList(list2));
    dict.put("baz", Arrays.asList(list3));
    dict.put("qux", Arrays.asList(list4));
    String html = new XOXOWriter().toXOXO(dict);
    assertParsesTo(dict, html);
  }

  public String junkXOXO = "<ol "
  + XHTML_DEC
  + "class=\"xoxo\">"
  + "bad<li><dl>worse<dt>good</dt><dd>buy</dd> now</dl></li></ol>";

  /** Stray text directly inside list containers must be ignored. */
  public void testXOXOJunkInContainers() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("good","buy");
    assertParsesTo(dict, junkXOXO);
  }

  public String junkElementXOXO = "<ol "
  + XHTML_DEC
  + "><li>bad<dl><dt>good</dt><dd>buy</dd></dl>"
  + "worse</li><li>bag<ol><li>OK</li></ol>fish</li></ol>";

  /** Stray text next to a nested structure inside an item must be ignored. */
  public void testXOXOjunkInElements() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("good","buy");
    Object[] ok = {"OK"};
    Object[] obj ={dict, Arrays.asList(ok)};
    List testList = Arrays.asList(obj);
    assertParsesTo(testList, junkElementXOXO);
  }

  public String xoxoSpacesNewlines = "<ol " +  XHTML_DEC +
      " class='xoxo'> \n" +
      "  <li>\n" +
      "    <dl>\n" +
      "        <dt>text</dt>\n" +
      "        <dd>item 1</dd>\n" +
      "        <dt>description</dt>\n" +
      "        <dd> This item represents the main" +
      " point we're trying to make.</dd>\n" +
      "        <dt>url</dt>\n" +
      "        <dd>http://example.com/more.xoxo</dd>\n" +
      "        <dt>title</dt>\n" +
      "        <dd>title of item 1</dd>\n" +
      "        <dt>type</dt>\n" +
      "        <dd>text/xml</dd>\n" +
      "        <dt>rel</dt>\n" +
      "        <dd>help</dd>\n" +
      "    </dl>\n" +
      "  </li>\n" +
      "</ol>";

  public void testXOXOWithSpacesAndNewlines() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("text","item 1");
    dict.put("description"," This item represents the main" +
        " point we're trying to make.");
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","title of item 1");
    dict.put("type","text/xml");
    dict.put("rel","help");
    assertParsesTo(dict, xoxoSpacesNewlines);
  }

  public String xoxoSample = "<ol " +  XHTML_DEC +
      " class='xoxo'> \n" +
      "  <li>\n" +
      "    <dl>\n" +
      "        <dt>text</dt>\n" +
      "        <dd>item 1</dd>\n" +
      "        <dt>url</dt>\n" +
      "        <dd>http://example.com/more.xoxo</dd>\n" +
      "        <dt>title</dt>\n" +
      "        <dd>title of item 1</dd>\n" +
      "        <dt>type</dt>\n" +
      "        <dd>text/xml</dd>\n" +
      "        <dt>rel</dt>\n" +
      "        <dd>help</dd>\n" +
      "    </dl>\n" +
      "  </li>\n" +
      "</ol>";

  public String smartXOXOSample = "<ol " + XHTML_DEC +
      "class=\"xoxo\"> \n" +
      "  <li><a href=\"http://example.com/more.xoxo\"\n" +
      "         title=\"title of item 1\"\n" +
      "         type=\"text/xml\"\n" +
      "         rel=\"help\">item 1</a> \n" +
      "<!-- note how the \"text\" property is simply" +
      " the contents of the <a> element -->\n" +
      "  </li>\n" +
      "</ol>";

  /**
   * The dl form and the a-element form must parse to equal structures.
   * Both parses deliberately share one parser instance, so this also
   * exercises reuse of a parser across calls.
   */
  public void testSpecialAttributeDecoding() throws Exception {
    XOXOParser parser = new XOXOParser();
    Object xoxoDict = parser.parse(xoxoSample);
    Object xoxoDict2 = parser.parse(smartXOXOSample);
    assertEquals(xoxoDict,xoxoDict2);
  }

  public String specialAttrHTML =  "<ol " + XHTML_DEC +
      "class=\"xoxo\">" +
      "<li><a href=\"http://example.com/more.xoxo\" title=\"sample url\" " +
      "rel=\"help\" type=\"text/xml\" >an example</a></li></ol>";

  public void testSpecialAttributeEncode(){
    XOXOWriter xoxo = new XOXOWriter();
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    dict.put("text","an example");
    String html = xoxo.toXOXO(dict);
    assertEquals(specialAttrHTML,html);
  }

  public void testSpecialAttributeRoundTripFull() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    dict.put("text","an example");
    String html = new XOXOWriter().toXOXO(dict);
    assertParsesTo(dict, html);
  }

  public void testSpecialAttributeRoundTripNoText() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("title","sample url");
    dict.put("type","text/xml");
    dict.put("rel","help");
    String html = new XOXOWriter().toXOXO(dict);
    assertParsesTo(dict, html);
  }

  public void testSpecialAttributeRoundTripNoTextOrTitle() throws Exception {
    Map dict = new LinkedHashMap();
    dict.put("url","http://example.com/more.xoxo");
    dict.put("type","text/xml");
    dict.put("rel","help");
    String html = new XOXOWriter().toXOXO(dict);
    assertParsesTo(dict, html);
  }

  public void testUnicodeRoundTrip() throws Exception {
    // Written with \\u escapes so the test does not depend on the source
    // file's encoding. The snowman (U+2603) had degraded to a literal "?",
    // presumably lost in transcoding - TODO confirm against the original.
    String s = "Tantek \u00C7elik and a snowman \u2603";
    String html = new XOXOWriter().toXOXO(s);
    assertParsesTo(s, html);
  }

}

PHP