Skip to content

Commit

Permalink
Added unicode pre- and post-processing
Browse files Browse the repository at this point in the history
  • Loading branch information
fyngyrz committed Jan 7, 2019
1 parent 4ed3f63 commit 9645fb5
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 8 deletions.
92 changes: 86 additions & 6 deletions aa_macro.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,12 @@ class macro(object):
you written your congresscritter about patent and
copyright reform yet?
Incep Date: June 17th, 2015 (for Project)
LastRev: January 6th, 2019 (for Class)
LastDocRev: January 6th, 2019 (for Class)
LastRev: January 7th, 2019 (for Class)
LastDocRev: January 7th, 2019 (for Class)
Version:
"""
def version_set(self):
return('1.0.133 Beta')
return('1.0.134 Beta')
"""
Tab spacing: 4 (set your editor to this for sane formatting while reading)
Dev Env: OS X 10.6.8, Python 2.6.1 from inception
Expand Down Expand Up @@ -467,13 +467,34 @@ class macro() is for authoring by you, the person who ISN'T trying to
noembrace -- False (default) or True disables [embrace] built-in
noinclude -- False (default) or True disables [include] built-in
back ------- ffffff (default) HEX3 or HEX color for background color in HTML 4.01s mode
ucin ------- False (default) presumes input is 0-127 ASCII; True is Unicode
ucout ------ False (default) output is ASCII; True converts output to Unicode
dothis ----- None (default) you can pass in initial text to be processed here if you like
the object returns the result in its string method:
mod = macro(dothis='[style x foo [b]]{x bar}')
print mod # prints 'foo bar'
print mod # prints 'foo bar' if the data is ASCII
if it is unicode, you need to do a little more
Unicode
=======
Unicode presents encoding issues for Python 2.7
The following examples show how to deal with
unicode in the context of aa_macro:
Processing unicode input to ASCII / HTML output:
------------------------------------------------
mod = macro(ucin=True)
s = mod.unido(testBlock) # s will be ASCII with unicode HTML entities
Processing Unicode input to Unicode output:
-------------------------------------------
mod = macro(ucin=True,ucout=True)
mod.unido(testBlock)
s = mod.uniget() # s will be unicode
The Rules:
----------
o Unicode requires specific processing as shown above.
o Do not attempt to define one style inside another.
o Style names may contain anything but a space or a newline
o repeat gets a number or a variable parameter. Nothing else. No nesting in the parameter!
Expand All @@ -490,12 +511,14 @@ class macro() is for authoring by you, the person who ISN'T trying to
content can have commas, but the macro system won't see them. Of course, you can't
use that on anything that *needs* commas for parameters. Life is so complicated. :)
"""
def __init__(self,dothis=None,mode='3.2',back="ffffff",nodinner=False,noshell=False,noinclude=False,noembrace=False,debug=False,locklipath='',lockwepath='',xlimit=0,dlimit=0):
def __init__(self,dothis=None,mode='3.2',back="ffffff",nodinner=False,noshell=False,noinclude=False,noembrace=False,debug=False,locklipath='',lockwepath='',xlimit=0,dlimit=0,ucin=False,ucout=False):
self.locklipath = locklipath
self.lockwepath = lockwepath
self.xlimit = xlimit
self.dlimit = dlimit
self.xdcount = 0
self.ucin = ucin
self.ucout = ucout
self.lipath = locklipath
self.wepath = lockwepath
self.setMode(mode)
Expand Down Expand Up @@ -595,11 +618,68 @@ def __init__(self,dothis=None,mode='3.2',back="ffffff",nodinner=False,noshell=Fa
self.months = ['January','February','March','April','may','June','July','August','September','October','November','December']
self.setup_urle()
if dothis != None:
self.do(dothis)
if self.ucin == True:
self.unido(dothis)
else:
self.do(dothis)

def __str__(self):
    """Return the most recent processing result held in self.result."""
    rendered = self.result
    return rendered

def uniget(self):
    """Return the current result, entity-decoded when Unicode output is on.

    When self.ucout is exactly True, self.result is passed through
    self.asciitounicode() and the converted text is stored back into
    self.result before being returned. For any other value of self.ucout
    the stored result is returned untouched.
    """
    wants_unicode = (self.ucout == True)
    if not wants_unicode:
        return self.result
    converted = self.asciitounicode(self.result)
    self.result = converted
    return self.result

def unido(self, text):
    """Process Unicode input text and return the resulting output.

    The input is coerced with unicode(), reduced to ASCII with
    self.unicodetoascii() (non-ASCII characters become '&#NNN;' entities),
    and then handed to self.do(). The value returned is self.result as it
    stands after that call.
    """
    ascii_text = self.unicodetoascii(unicode(text))  # Unicode in, ASCII out
    self.do(ascii_text)
    return self.result

def asciitounicode(self, text):
    """Expand decimal numeric entities ('&#NNN;') into Unicode characters.

    Only well-formed entities -- an ampersand, a hash, one or more ASCII
    digits and a terminating semicolon -- are converted. Everything else is
    passed through unchanged, including a lone '&', an unterminated
    '&#123' at the end of the text, '&#;' with no digits, and entities
    whose value lies beyond the Unicode range (above 0x10FFFF).

    text: the text to convert.
    Returns the converted text as a Unicode string.
    """
    import re  # local import: the file's import header is outside this block

    def expand(match):
        code = int(match.group(1))
        if code > 0x10FFFF:
            # Not a valid code point; leave the entity exactly as written.
            return match.group(0)
        # Go through a \UXXXXXXXX escape rather than a chr()-style call so
        # that code points above 0xFFFF are produced without depending on
        # how wide the interpreter's native characters are.
        escape = '\\U%08x' % code
        return escape.encode('ascii').decode('unicode-escape')

    # Coerce to a Unicode string up front so the return type does not depend
    # on whether any entity was actually present in the text.
    text = u'' + text
    return re.sub(u'&#([0-9]+);', expand, text)

def unicodetoascii(self, text):
    """Return text as plain ASCII, with other characters as '&#NNN;'.

    Each character whose ordinal is below 128 is copied through unchanged;
    every other character is replaced by a decimal numeric entity built
    from its ordinal.

    text: the text to convert.
    Returns the ASCII-only result as a native string.
    """
    pieces = []
    for ch in text:
        code = ord(ch)
        if code < 128:
            # str() keeps the accumulated output a native string; it cannot
            # fail here because the character is known to be ASCII.
            pieces.append(str(ch))
        else:
            pieces.append('&#{:d};'.format(code))
    return ''.join(pieces)

def setDebug(self,db):
if db != False:
self.debug = True
Expand Down
10 changes: 9 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,16 @@ This log reflects changes to the aa_macro.py import library. Other changes
such as to the associated utilities and sample files are not tracked here.

### Log
1.0.134
* added unicode pre- and post-processing

1.0.133
* added [gstyle] and [style] secondary help strings
* added [helpg2 stylename] and [helps2 stylename] to return secondary style help strings

1.0.132
* added [helpg] and [helps] to return style help strings
* added [gstyle] and [style] help strings
* added [helpg stylename] and [helps stylename] to return style help strings

1.0.131
* added (digits=decdigits,) option to [round value]
Expand Down
31 changes: 30 additions & 1 deletion test_aa_macro.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#!/usr/bin/python

import unittest
import codecs
from aa_macro import *
import difflib
import sys
Expand Down Expand Up @@ -58,11 +59,39 @@ def test_aa_macro(self):
expect = 'expected.html'
badout = 'badoutput.html'

# read unicode sample:
try:
fh = codecs.open('unisample.txt', encoding='utf-8')
testBlock = fh.read()
fh.close()
st = str(type(testBlock))
if st != "<type 'unicode'>": print 'failure to convert file to unicode'
except:
print 'failed to read unicode sample'

# process unicode to ASCII:
try:
umod = macro(ucin=True)
s = umod.unido(testBlock)
st = str(type(s))
if st != "<type 'str'>": print 'failure to convert unicode to ASCII'
except:
print 'failed to process unicode to ASCII'

# process unicode to unicode:
try:
umod = macro(ucin=True,ucout=True)
umod.unido(testBlock)
s = umod.uniget()
st = str(type(s))
if st != "<type 'unicode'>": print 'failure to process unicode to unicode'
except:
print 'failed to process unicode to unicode'

rebuild = 1
fh = open('mactest.txt')
testBlock = fh.read()
fh.close()
# mod = macro()
mod = macro(debug=True)
output = mod.do(testBlock)
dtrace = mod.getdebug()
Expand Down
4 changes: 4 additions & 0 deletions unisample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[b bold]
๐ŸŒฎ๐Ÿ’ฉ๐Ÿ•๐Ÿ๐Ÿท๐Ÿฑโš“๐Ÿ”—โœ…โœ”๏ธโ˜‘๏ธ๐Ÿ’ƒโš ๏ธ๐Ÿ˜Š
๐Ÿ†๐Ÿฅœ
[i italics]

0 comments on commit 9645fb5

Please sign in to comment.