Difference between revisions of "Databar decoding"
m |
m (→Patent) |
||
Line 27: | Line 27: | ||
[[File:Pocorgtfo12-US4550247-1.png|600px]][[File:Pocorgtfo12-US4550247-5.png|600px]] |
[[File:Pocorgtfo12-US4550247-1.png|600px]][[File:Pocorgtfo12-US4550247-5.png|600px]] |
||
+ | ==Python decoder== |
||
+ | With the patent in hands, it's doable to write a Python script to decode the symbols and do some consistency checks and interpretations. |
||
+ | |||
+ | But first we've to extract the images from the Poc||GTFO 12 pdf: |
||
+ | |||
+ | <source lang=bash> |
||
+ | pdftk pocorgtfo12.pdf cat 53-54 output databar.pdf |
||
+ | pdfimages -all databar.pdf databar |
||
+ | </source> |
||
+ | |||
+ | You can download the following Python script [{{#file: oscar.py}} as oscar.py]: |
||
+ | <source lang=python> |
||
+ | #!/usr/bin/env python |
||
+ | |||
+ | import sys |
||
+ | from PIL import Image |
||
+ | |||
+ | debuglevel=0 |
||
+ | |||
+ | # Reference: patent US4550247 |
||
+ | conv={ |
||
+ | 0b0100110:0x0, 0b0101010:0x1, 0b0110010:0x2, 0b0110110:0x3, |
||
+ | 0b1011010:0x4, 0b1010010:0x5, 0b1010110:0x6, 0b1001010:0x7, |
||
+ | 0b0100101:0x8, 0b0101101:0x9, 0b0101001:0xa, 0b0110101:0xb, |
||
+ | 0b1011001:0xc, 0b1010101:0xd, 0b1001101:0xe, 0b1001001:0xf, |
||
+ | 0b1111111:0x10} |
||
+ | |||
+ | class Pix: |
||
+ | def __init__(self, filename): |
||
+ | self.im = Image.open(filename).convert('1') |
||
+ | self.pixels = list(self.im.getdata()) |
||
+ | self.width, self.height = self.im.size |
||
+ | def getpix(self, x, y): |
||
+ | return self.pixels[x+y*self.width] |
||
+ | def getline(self, y): |
||
+ | return self.pixels[y*self.width:(y+1)*self.width] |
||
+ | def getavgline(self, y, radius): |
||
+ | lines=[] |
||
+ | lines.append(self.getline(lineindex)) |
||
+ | for i in range(radius): |
||
+ | lines.append(self.getline(lineindex-i)) |
||
+ | lines.append(self.getline(lineindex+i)) |
||
+ | line=[] |
||
+ | for i in range(self.width): |
||
+ | line.append(sum([lines[x][i] |
||
+ | for x in range(radius*2+1)]) < 255*radius) |
||
+ | return line |
||
+ | |||
+ | def line2nibbles(v): |
||
+ | factor_learn=0.5 |
||
+ | factor_squeeze=0.95 |
||
+ | # train width on START seq |
||
+ | # train ones and zeroes separately due to image contrast |
||
+ | smallw=[(v[1][1]+v[3][1]+v[5][1])/3.0*factor_squeeze, |
||
+ | (v[0][1]+v[2][1]+v[4][1])/3.0*factor_squeeze] |
||
+ | if debuglevel > 1: |
||
+ | print "bitwidths:", smallw |
||
+ | symbols=[] |
||
+ | currentsymbol=1 |
||
+ | for i in range(len(v)): |
||
+ | b,w = v[i] |
||
+ | if debuglevel > 2: |
||
+ | print w, smallw[currentsymbol], w/smallw[currentsymbol], |
||
+ | print round(w/smallw[currentsymbol],0) |
||
+ | if w < smallw[currentsymbol] * 1.5: |
||
+ | guess=1 |
||
+ | elif w > smallw[currentsymbol] * 5: |
||
+ | guess=8 # max is EOL + last bit==1 |
||
+ | else: |
||
+ | guess=2 |
||
+ | if guess <=2: |
||
+ | sw=float(w)/guess |
||
+ | smallw[currentsymbol]=factor_squeeze*\ |
||
+ | (smallw[currentsymbol]/factor_squeeze+sw*factor_learn)/\ |
||
+ | (1+factor_learn) |
||
+ | if guess <=2: |
||
+ | for k in range(guess): |
||
+ | symbols.append(b) |
||
+ | else: #EOL |
||
+ | for k in range(guess): |
||
+ | symbols.append(1) |
||
+ | currentsymbol^=1 |
||
+ | if debuglevel > 1: |
||
+ | print "symbols:", len(symbols) |
||
+ | if debuglevel > 2: |
||
+ | print symbols |
||
+ | nibbles=[] |
||
+ | nn=0 |
||
+ | while symbols: |
||
+ | s, symbols=symbols[:7],symbols[7:] |
||
+ | bit7=0 |
||
+ | for k in s: |
||
+ | bit7<<=1 |
||
+ | bit7+=k |
||
+ | if debuglevel > 2: |
||
+ | print "symbols #%02i:" % nn, s, ("7-bit:%x" % bit7), |
||
+ | nn+=1 |
||
+ | assert bit7 in conv |
||
+ | if debuglevel > 2: |
||
+ | print "conv: %x" % conv[bit7] |
||
+ | if conv[bit7]<0x10: |
||
+ | nibbles.append(conv[bit7]) |
||
+ | else: |
||
+ | assert len(symbols)<=1 |
||
+ | break |
||
+ | return nibbles |
||
+ | |||
+ | def findlines(pix): |
||
+ | # find middle of horizontal lines |
||
+ | # candlines = lines with at least 20% black |
||
+ | candlines=[sum(pix.getline(i))<(255*pix.width)*0.80 |
||
+ | for i in xrange(pix.height)] |
||
+ | goodlines=[] |
||
+ | offset=0 |
||
+ | while True in candlines: |
||
+ | startline=candlines.index(True) |
||
+ | stopline=candlines.index(False,startline) |
||
+ | candlines=candlines[stopline:] |
||
+ | oldoffset, offset=offset, offset+stopline |
||
+ | bandwidth=stopline - startline |
||
+ | if bandwidth < 30: |
||
+ | continue |
||
+ | midline=startline+(bandwidth/2)+oldoffset |
||
+ | if debuglevel > 0: |
||
+ | print "Found %i-px wide line at y=%i" % (bandwidth, midline) |
||
+ | goodlines.append((bandwidth, midline)) |
||
+ | return goodlines |
||
+ | |||
+ | def linepx2pulses(linepx): |
||
+ | pulses=[] |
||
+ | while linepx: |
||
+ | if linepx[0]==0: |
||
+ | if 1 in linepx: |
||
+ | stop=linepx.index(1) |
||
+ | linepx=linepx[stop:] |
||
+ | else: |
||
+ | stop=len(linepx) |
||
+ | linepx=[] |
||
+ | if len(pulses)>0 and 1 in linepx: |
||
+ | pulses.append((0,stop)) |
||
+ | else: |
||
+ | if 0 in linepx: |
||
+ | stop=linepx.index(0) |
||
+ | linepx=linepx[stop:] |
||
+ | else: |
||
+ | stop=len(linepx) |
||
+ | linepx=[] |
||
+ | pulses.append((1,stop)) |
||
+ | # where is the END marker? |
||
+ | # revert line if needed |
||
+ | if pulses[0][1] > pulses[-1][1]: |
||
+ | pulses=pulses[::-1] |
||
+ | return pulses |
||
+ | |||
+ | def parsenibbles(nibbles): |
||
+ | if debuglevel > 1: |
||
+ | print "raw: ", ''.join(["%x" % x for x in nibbles]) |
||
+ | rawlen=len(nibbles) |
||
+ | assert nibbles[0]==0xd |
||
+ | nibbles.pop(0) |
||
+ | assert len(nibbles)%2==0 |
||
+ | bytes=[] |
||
+ | for i in range(len(nibbles)/2): |
||
+ | bytes.append((nibbles[i*2+1]<<4) + nibbles[i*2]) |
||
+ | linenumber=bytes[0] |
||
+ | if debuglevel > 0: |
||
+ | print "linenumber: %3i" % linenumber |
||
+ | functionbyte=bytes[1] |
||
+ | assert functionbyte < 9 |
||
+ | function=['GENERATE TONES', 'CONTROL ARRAY', 'RAM FIRMWARE', |
||
+ | 'DATA LINE3', 'DATA LINE4', 'DATA LINE5', 'DATA LINE6', |
||
+ | 'DATA LINE7', 'ENDFW'][functionbyte] |
||
+ | if debuglevel > 0: |
||
+ | print "function: %3i = %s" % (functionbyte, function) |
||
+ | lastnibblenumber=bytes[2] |
||
+ | assert lastnibblenumber==rawlen |
||
+ | if function == 'CONTROL ARRAY': |
||
+ | assert linenumber==0 |
||
+ | mtbyte=bytes[3] |
||
+ | assert 0 < mtbyte < 10 |
||
+ | machinetype=['Atari400/800/1200', 'Commodore', 'TI99/4', |
||
+ | 'Timex 1000', 'Timex 2000', 'RADIO SHACK', |
||
+ | 'RS-232', 'Commodore no loader', |
||
+ | 'Commodore no loader/no header'][mtbyte-1] |
||
+ | if debuglevel > 0: |
||
+ | print "machinetype:%3i = %s" % (mtbyte, machinetype) |
||
+ | check=bytes[-1] |
||
+ | if debuglevel > 0: |
||
+ | print "checksum: 0x%02X = %s" % (bytes[-1], |
||
+ | ['incorrect!', 'correct'][(sum(bytes) & 0xff)==0]) |
||
+ | if 'DATA LINE' in function: |
||
+ | data=bytes[3:-1] |
||
+ | print(''.join(["%02x" % x for x in data])) |
||
+ | |||
+ | for f in sys.argv[1:]: |
||
+ | pix=Pix(f) |
||
+ | goodlines=findlines(pix) |
||
+ | for n in range(len(goodlines)): |
||
+ | bandwidth, lineindex = goodlines[n] |
||
+ | if debuglevel > 1: |
||
+ | print "line #%i: y=%i" % (n, lineindex) |
||
+ | # sample average of the middle 2/3 of the databar |
||
+ | radius=int(bandwidth/3) |
||
+ | linepx=pix.getavgline(lineindex, radius) |
||
+ | pulses=linepx2pulses(linepx) |
||
+ | # do we have a END marker? |
||
+ | assert pulses[-1][1] > (pulses[0][1]*5) |
||
+ | nibbles=line2nibbles(pulses) |
||
+ | parsenibbles(nibbles) |
||
+ | </source> |
Revision as of 11:50, 22 June 2016
Intro
PoC||GTFO 12 contains two pages (pp53-54) of giant barcodes as a puzzle:
The pages are available in high resolution here and here.
Identification
The hardest part was to find which obscure format was used.
Initially I tried to parse the barcode manually: we can easily see start and stop sequences, pulse widths of one or two units, and some repetitions 7 units wide. But I couldn't get any further.
The revelation came when I stumbled upon this example:
It's taken from http://www.mainbyte.com/ti99/hardware/oscar/oscar.html where we learn about Oscar, a "databar" optical reader meant to input programs into computers:
The manual (pdf) refers to several supported computers of the '80s: Atari 1200XL/1400XL, Atari 400/600/800, Commodore Pet, Commodore VIC 20/64, TI99/4A, TRS 80. From the computer's point of view, it acts as an ordinary cassette reader.
Patent
Oscar was produced by Databar Corporation.
Looking at patents from Databar Corporation, we can find two describing the Oscar reader, perfect!
The most relevant one for us is US4550247A which is easier to read as pdf.
Note that both patents have expired.
Python decoder
With the patent in hand, it is feasible to write a Python script that decodes the symbols and performs some consistency checks and interpretation.
But first we have to extract the images from the PoC||GTFO 12 pdf:
pdftk pocorgtfo12.pdf cat 53-54 output databar.pdf
pdfimages -all databar.pdf databar
You can download the following Python script [{{#file: oscar.py}} as oscar.py]:
#!/usr/bin/env python
import sys
from PIL import Image
# Verbosity of the diagnostic output. With 0 only the decoded data lines are
# printed; the functions below guard their extra prints with
# "debuglevel > N" checks, so higher values print progressively more.
debuglevel=0
# Reference: patent US4550247
# Lookup table from a 7-bit symbol (first bit read is the most significant
# bit) to the value it stands for. 0x0-0xf are data nibbles; 0x10 is not a
# nibble: it tags the all-ones pattern that line2nibbles() treats as the end
# of the line.
conv={
0b0100110:0x0, 0b0101010:0x1, 0b0110010:0x2, 0b0110110:0x3,
0b1011010:0x4, 0b1010010:0x5, 0b1010110:0x6, 0b1001010:0x7,
0b0100101:0x8, 0b0101101:0x9, 0b0101001:0xa, 0b0110101:0xb,
0b1011001:0xc, 0b1010101:0xd, 0b1001101:0xe, 0b1001001:0xf,
0b1111111:0x10}
class Pix:
def __init__(self, filename):
self.im = Image.open(filename).convert('1')
self.pixels = list(self.im.getdata())
self.width, self.height = self.im.size
def getpix(self, x, y):
return self.pixels[x+y*self.width]
def getline(self, y):
return self.pixels[y*self.width:(y+1)*self.width]
def getavgline(self, y, radius):
lines=[]
lines.append(self.getline(lineindex))
for i in range(radius):
lines.append(self.getline(lineindex-i))
lines.append(self.getline(lineindex+i))
line=[]
for i in range(self.width):
line.append(sum([lines[x][i]
for x in range(radius*2+1)]) < 255*radius)
return line
def line2nibbles(v):
    """Decode the pulses of one databar line into a list of nibbles.

    Args:
        v: list of ``(colour, width)`` pulses as built by linepx2pulses(),
            starting with a black pulse (colour 1) and alternating with
            white pulses (colour 0). At least six pulses are required
            because the first six are used to calibrate the unit width.

    Returns:
        The nibbles (ints 0x0-0xf) found before the end-of-line symbol,
        in reading order.

    Raises:
        AssertionError: if a 7-bit group is not a known symbol, or if more
            than one bit is left over after the end-of-line symbol.
    """
    factor_learn = 0.5
    factor_squeeze = 0.95
    # Train the unit width on the START sequence. Black and white widths
    # are tracked separately because image contrast makes them differ:
    # smallw[0] is the white unit width, smallw[1] the black one.
    smallw = [
        (v[1][1] + v[3][1] + v[5][1]) / 3.0 * factor_squeeze,
        (v[0][1] + v[2][1] + v[4][1]) / 3.0 * factor_squeeze,
    ]
    if debuglevel > 1:
        print("bitwidths: %s" % (smallw,))

    symbols = []
    # Colour expected for the current pulse; flips after every pulse.
    currentsymbol = 1
    for b, w in v:
        unit = smallw[currentsymbol]
        if debuglevel > 2:
            print("%s %s %s %s" % (w, unit, w / unit, round(w / unit, 0)))
        if w < unit * 1.5:
            guess = 1
        elif w > unit * 5:
            guess = 8  # max is EOL + last bit==1
        else:
            guess = 2
        if guess <= 2:
            # Let the unit width follow the pulses actually measured.
            sw = float(w) / guess
            smallw[currentsymbol] = factor_squeeze * \
                (unit / factor_squeeze + sw * factor_learn) / \
                (1 + factor_learn)
            symbols.extend([b] * guess)
        else:  # EOL
            symbols.extend([1] * guess)
        currentsymbol ^= 1

    if debuglevel > 1:
        print("symbols: %s" % len(symbols))
    if debuglevel > 2:
        print(symbols)

    nibbles = []
    for nn, start in enumerate(range(0, len(symbols), 7)):
        s = symbols[start:start + 7]
        bit7 = 0
        for k in s:
            bit7 = (bit7 << 1) + k
        if debuglevel > 2:
            # Written without a newline so that the "conv:" output below
            # ends up on the same line.
            sys.stdout.write("symbols #%02i: %s 7-bit:%x " % (nn, s, bit7))
        assert bit7 in conv, "unknown 7-bit symbol 0x%x" % bit7
        value = conv[bit7]
        if debuglevel > 2:
            print("conv: %x" % value)
        if value < 0x10:
            nibbles.append(value)
        else:
            # End-of-line symbol: at most one stray bit may follow it.
            assert len(symbols) - (start + 7) <= 1, \
                "unexpected bits after the end-of-line symbol"
            break
    return nibbles
def findlines(pix):
    """Locate the horizontal databar bands of an image.

    A row is "dark" when the sum of its pixel values is below 80% of an
    all-white row (255 per pixel), i.e. when at least 20% of it is black.
    Each run of at least 30 consecutive dark rows is reported as one band;
    shorter runs are ignored.

    Args:
        pix: object exposing ``width``, ``height`` and ``getline(y)``.

    Returns:
        A list of ``(bandwidth, midline)`` tuples in top-to-bottom order,
        where ``bandwidth`` is the height of the band in rows and
        ``midline`` is the index of its middle row.
    """
    threshold = 255 * pix.width * 0.80
    dark = [sum(pix.getline(y)) < threshold for y in range(pix.height)]
    height = len(dark)

    goodlines = []
    y = 0
    while y < height:
        if not dark[y]:
            y += 1
            continue
        start = y
        # A band may run to the last row of the image, so the scan stops at
        # the image boundary as well as at the first non-dark row.
        while y < height and dark[y]:
            y += 1
        bandwidth = y - start
        if bandwidth < 30:
            continue
        # Floor division keeps the row index an integer on Python 3 too.
        midline = start + bandwidth // 2
        if debuglevel > 0:
            print("Found %i-px wide line at y=%i" % (bandwidth, midline))
        goodlines.append((bandwidth, midline))
    return goodlines
def linepx2pulses(linepx):
    """Run-length encode one scan line into black/white pulses.

    Args:
        linepx: sequence of pixels where a value equal to 0 is white and
            anything else is black.

    Returns:
        A list of ``(colour, width)`` tuples, colour being 1 for black and
        0 for white. White margins before the first and after the last
        black pulse are dropped, so a non-empty result starts and ends with
        a black pulse. If the first pulse is wider than the last one, the
        list is reversed so that the wide END marker comes last. An empty
        or all-white line yields an empty list.
    """
    # Single pass: extend the current run or open a new one.
    runs = []
    for px in linepx:
        colour = 0 if px == 0 else 1
        if runs and runs[-1][0] == colour:
            runs[-1][1] += 1
        else:
            runs.append([colour, 1])

    # Strip the white margins on both sides of the databar.
    if runs and runs[0][0] == 0:
        del runs[0]
    if runs and runs[-1][0] == 0:
        del runs[-1]

    pulses = [(colour, width) for colour, width in runs]
    # Where is the END marker? Reverse the line if it was read backwards.
    if pulses and pulses[0][1] > pulses[-1][1]:
        pulses.reverse()
    return pulses
def parsenibbles(nibbles):
    """Check and interpret the nibbles of one decoded databar line.

    The first nibble must be 0xd. The remaining nibbles are paired into
    bytes, low nibble first. Byte 0 is the line number, byte 1 the function
    code and byte 2 the total number of nibbles in the line, counting the
    leading 0xd. The last byte is a checksum: it is reported as correct
    when the sum of all bytes is 0 modulo 256.

    For the 'DATA LINE*' functions, the payload (bytes 3 up to but not
    including the checksum) is printed as hexadecimal text. Everything else
    is printed only when ``debuglevel`` asks for it.

    Args:
        nibbles: list of ints 0x0-0xf as returned by line2nibbles(). The
            list is not modified.

    Raises:
        AssertionError: if one of the consistency checks fails.
    """
    if debuglevel > 1:
        print("raw:  %s" % ''.join(["%x" % x for x in nibbles]))
    rawlen = len(nibbles)
    assert nibbles[0] == 0xd, "line does not start with nibble 0xd"
    # Work on a copy so the caller's list is left untouched.
    payload = nibbles[1:]
    assert len(payload) % 2 == 0, "odd number of nibbles after the header"
    octets = [(payload[i + 1] << 4) + payload[i]
              for i in range(0, len(payload), 2)]

    linenumber = octets[0]
    if debuglevel > 0:
        print("linenumber: %3i" % linenumber)

    functionbyte = octets[1]
    assert functionbyte < 9, "unknown function code %i" % functionbyte
    function = ('GENERATE TONES', 'CONTROL ARRAY', 'RAM FIRMWARE',
                'DATA LINE3', 'DATA LINE4', 'DATA LINE5', 'DATA LINE6',
                'DATA LINE7', 'ENDFW')[functionbyte]
    if debuglevel > 0:
        print("function: %3i = %s" % (functionbyte, function))

    lastnibblenumber = octets[2]
    assert lastnibblenumber == rawlen, "nibble count mismatch"

    if function == 'CONTROL ARRAY':
        assert linenumber == 0, "control array must be line 0"
        mtbyte = octets[3]
        assert 0 < mtbyte < 10, "unknown machine type %i" % mtbyte
        machinetype = ('Atari400/800/1200', 'Commodore', 'TI99/4',
                       'Timex 1000', 'Timex 2000', 'RADIO SHACK',
                       'RS-232', 'Commodore no loader',
                       'Commodore no loader/no header')[mtbyte - 1]
        if debuglevel > 0:
            print("machinetype:%3i = %s" % (mtbyte, machinetype))

    if debuglevel > 0:
        verdict = 'correct' if (sum(octets) & 0xff) == 0 else 'incorrect!'
        print("checksum: 0x%02X = %s" % (octets[-1], verdict))

    if 'DATA LINE' in function:
        data = octets[3:-1]
        print(''.join(["%02x" % x for x in data]))
# Decode every image named on the command line: find its databar bands,
# turn each band into pulses, then into nibbles, and print the result.
for f in sys.argv[1:]:
    pix = Pix(f)
    goodlines = findlines(pix)
    # Keep the name "lineindex" bound at module level: other code in this
    # file may look it up as a global.
    for n, (bandwidth, lineindex) in enumerate(goodlines):
        if debuglevel > 1:
            print("line #%i: y=%i" % (n, lineindex))
        # sample average of the middle 2/3 of the databar
        radius = bandwidth // 3
        linepx = pix.getavgline(lineindex, radius)
        pulses = linepx2pulses(linepx)
        # Do we have an END marker? The last pulse must be much wider than
        # the first one.
        assert pulses[-1][1] > (pulses[0][1] * 5), "END marker not found"
        nibbles = line2nibbles(pulses)
        parsenibbles(nibbles)