Databar decoding
Intro
PoC||GTFO 12 contains two pages (pp53-54) of giant barcodes as a puzzle:
The pages are available in high resolution here and here.
Identification
The hardest part was to find which obscure format was used.
Initially I tried to parse the barcode manually: we can easily see start and stop sequences, pulse widths that are either one or two units, and some repetitions that are 7 units wide. But I couldn't get any further.
The revelation came when I stumbled upon this example:
It's taken from http://www.mainbyte.com/ti99/hardware/oscar/oscar.html where we learn about Oscar, a "databar" optical reader meant to input programs into computers:
The manual (pdf) refers to several supported computers of the 1980s: Atari 1200XL/1400XL, Atari 400/600/800, Commodore Pet, Commodore VIC 20/64, TI99/4A, TRS 80. From the computer's point of view, it acts as an ordinary cassette reader.
Patent
Oscar was produced by Databar Corporation.
Looking at patents from Databar Corporation, we can find two describing the Oscar reader, perfect!
The most relevant one for us is US4550247A which is easier to read as pdf.
Note that both patents have expired.
Python decoder
With the patent in hand, it's doable to write a Python script that decodes the symbols and performs some consistency checks and interpretation.
Code
You can download the following Python script [{{#file: oscar.py}} as oscar.py]:
#!/usr/bin/env python
import sys
from PIL import Image
# Verbosity of the diagnostic output. The code in this file tests it against
# 0, 1 and 2: the higher the value, the more is printed.
debuglevel = 0

# Reference: patent US4550247
# Lookup table from a 7-bit databar symbol to the value it encodes.
# Values 0x0-0xf are data nibbles; 0x10 is not a nibble (the decoder stops
# when it meets the all-ones symbol).
conv = {
    0b0100110: 0x0,
    0b0101010: 0x1,
    0b0110010: 0x2,
    0b0110110: 0x3,
    0b1011010: 0x4,
    0b1010010: 0x5,
    0b1010110: 0x6,
    0b1001010: 0x7,
    0b0100101: 0x8,
    0b0101101: 0x9,
    0b0101001: 0xa,
    0b0110101: 0xb,
    0b1011001: 0xc,
    0b1010101: 0xd,
    0b1001101: 0xe,
    0b1001001: 0xf,
    0b1111111: 0x10,
}
class Pix:
    """Black-and-white view of an image file with row-oriented access.

    Attributes set by the constructor:
      im      -- the image, opened with PIL and converted to mode '1'
      pixels  -- flat, row-major list of the pixel values
                 (the rest of this file treats 0 as black and 255 as white)
      width   -- image width in pixels
      height  -- image height in pixels
    """

    def __init__(self, filename):
        """Load *filename* and cache its pixels as a flat list."""
        self.im = Image.open(filename).convert('1')
        self.pixels = list(self.im.getdata())
        self.width, self.height = self.im.size

    def getpix(self, x, y):
        """Return the value of the pixel at column *x*, row *y*."""
        return self.pixels[x + y * self.width]

    def getline(self, y):
        """Return row *y* as a list of ``width`` pixel values."""
        start = y * self.width
        return self.pixels[start:start + self.width]

    def getavgline(self, y, radius):
        """Return a de-noised, thresholded copy of row *y*.

        Every row from ``y - radius`` to ``y + radius`` (clamped to the
        image) is sampled exactly once.  A column is reported as black
        (``True``) when the sum of its sampled pixel values is below
        ``255 * radius``, i.e. when fewer than *radius* of the sampled
        pixels are white; otherwise it is ``False``.

        The row is taken from the *y* argument; earlier revisions read a
        module-level ``lineindex`` instead and counted the centre row three
        times.
        """
        first = max(0, y - radius)
        last = min(self.height - 1, y + radius)
        rows = [self.getline(row) for row in range(first, last + 1)]
        # The threshold is deliberately not rescaled when the window is
        # clipped by the image border.
        limit = 255 * radius
        return [sum(column) < limit for column in zip(*rows)]
def line2nibbles(v):
    """Turn the pulses of one databar line into a list of nibbles.

    *v* is a sequence of ``(bit, width)`` pairs.  The widths of the first
    six pulses seed two running estimates of the unit width (one for the
    odd-indexed pulses, one for the even-indexed ones).  Each pulse is then
    classified as one unit, two units, or the end-of-line marker, and the
    matching number of symbols is emitted.  The symbols are finally cut
    into groups of seven and translated through ``conv``.

    Returns the decoded values below 0x10, in order, up to the first group
    that maps to 0x10.  Raises AssertionError when a group is not in
    ``conv`` or when more than one symbol follows the 0x10 group.
    """
    learn = 0.5
    squeeze = 0.95

    # train width on START seq
    # train ones and zeroes separately due to image contrast
    odd_mean = (v[1][1] + v[3][1] + v[5][1]) / 3.0
    even_mean = (v[0][1] + v[2][1] + v[4][1]) / 3.0
    unit = [odd_mean * squeeze, even_mean * squeeze]
    if debuglevel > 1:
        print("bitwidths: %s" % (unit,))

    symbols = []
    which = 1  # index into `unit`; toggles after every pulse
    for level, width in v:
        ref = unit[which]
        if debuglevel > 2:
            ratio = width / ref
            print("%s %s %s %s" % (width, ref, ratio, round(ratio, 0)))

        if width < ref * 1.5:
            count = 1
        elif width > ref * 5:
            count = 8  # max is EOL + last bit==1
        else:
            count = 2

        if count <= 2:
            # Fold the observed unit width into the running estimate.
            sample = float(width) / count
            unit[which] = squeeze * (ref / squeeze + sample * learn) / (1 + learn)
            symbols.extend([level] * count)
        else:  # EOL
            symbols.extend([1] * count)
        which ^= 1

    if debuglevel > 1:
        print("symbols: %s" % len(symbols))
    if debuglevel > 2:
        print("%s" % (symbols,))

    nibbles = []
    total = len(symbols)
    pos = 0
    nn = 0
    while pos < total:
        group = symbols[pos:pos + 7]
        pos += 7
        code = 0
        for bit in group:
            code = (code << 1) + bit
        if debuglevel > 2:
            # Written without a newline so the "conv" part lands on the
            # same line once the lookup has been validated.
            sys.stdout.write("symbols #%02i: %s 7-bit:%x" % (nn, group, code))
        nn += 1
        assert code in conv
        value = conv[code]
        if debuglevel > 2:
            print(" conv: %x" % value)
        if value >= 0x10:
            # At most one symbol may follow the end marker.
            assert total - pos <= 1
            break
        nibbles.append(value)
    return nibbles
def findlines(pix, min_bandwidth=30, dark_threshold=0.80):
    """Locate the horizontal dark bands of an image.

    *pix* must provide ``width``, ``height`` and ``getline(y)``.  A row is
    "dark" when the sum of its pixel values is below *dark_threshold* times
    the sum of an all-white (255) row; with the default that means at
    least 20% black.  Consecutive dark rows form a band; bands thinner
    than *min_bandwidth* rows are ignored.

    Returns a list of ``(bandwidth, midline)`` tuples, top to bottom, where
    *midline* is the row index of the middle of the band.

    A band that reaches the last row of the image is closed at the image
    border (this used to raise ValueError).
    """
    limit = (255 * pix.width) * dark_threshold
    dark = [sum(pix.getline(y)) < limit for y in range(pix.height)]

    goodlines = []
    start = None
    # The trailing False sentinel closes a band touching the bottom edge.
    for y, is_dark in enumerate(dark + [False]):
        if is_dark:
            if start is None:
                start = y
            continue
        if start is None:
            continue
        band_start, start = start, None
        bandwidth = y - band_start
        if bandwidth < min_bandwidth:
            continue
        midline = band_start + bandwidth // 2
        if debuglevel > 0:
            print("Found %i-px wide line at y=%i" % (bandwidth, midline))
        goodlines.append((bandwidth, midline))
    return goodlines
def linepx2pulses(linepx):
    """Run-length encode one thresholded pixel row into pulses.

    *linepx* is a sequence of pixels where a false value (0) is background
    and a true value (1) is a bar.  The result is a list of
    ``(bit, width)`` tuples, one per run, with any leading or trailing
    run of zeroes removed.

    The list is reversed when its first pulse is wider than its last one,
    so that the widest end pulse (the END marker) comes last.

    Returns an empty list when the row contains no bar at all.
    """
    from itertools import groupby

    pulses = [(int(bit), sum(1 for _ in run))
              for bit, run in groupby(linepx, key=bool)]

    # Drop the blank margins on either side of the databar.
    if pulses and pulses[0][0] == 0:
        del pulses[0]
    if pulses and pulses[-1][0] == 0:
        del pulses[-1]

    # where is the END marker?
    # revert line if needed
    if pulses and pulses[0][1] > pulses[-1][1]:
        pulses.reverse()
    return pulses
def parsenibbles(nibbles):
    """Check one decoded databar line and print its payload.

    *nibbles* is the list of nibble values of a line.  It must start with
    0xd; the remaining nibbles are paired low nibble first into bytes:

      byte 0   line number
      byte 1   function code (0-8)
      byte 2   total number of nibbles, leading 0xd included
      byte 3   machine type (1-9), for the 'CONTROL ARRAY' function only
      byte -1  checksum; reported as correct when the sum of all the
               bytes is 0 modulo 256 (reported only, not enforced)

    For the 'DATA LINE*' functions, bytes 3 up to but excluding the last
    one are printed as a hex string.  Details are printed according to
    ``debuglevel``.

    The argument is left untouched.  Returns None.  Raises AssertionError
    when one of the structural checks fails.
    """
    if debuglevel > 1:
        print("raw:  %s" % ''.join(["%x" % x for x in nibbles]))
    rawlen = len(nibbles)
    assert nibbles and nibbles[0] == 0xd, "line does not start with 0xd"
    # Work on a copy so the caller's list is not modified.
    payload = nibbles[1:]
    assert len(payload) % 2 == 0, "odd number of payload nibbles"
    octets = [(high << 4) + low
              for low, high in zip(payload[0::2], payload[1::2])]

    linenumber = octets[0]
    if debuglevel > 0:
        print("linenumber: %3i" % linenumber)

    functionbyte = octets[1]
    assert functionbyte < 9
    function = ('GENERATE TONES', 'CONTROL ARRAY', 'RAM FIRMWARE',
                'DATA LINE3', 'DATA LINE4', 'DATA LINE5', 'DATA LINE6',
                'DATA LINE7', 'ENDFW')[functionbyte]
    if debuglevel > 0:
        print("function: %3i = %s" % (functionbyte, function))

    lastnibblenumber = octets[2]
    assert lastnibblenumber == rawlen

    if function == 'CONTROL ARRAY':
        assert linenumber == 0
        mtbyte = octets[3]
        assert 0 < mtbyte < 10
        machinetype = ('Atari400/800/1200', 'Commodore', 'TI99/4',
                       'Timex 1000', 'Timex 2000', 'RADIO SHACK',
                       'RS-232', 'Commodore no loader',
                       'Commodore no loader/no header')[mtbyte - 1]
        if debuglevel > 0:
            print("machinetype:%3i = %s" % (mtbyte, machinetype))

    if debuglevel > 0:
        verdict = 'correct' if (sum(octets) & 0xff) == 0 else 'incorrect!'
        print("checksum: 0x%02X = %s" % (octets[-1], verdict))

    if 'DATA LINE' in function:
        data = octets[3:-1]
        print(''.join(["%02x" % x for x in data]))
# Decode every image named on the command line, one databar line at a time.
for f in sys.argv[1:]:
    pix = Pix(f)
    goodlines = findlines(pix)
    # NOTE(review): `lineindex` is a module-level name here; renaming it can
    # affect any code that reads it as a global.
    for n, (bandwidth, lineindex) in enumerate(goodlines):
        if debuglevel > 1:
            print("line #%i: y=%i" % (n, lineindex))
        # sample average of the middle 2/3 of the databar
        radius = bandwidth // 3
        pulses = linepx2pulses(pix.getavgline(lineindex, radius))
        # do we have a END marker?
        first_width = pulses[0][1]
        last_width = pulses[-1][1]
        assert last_width > (first_width * 5)
        parsenibbles(line2nibbles(pulses))
Usage
First we have to extract the images from the PoC||GTFO 12 pdf, then run our script:
pdftk pocorgtfo12.pdf cat 53-54 output databar.pdf
pdfimages -all databar.pdf databar
./oscar.py databar-000.png databar-001.png |tee oscar_output.hex
ff0edb392537fe3fff0302392702f8394202ee395f02e4397d02da399e02d039bc02c639e002bc3a0402b23a28 02a83a60029e3a7e02943a9f028a3aa202803aa6fa0000000000000000ff02763a46026c3aae02623ac002583a c4024e3acf02443aed023a3b0f02303b2f02263b50021c3b7302123b7702083b7e01fe3b8a01f43b8e01ea3ba5 01e03bc3200000000000000000ff01d63be301cc3be701c23c0201b83c1601ae3c1901a43c30019a3c3f01903c 4301863c5c017c3c7f01723c8301683ca1015e3cc401543ce4014a3d0601403d25ff0000000000000000ff0136 3d47012c3d6801223d8901183dad010e3dcd01043df000fa3e1200f03e3500e63e5500dc3e7600d23e7a00c83e 9e00be3eb600b43eba00aa3ed800a03ef99d0000000000000000ff00963f17008c3f3500823f5300783f71006e ...
This is only the data part being extracted from the databar encapsulation.
With debuglevel=1 we get some more details:
linenumber: 0 function: 1 = CONTROL ARRAY machinetype: 3 = TI99/4 checksum: 0x4E = correct linenumber: 1 function: 3 = DATA LINE3 checksum: 0x75 = correct ff0edb392537fe3fff0302392702f8394202ee395f02e4397d02da399e02d039bc02c639e002bc3a0402b23a28 linenumber: 2 function: 3 = DATA LINE3 checksum: 0x8A = correct 02a83a60029e3a7e02943a9f028a3aa202803aa6fa0000000000000000ff02763a46026c3aae02623ac002583a ...
The control array contains far more info, mainly to tell Oscar how to emulate a cassette reader for the specific machine.
According to the patent description, the control array contains a byte that tells us the databar is intended for a TI99/4.
But we've seen in the User Manual that it's more likely to be for its successor TI99/4A.
TI99/4A format
So the databar payload is likely to be interpretable by a TI99/4A.
We can observe regular repetitions of "0000000000000000", let's realign the data:
cat oscar_output.hex|tr -d '\n'|sed 's/00000000ff/00000000\nff/g'|tee oscar_output2.hex
ff0edb392537fe3fff0302392702f8394202ee395f02e4397d02da399e02d039bc02c639e002bc3a0402b23a2802a83a60029e3a7e02943a9f028a3aa202803aa6fa0000000000000000 ff02763a46026c3aae02623ac002583ac4024e3acf02443aed023a3b0f02303b2f02263b50021c3b7302123b7702083b7e01fe3b8a01f43b8e01ea3ba501e03bc3200000000000000000 ... ff4e30393937303030370080808080b0b2a5b3b380a580b4af80a5b8a9b480808080808080808080808080808080808080808080808080808080808080808080801d0000000000000000 000000000000000000000000000000000000000000000000000000000000000000
sed 's/^ff//;s/..0000000000000000$//' oscar_output2.hex > oscar_output3.hex
xxd -r -p oscar_output3.hex > oscar_output3.hex.bin
strings oscar_output3.hex.bin|tac > oscar_output3.txt