#!/bin/sh

### Make perceptron data:
### Two classes, each a mixture of two gaussians, in a simple
### non-linearly-separable checkerboard pattern:
###  O X
###  X O
###
### Class 1 is centred on (1,1) and (-1,-1); class 2 on (1,-1) and (-1,1).
###
### Output: ./training-set, one pattern per line, in shuffled order:
###     1  x  y  target
### where target is 1 for class 1 and 0 for class 2.  The leading constant 1
### is presumably the bias input of the perceptron -- TODO confirm with the
### consumer of this file.
###
### Requires the external commands randg and rl (plus paste and awk) on PATH.
### Intermediate files tmp-* are written to the current directory and are
### left in place after the run.

# Stop at the first failing command or unset variable instead of silently
# producing an empty or truncated training set.
set -eu
# Pathname expansion stays disabled, as requested by the former `-f` shebang
# option; setting it here also works when the script is run as `sh script`.
set -f

# Print a message on stderr and abort.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

# Fail early, with a clear message, when a required tool is not installed.
for tool in randg rl paste awk; do
  command -v "$tool" >/dev/null 2>&1 || die "required command not found: $tool"
done

## number of patterns in each class
nc1A=25
nc1B=25
nc2A=25
nc2B=25

## class means
c1Axm=1
c1Aym=1
c1Bxm=-1
c1Bym=-1
c2Axm=1
c2Aym=-1
c2Bxm=-1
c2Bym=1

## class standard deviations: all spherical so just one
s=0.75

# make_cluster NAME XMEAN YMEAN COUNT TARGET
#
# Generates one gaussian cluster and appends it to tmp-all.
#   NAME    suffix for the intermediate files tmp-NAMEx and tmp-NAMEy
#   XMEAN   mean passed to randg for the x coordinates
#   YMEAN   mean passed to randg for the y coordinates
#   COUNT   value passed to randg as -rows
#   TARGET  class id written as the last field of every row
# Uses the global standard deviation $s for both coordinates.
# Assumes randg writes one sample per line, so that paste lines up x with y
# -- TODO confirm against randg's documentation.
make_cluster() {
  randg -mean "$2" -stdev "$s" -rows "$4" > "tmp-$1x"
  randg -mean "$3" -stdev "$s" -rows "$4" > "tmp-$1y"
  # Prepend the constant 1 and append the target to each "x<TAB>y" line.
  paste "tmp-$1x" "tmp-$1y" | awk -v target="$5" '{print 1, $0, target}' >> tmp-all
}

## generate data and agglomerate x/y coordinates & target outputs (class id)
: > tmp-all   # start from an empty file; make_cluster only ever appends
make_cluster c1A "$c1Axm" "$c1Aym" "$nc1A" 1
make_cluster c1B "$c1Bxm" "$c1Bym" "$nc1B" 1
make_cluster c2A "$c2Axm" "$c2Aym" "$nc2A" 0
make_cluster c2B "$c2Bxm" "$c2Bym" "$nc2B" 0

## shuffle the concatenated patterns to generate the training set
rl < tmp-all > training-set

## Notes

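# $ gnuplot
# NOTE(review): this script writes tmp-c1Ax, tmp-c1Ay, ... and tmp-all, but no
# "tmp-c1" or "tmp-c2"; those per-class x/y files presumably have to be
# assembled by hand (e.g. with paste) before running the plot below.
# gnuplot> p "tmp-c1" w points 2, "tmp-c2" w points 3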
# gnuplot> p "output-data" using ($3) w d
# gnuplot> set log y
# gnuplot> replot

# $ tail -n +98700 < output-data | head -n 100 | awk '{print $4}' | stats
