Category: Open Source

libraryDependencies += "net.sf.opencsv" % "opencsv" % "2.1"


val opencsv = "net.sf.opencsv" % "opencsv" % "2.1"


Year,Total
1995,146
1996,184
1997,235
1998,200
1999,226
2000,251
2001,299
2002,273
2003,281
2004,304
2005,203
2006,136
2007,150
2008,123
2009,135
2010,121


import au.com.bytecode.opencsv.CSVReader
import java.io.FileReader
import scala.collection.JavaConversions._

// Read every row of the CSV file and print one sentence per row.
// No rows are skipped here, so the header row is printed like any other row.
val reader = new CSVReader(new FileReader("unruly_passengers.txt"))
try {
    for (row <- reader.readAll) {
        println("In " + row(0) + " there were " + row(1) + " unruly passengers.")
    }
} finally {
    // Close the reader (and the FileReader it wraps) even if reading or printing fails.
    reader.close()
}


In Year there were Total unruly passengers.
In 1995 there were 146 unruly passengers.
In 1996 there were 184 unruly passengers.
In 1997 there were 235 unruly passengers.
In 1998 there were 200 unruly passengers.
In 1999 there were 226 unruly passengers.
In 2000 there were 251 unruly passengers.
In 2001 there were 299 unruly passengers.
In 2002 there were 273 unruly passengers.
In 2003 there were 281 unruly passengers.
In 2004 there were 304 unruly passengers.
In 2005 there were 203 unruly passengers.
In 2006 there were 136 unruly passengers.
In 2007 there were 150 unruly passengers.
In 2008 there were 123 unruly passengers.
In 2009 there were 135 unruly passengers.
In 2010 there were 121 unruly passengers.


// Separator and quote character must be Char literals (single quotes), not Strings.
// The final argument is the number of leading lines to skip, here the header row.
val reader = new CSVReader(new FileReader("unruly_passengers.txt"), ',', '"', 1)

head -1000000
conf/mrjob-emr.conf
jobs/follower_count.py
jobs/follower_histogram.py
pip install MrJob
easy_install MrJob
$ python jobs/follower_count.py data/twitter_synthetic.txt

5       2
6       1
7       3
8       2
9       1

$ python jobs/follower_count.py data/twitter_sample.txt.gz

conf/mrjob-emr.conf
$ python jobs/follower_count.py -c conf/mrjob-emr.conf -r emr \
 -o s3://your-bucket/your-output-location --no-output data/twitter_sample.txt.gz

$ python jobs/follower_count.py -c conf/mrjob-emr.conf -r emr \
 -o s3://your-bucket/your-output-location --no-output s3://your-bucket/twitter_sample.txt.gz

split -l 10000000
python jobs/follower_histogram.py -c conf/mrjob-emr.conf -r emr \
-o s3://your-bucket/your-output-location --no-output s3://your-split-input-bucket/

# Use PyPy instead of system Python
bootstrap_scripts:
- bootstrap-pypy-64bit.sh
python_bin: /home/hadoop/bin/pypy

conf/mrjob-emr-pypy-32bit.conf
conf/mrjob-emr-pypy-64bit.conf
conf/bootstrap-pypy-32bit.sh
conf/bootstrap-pypy-64bit.sh
# Extend Pygments' DiffLexer using a non-standard comment (#) for literate diffing using pycco.
diff -r cfa0f44daad1 pygments/lexers/text.py

--- a/pygments/lexers/text.py	Fri Apr 29 14:03:50 2011 +0200
+++ b/pygments/lexers/text.py	Sat Apr 30 20:28:56 2011 -0500
@@ -231,6 +231,7 @@
             (r'@.*\n', Generic.Subheading),
             (r'([Ii]ndex|diff).*\n', Generic.Heading),
             (r'=.*\n', Generic.Heading),
# Add non-standard diff comments.  This has to go above the Text capture below
# in order to be active.
+            (r'#.*\n', Comment),
             (r'.*\n', Text),
         ]
     }
cat literate.diff | awk '!/\#/' | patch -p0
$ sudo aptitude install build-essential checkinstall

# A single trailing backslash continues the command on the next line.
$ sudo aptitude install postgresql postgresql-client \
postgresql-client-common postgresql-contrib \
postgresql-server-dev-8.3

$ wget http://www.sphinxsearch.com/downloads/sphinx-0.9.8.1.tar.gz
$ tar xzvf sphinx-0.9.8.1.tar.gz
$ cd sphinx-0.9.8.1

# Build with PostgreSQL support only; a single trailing backslash continues the command.
$ ./configure --without-mysql --with-pgsql \
--with-pgsql-includes=/usr/include/postgresql/ \
--with-pgsql-lib=/usr/lib/postgresql/8.3/lib/
$ make

$ mkdir /usr/local/var
$ sudo checkinstall

/usr/local
/usr/local/etc/
$ createdb -U postgres test
$ psql -U postgres test
test=# -- "serial" auto-assigns ids; the inserts below supply only the text column.
test=# create table test (id serial primary key not null, text text);
test=# insert into test (text) values ('Hello, World!');
test=# insert into test (text) values ('This is a test.');
test=# insert into test (text) values ('I have another thing to test.');
test=# -- A user with a password is required.
test=# create user foo with password 'bar';
test=# alter table test owner to foo;
test=# \q

nano
$ cd /usr/local/etc
$ sudo cp sphinx-min.conf.dist sphinx.conf
$ sudo nano sphinx.conf

source src1
{
  type = pgsql
  sql_host = localhost
  sql_user = foo
  sql_pass = bar
  sql_db = test
  sql_port = 5432
  sql_query = select id, text from test
  sql_query_info = SELECT * from test WHERE id=$id
}

index test1
{
  source = src1
  path = /var/data/test1
  docinfo = extern
  charset_type = utf-8
}

$ sudo mkdir /var/data
$ sudo indexer --all

$ sudo searchd

$ search world

Sphinx 0.9.8.1-release (r1533)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/usr/local/etc/sphinx.conf'...
index 'test1': query 'world ': returned 1 matches of 1 total in 0.000 sec

displaying matches:
1. document=1, weight=1

words:
1. 'world': 1 documents, 1 hits

cd sphinx-0.9.8.1/api
python
>>> import sphinxapi, pprint
>>> c = sphinxapi.SphinxClient()
>>> q = c.Query('world')
>>> pprint.pprint(q)
{'attrs': [],
 'error': '',
 'fields': ['text'],
 'matches': [{'attrs': {}, 'id': 1, 'weight': 1}],
 'status': 0,
 'time': '0.000',
 'total': 1,
 'total_found': 1,
 'warning': '',
 'words': [{'docs': 1, 'hits': 1, 'word': 'world'}]}

--rotate
sudo indexer --rotate --all

/etc/init.d
supervised
%% @doc Encode a bitstring as a binary of uuencode-style bytes.
%% Each complete 6-bit group of BitStr becomes one byte whose value is the
%% group plus 32. Bits left over after the last complete group are not encoded.
uuencode(BitStr) ->
    %% The comprehension template must itself be a bitstring expression.
    << <<(X + 32):8>> || <<X:6>> <= BitStr >>.
%% @doc Decode a binary of uuencode-style bytes back into a bitstring.
%% Each byte of Text contributes 6 bits: its value minus 32, truncated to the
%% low 6 bits by the segment size.
uudecode(Text) ->
    %% The comprehension template must itself be a bitstring expression.
    << <<(X - 32):6>> || <<X:8>> <= Text >>.

list_to_binary([<<0:8,ContentLength:16,UsernameLength:8>>, Username, 
<<PasswordLength:8>>, Password, <<?REQUEST_SIZE:32>>]),
<<Response:8, Reason:8, Length:32, Data:Length/binary, 
_Rest/binary>> = Bin,
<<Int:32/integer-signed, Rest/binary>> = Bin,
5> {Int, RestData} = qrbg:extract_int(Data).
{-427507221,
 <<0,254,163,8,239,180,51,164,169,160,170,248,94,132,220,79,234,4,117,
   248,174,59,167,49,165,170,154,...>>}
6> Int.
-427507221
root@monkey:~/inst/simplejson# python setup.py install
The required version of setuptools (>=0.6c6) is not available, and
can't be installed while this script is running. Please install
 a more recent version first.

(Currently using setuptools 0.6c3 
(/usr/lib/python2.4/site-packages/setuptools-0.6c3-py2.4.egg))

mcroydon$ erl
Erlang (BEAM) emulator version 5.5.4 [source] [async-threads:0] [kernel-poll:false]

Eshell V5.5.4  (abort with ^G)
1> c('isbn.erl').
{ok,isbn}
2> isbn:validate_13([9,7,8,1,9,3,4,3,5,6,0,0,5]).
true
%% @doc Compute the ISBN-10 check digit for the first nine digits.
%%
%% Isbn is a list of nine integers. For non-negative digits the result is an
%% integer in 0..9, or the atom 'X' when the check value is 10.
%% Throws wrongLength when the list does not hold exactly nine elements.
check_digit_10(Isbn) when length(Isbn) =/= 9 ->
    throw(wrongLength);
check_digit_10(Isbn) ->
    check_digit_10(Isbn, 0).

%% Accumulate the weighted sum in Total. Each digit is weighted by the number
%% of digits remaining after it plus 2, so nine digits get weights 10 down to 2.
check_digit_10([H | T], Total) ->
    check_digit_10(T, Total + (H * (length(T) + 2)));
check_digit_10([], Total) ->
    %% The outer `rem 11` maps a sum that is already a multiple of 11 to
    %% check digit 0 rather than 11.
    case (11 - (Total rem 11)) rem 11 of
        10 -> 'X';
        Digit -> Digit
    end.
print "Status: 404"
print "Content-type: text/html"
print
print # (X)HTML error response goes here

Nokia-N800-51:/media/mmc1# dpkg -i python2.5-imaging_1.1.6-1_armel.deb 
Selecting previously deselected package python2.5-imaging.
(Reading database ... 13815 files and directories currently installed.)
Unpacking python2.5-imaging (from python2.5-imaging_1.1.6-1_armel.deb) ...
Setting up python2.5-imaging (1.1.6-1) ...
Nokia-N800-51:/media/mmc1# python2.5 
Python 2.5 (r25:9277, Jan 23 2007, 15:56:37) 
[GCC 3.4.4 (release) (CodeSourcery ARM 2005q3-2)] on linux2
Type "help", "copyright", "credits" or "license" for more information.
>>> from PIL import Image
>>>

Category: Open Source

Parsing CSV data in Scala with opencsv

Social Graph Analysis using Elastic MapReduce and PyPy

Designing MapReduce Jobs

Analyzing the data

Running the jobs

Speeding things up with PyPy

Thoughts on Elastic MapReduce

Play along at home

Literate Diffing

Sphinx Search with PostgreSQL

Arduino: Transforming the DIY UAV Community

Kansas covered by OpenStreetMap!

Maemo blows me away again

There is an Erlang community, it’s just smaller than you’re used to

Erlang bit syntax and network programming

Reason 3,287 why I hate setuptools

isbn.erl: My first Erlang module

Darwin Calendar Server Status Update

Properly serving a 404 with lighttpd’s server.error-handler-404

Packaging Python Imaging Library for maemo 3.0 (bora) and the N800

Nokia N800 and camera.py

From GPX to PostGIS

All I want to do is convert my schema!

Oh the CalDAV Possibilities

Darwin Calendar Server

PyS60 1.3.8 Released