Category: Python

Later…

head -1000000
conf/mrjob-emr.conf
jobs/follower_count.py
jobs/follower_histogram.py
pip install MrJob
easy_install MrJob
$ python jobs/follower_count.py data/twitter_synthetic.txt

5       2
6       1
7       3
8       2
9       1

$ python jobs/follower_count.py data/twitter_sample.txt.gz

conf/mrjob-emr.conf
$ python jobs/follower_count.py -c conf/mrjob-emr.conf -r emr \
 -o s3://your-bucket/your-output-location --no-output data/twitter_sample.txt.gz

$ python jobs/follower_count.py -c conf/mrjob-emr.conf -r emr \
 -o s3://your-bucket/your-output-location --no-output s3://your-bucket/twitter_sample.txt.gz

split -l 10000000
python jobs/follower_histogram.py -c conf/mrjob-emr.conf -r emr \
-o s3://your-bucket/your-output-location --no-output s3://your-split-input-bucket/

# Use PyPy instead of system Python
bootstrap_scripts:
- bootstrap-pypy-64bit.sh
python_bin: /home/hadoop/bin/pypy

conf/mrjob-emr-pypy-32bit.conf
conf/mrjob-emr-pypy-64bit.conf
conf/bootstrap-pypy-32bit.sh
conf/bootstrap-pypy-64bit.sh
# Extend Pygments' DiffLexer using a non-standard comment (#) for literate diffing using pycco.
diff -r cfa0f44daad1 pygments/lexers/text.py

--- a/pygments/lexers/text.py	Fri Apr 29 14:03:50 2011 +0200
+++ b/pygments/lexers/text.py	Sat Apr 30 20:28:56 2011 -0500
@@ -231,6 +231,7 @@
             (r'@.*\n', Generic.Subheading),
             (r'([Ii]ndex|diff).*\n', Generic.Heading),
             (r'=.*\n', Generic.Heading),
# Add non-standard diff comments.  This has to go above the Text capture below
# in order to be active.
+            (r'#.*\n', Comment),
             (r'.*\n', Text),
         ]
     }
cat literate.diff | awk '!/\#/' | patch -p0
$ ./pypy-c 
Python 2.7.0 (61fefec7abc6, Mar 18 2011, 06:59:57)
[PyPy 1.5.0-alpha0] on darwin
Type "help", "copyright", "credits" or "license" for more information.
And now for something completely different: ``1.1 final released:
http://codespeak.net/pypy/dist/pypy/doc/release-1.1.0.html''
>>>> 
$ svn co http://svn.apache.org/repos/asf/lucene/pylucene/trunk pylucene

$ cd pylucene/jcc
$ python setup.py build
$ sudo python setup.py install

$ cd ..
$ nano Makefile

# Mac OS X (Python 2.5, Java 1.5)
# Mac OS X  (Python 2.5, Java 1.5)
PREFIX_PYTHON=/usr
ANT=ant
PYTHON=$(PREFIX_PYTHON)/bin/python
JCC=$(PYTHON) -m jcc --shared
NUM_FILES=2

$ make

$ sudo make install

$ python
Python 2.5.1 (r251:54863, Nov 11 2008, 17:46:48)
[GCC 4.0.1 (Apple Inc. build 5465)] on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import lucene
>>>

$ sudo aptitude install build-essential checkinstall

$ sudo aptitude install postgresql postgresql-client \
postgresql-client-common postgresql-contrib \
postgresql-server-dev-8.3

$ wget http://www.sphinxsearch.com/downloads/sphinx-0.9.8.1.tar.gz
$ tar xzvf sphinx-0.9.8.1.tar.gz
$ cd sphinx-0.9.8.1

$ ./configure --without-mysql --with-pgsql \
--with-pgsql-includes=/usr/include/postgresql/ \
--with-pgsql-lib=/usr/lib/postgresql/8.3/lib/
$ make

$ mkdir /usr/local/var
$ sudo checkinstall

/usr/local
/usr/local/etc/
$ createdb -U postgres test
$ psql -U postgres test
test=# create table test (id serial primary key not null, text text);
test=# insert into test (text) values ('Hello, World!');
test=# insert into test (text) values ('This is a test.');
test=# insert into test (text) values ('I have another thing to test.');
test=# -- A user with a password is required.
test=# create user foo with password 'bar';
test=# alter table test owner to foo;
test=# \q

nano
$ cd /usr/local/etc
$ sudo cp sphinx-min.conf.dist sphinx.conf
$ sudo nano sphinx.conf

source src1
{
  type = pgsql
  sql_host = localhost
  sql_user = foo
  sql_pass = bar
  sql_db = test
  sql_port = 5432
  sql_query = select id, text from test
  sql_query_info = SELECT * from test WHERE id=$id
}

index test1
{
  source = src1
  path = /var/data/test1
  docinfo = extern
  charset_type = utf-8
}

$ sudo mkdir /var/data
$ sudo indexer --all

$ sudo searchd

$ search world

Sphinx 0.9.8.1-release (r1533)
Copyright (c) 2001-2008, Andrew Aksyonoff

using config file '/usr/local/etc/sphinx.conf'...
index 'test1': query 'world ': returned 1 matches of 1 total in 0.000 sec

displaying matches:
1. document=1, weight=1

words:
1. 'world': 1 documents, 1 hits

cd sphinx-0.9.8.1/api
python
>>> import sphinxapi, pprint
>>> c = sphinxapi.SphinxClient()
>>> q = c.Query('world')
>>> pprint.pprint(q)
{'attrs': [],
 'error': '',
 'fields': ['text'],
 'matches': [{'attrs': {}, 'id': 1, 'weight': 1}],
 'status': 0,
 'time': '0.000',
 'total': 1,
 'total_found': 1,
 'warning': '',
 'words': [{'docs': 1, 'hits': 1, 'word': 'world'}]}

--rotate
sudo indexer --rotate --all

/etc/init.d
supervised
root@monkey:~/inst/simplejson# python setup.py install
The required version of setuptools (>=0.6c6) is not available, and
can't be installed while this script is running. Please install
 a more recent version first.

(Currently using setuptools 0.6c3 
(/usr/lib/python2.4/site-packages/setuptools-0.6c3-py2.4.egg))

print "Status: 404"
print "Content-type: text/html"
print
print # (X)HTML error response goes here

>>> import serial
>>> ser = serial.Serial('/dev/tty.usbserial', 9600)
>>> while 1:
...     ser.readline()
'1 Hello world!\r\n'
'2 Hello world!\r\n'
'3 Hello world!\r\n'
>>> import serial
>>> ser = serial.Serial('/dev/tty.usbserial', 9600)  
>>> ser.write('5')

Category: Python

Social Graph Analysis using Elastic MapReduce and PyPy

Designing MapReduce Jobs

Analyzing the data

Running the jobs

Speeding things up with PyPy

Thoughts on Elastic MapReduce

Play along at home

Literate Diffing

PyPy is Fast (And So Can You)

Installing PyLucene on OSX 10.5

Sphinx Search with PostgreSQL

Kansas Primary 2008 recap

DjangoCon!

Python for S60: back in the saddle

PyCon 2008

Covering Kansas Democratic Caucus Results

We’re hiring!

2008 Digital Edge Award Finalists

Reason 3,287 why I hate setuptools

Darwin Calendar Server Status Update

Properly serving a 404 with lighttpd’s server.error-handler-404

Nokia N800 and camera.py

Mapping Every airport and helipad in America

Arduino serial communication with Python

All I want to do is convert my schema!

Pardon the Dust