Storing SNPs in CouchDB
Install Python and CouchDB with pacman on Arch Linux-based distros:
$ sudo pacman -Sy
$ sudo pacman -S python2 couchdb
Create a new virtualenv (see howto) and install CouchDB-Python:
$ mkvirtualenv snpdb --python=/usr/bin/pypy
$ pip install couchdb==0.9 click
Start CouchDB:
$ sudo systemctl start couchdb
I found Java code here for creating and querying SNPs in CouchDB. I converted that Java code to the following Python code:
#!/usr/bin/env python
"""Fill a CouchDB database with random SNP documents and run sample queries.

The script creates the database ``snpdb``, stores randomly generated SNP
documents in it and prints the rows returned by a few example views.
"""
from __future__ import print_function

import click
from couchdb import Server
from couchdb.mapping import Document, TextField, IntegerField, FloatField
from couchdb.mapping import Mapping, DictField, ViewField
from random import randrange, random


class SNP(Document):
    """CouchDB document for one SNP plus the views of the 'genotypage' design."""

    _id = TextField()
    rs = TextField()
    avHet = FloatField()
    snpClass = TextField()
    mapping = DictField(Mapping.build(
        chromosome=TextField(),
        position=IntegerField()
    ))

    # All documents whose snpClass is 'mutation'.
    snpMutation = ViewField('genotypage', '''\
        function(doc) {
            if(doc.snpClass == 'mutation') {
                emit(null,doc);
            }
        }''')

    # avHet of every document, keyed by snpClass.
    snpByClass = ViewField('genotypage', '''\
        function(doc) {
            emit(doc.snpClass,doc.avHet);
        }''')

    # Whole document, keyed by its rs name.
    snpByName = ViewField('genotypage', '''\
        function(doc) {
            emit(doc.rs,doc);
        }''')

    # NOTE: despite the 'MaxHet' name, the reduce function returns the
    # arithmetic mean of the emitted avHet values, not their maximum.
    # NOTE(review): the reduce function does not look at CouchDB's rereduce
    # flag, so on large views it may average partial averages -- confirm.
    snpByClassMaxHet = ViewField('genotypage', '''\
        function(doc) {
            emit(doc.snpClass,doc.avHet);
        }''', '''\
        function(keys, values) {
            var mean=0.0;
            for ( var i = 0; i < values.length; ++i) {
                mean+=values[i];
            }
            return mean/(values.length);
        }''')


def createSNPid(chr_no, pos):
    """Return the document id for a SNP, e.g. ``chr02_00000030`` for (2, 30)."""
    return "chr%02d_%08d" % (chr_no, pos)


def uniqueRandrange(range_size, sample_size):
    """Return ``sample_size`` distinct random integers from ``range(range_size)``.

    Raises:
        ValueError: if more distinct values are requested than the range
            holds; without this check the sampling loop would never end.
    """
    if sample_size > range_size:
        raise ValueError(
            "cannot draw %d unique values from a range of size %d"
            % (sample_size, range_size))
    unique_values = set()
    # Duplicates are absorbed by the set, so keep drawing until it is full.
    while len(unique_values) < sample_size:
        unique_values.add(randrange(range_size))
    return list(unique_values)


def createSNPs(snp_no):
    """Generate random SNP documents and store them in the global ``db``.

    Every SNP gets a random chromosome (1-4), a unique position, a random
    ``avHet`` in [0, 0.5) and an alternating class ('mutation' / 'silent').
    """
    print("Creating SNP positions ...")
    position = uniqueRandrange(snp_no, snp_no)
    print("Loading SNPs to DB ...")
    # NOTE: the range stops at snp_no - 1, so one SNP fewer than requested
    # is stored (e.g. 4999 documents for --snp_no 5000).
    for i in range(0, snp_no - 1):
        chr_no = 1 + randrange(4)
        pos = 1 + position[i]
        snp = SNP(
            _id=createSNPid(chr_no, pos),
            rs="rs" + str(i + 1),
            avHet=random() * 0.5,
            snpClass="mutation" if (i % 2 == 0) else "silent",
            mapping=dict(
                chromosome="chr" + str(chr_no),
                position=pos
            )
        )
        snp.store(db)


@click.command()
# required=True: without a value createSNPs() would fail on ``None - 1``.
@click.option('--snp_no', type=int, required=True,
              help="How many random SNPs are to generate?")
def run(snp_no):
    """Create the SNPs, then print the rows of several example queries."""
    createSNPs(snp_no)

    # Range query on the document ids: chromosome 2, positions 30 to 60.
    print("startkey=createSNPid(2, 30), endkey=createSNPid(2, 60)")
    for row in db.view('_all_docs',
                       startkey=createSNPid(2, 30),
                       endkey=createSNPid(2, 60)):
        print(row)
    print()

    print("limit=1")
    for row in db.view('_all_docs', limit=1):
        print(row)
    print()

    # The remaining queries run the map functions as temporary views.
    print("snpMutation")
    for row in db.query(SNP.snpMutation.map_fun):
        print(row)
    print()

    print("snpByClass")
    for row in db.query(SNP.snpByClass.map_fun):
        print(row)
    print()

    print("snpByName")
    for row in db.query(SNP.snpByName.map_fun):
        print(row)


if __name__ == "__main__":
    # ``db`` is a module-level global that createSNPs() and run() rely on.
    server = Server()
    # NOTE(review): create() presumably fails if 'snpdb' already exists, so
    # the database has to be deleted before the script is run again -- confirm.
    db = server.create('snpdb')
    run()
The following command runs the script and stores 4999 SNPs in CouchDB (one fewer than the requested 5000, because the loading loop iterates over range(0, snp_no - 1)):
$ time python snps.py --snp_no 5000
real 4m4.128s
user 0m13.653s
sys 0m1.583s
The SNPs can also be viewed in Futon at http://127.0.0.1:5984/_utils/















