LiftMap.py
From Genome Analysis Wiki
Jump to navigationJump to searchThe printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
#!/usr/bin/python
import sys, os
def usage():
print("%s -m old.map -p old.ped -d old.dat -o new.prefix" % sys.argv[0] )
print("lift old.map (optionally .ped and .dat) to new.prefix.map (.ped/.dat if possible)")
def die(msg):
print msg
sys.exit(2)
def myopen(fn):
import gzip
try:
h = gzip.open(fn)
ln = h.read(2) # read arbitrary bytes so check if @param fn is a gzipped file
except:
# cannot read in gzip format
return open(fn)
h.close()
return gzip.open(fn)
def map2bed(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
chrom, rs, mdist, pos = ln.split()
chrom = 'chr' + chrom
pos = int(pos)
fo.write('%s\t%d\t%d\t%s\n' % (chrom, pos-1, pos, rs))
fo.close()
return True
# global var:
LIFTED_SET = set()
UNLIFTED_SET = set()
def liftBed(fin, fout, funlifted):
params = dict()
params['LIFTOVER_BIN'] = '/net/wonderland/home/zhanxw/mycode/liftOver/liftOver'
params['OLD'] = fin
params['CHAIN'] = '/home/zhanxw/mycode/liftOver/hg18ToHg19.over.chain.gz'
params['NEW'] = fout
params['UNLIFTED'] = fout + '.unlifted'
from string import Template
cmd = Template('$LIFTOVER_BIN $OLD $CHAIN $NEW $UNLIFTED')
cmd = cmd.substitute(params)
os.system(cmd)
#record lifted/unliftd rs
for ln in myopen(params['UNLIFTED']):
if len(ln) == 0 or ln[0] == '#':continue
UNLIFTED_SET.add(ln.strip().split()[-1])
for ln in myopen(params['NEW']):
if len(ln) == 0 or ln[0] == '#':continue
LIFTED_SET.add(ln.strip().split()[-1])
return True
def bed2map(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
chrom, pos0, pos1, rs = ln.split()
chrom = chrom.replace('chr', '')
fo.write('%s\t%s\t0.0\t%s\n' % (chrom, rs, pos1))
fo.close()
return True
def liftDat(fin, fout):
fo = open(fout, 'w')
for ln in myopen(fin):
if len(ln) == 0 or ln[0] != 'M':
fo.write(ln)
else:
t, rs = ln.strip().split()
if rs in LIFTED_SET:
fo.write(ln)
fo.close()
return True
def liftPed(fin, fout, fOldMap):
# two ways to do it:
# 1. write unlifted snp list
# use PLINK to do this job using --exclude
# 2. alternatively, we can write our own method
# we will use method 2
marker = [i.strip().split()[1] for i in open(fOldMap)]
flag = map(lambda x: x not in UNLIFTED_SET, marker)
# print marker[:10]
# print flag[:10]
fo = open(fout, 'w')
for ln in myopen(fin):
f = ln.strip().split()
l = len(f)
f = f[:6] + [ f[i*2] + ' '+f[i*2 +1] for i in xrange(3, l/2 )]
fo.write('\t'.join(f[:6]))
fo.write('\t')
if len(f[6:]) != len(flag):
die('Inconsistent length of ped and map files')
newMarker = [m for i, m in enumerate(f[6:]) if flag[i]]
fo.write('\t'.join(newMarker))
fo.write('\n')
#print marker[:10]
#die('test')
return True
def makesure(result, succ_msg, fail_msg = "ERROR"):
if result:
print 'SUCC: ', succ_msg
else:
print 'FAIL: ', fail_msg
sys.exit(2)
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='')
parser.add_argument('-m', dest='mapFile', required = True)
parser.add_argument('-p', dest='pedFile')
parser.add_argument('-d', dest='datFile')
parser.add_argument('-o', dest='prefix', required = True)
args = parser.parse_args()
oldBed = args.mapFile + '.bed'
makesure(map2bed(args.mapFile, oldBed),
'map->bed succ')
newBed = args.prefix + '.bed'
unlifted = args.prefix + '.unlifted'
makesure(liftBed(oldBed, newBed, unlifted),
'liftBed succ')
newMap = args.prefix + '.map'
makesure(bed2map(newBed, newMap),
'bed->map succ')
if args.datFile:
newDat = args.prefix + '.dat'
makesure(liftDat(args.datFile, newDat),
'liftDat succ')
if args.pedFile:
newPed = args.prefix + '.ped'
makesure(liftPed(args.pedFile, newPed, args.mapFile),
'liftPed succ')