swh:1:snp:c7a7c70e5b66e638bde1708a5782ffd2d0417b34
Tip revision: 3e823abcca5b8c1e5e89dd9bd4c49e8673b3e957 authored by Stephen Floor on 24 June 2017, 00:52:49 UTC
email update
email update
Tip revision: 3e823ab
TargetscanScores.py
# TargetscanScores.py
# Class that reads a TargetScan Summary_Counts.txt-style file and parses it into per-transcript (RefSeq ID) information, exposed through simple accessor methods.
from collections import defaultdict
class TargetscanScores:
    """Per-transcript summary of a TargetScan Summary_Counts.txt-style file.

    Each data row is split on whitespace and read by column position:

         0 Transcript ID                  8 Total num nonconserved sites
         1 Gene Symbol                    9 Number of nonconserved 8mer sites
         2 miRNA family                  10 Number of nonconserved 7mer-m8 sites
         3 Species ID                    11 Number of nonconserved 7mer-1a sites
         4 Total num conserved sites     12 Representative miRNA
         5 Number of conserved 8mer      13 Total context score
         6 Number of conserved 7mer-m8   14 Aggregate PCT
         7 Number of conserved 7mer-1a

    Example row:
        NM_000014 A2M AAAAGUG 9606 0 0 0 0 1 0 1 0 hsa-miR-548t -0.128 NULL

    For every transcript ID the class accumulates:
      1) sum of context+ scores
      2) min context+ score
      3) # of sites
      4) sum of conserved-site context+ scores
      5) min conserved context+ score
      6) # of conserved sites
    plus the same figures for nonconserved rows and per-site-type counts
    (8mer, 7mer-m8, 7mer-1a).

    NOTE: a row is treated as "conserved" when column 4 is non-zero and as
    "nonconserved" otherwise.  Only the counts belonging to the chosen
    category are accumulated for that row, so the nonconserved counts of a
    row that also has conserved sites are not added to the nonconserved
    tallies (they are still included in the overall number of sites).

    All containers are defaultdicts, so looking up a transcript ID that was
    never seen yields an empty list, 0 or 0.0 rather than raising KeyError.
    """

    def __init__(self, targetscanScoreFile):
        """Parse ``targetscanScoreFile`` and build all per-transcript tables.

        targetscanScoreFile -- an open, line-iterable file-like object that
            also supports ``readline()``.  Its first line is treated as a
            header and discarded.  Blank lines and rows whose total context
            score (column 13) is the literal ``NULL`` are skipped.

        Raises IndexError for a non-blank row with fewer than 14 columns and
        ValueError when a count or score column is not numeric.
        """
        # All rows, regardless of conservation.
        self.refSeqToScores = defaultdict(list)  # list of (miRNA, score) per refseq ID
        self.refSeqToScoreSum = defaultdict(float)  # sum of context+ scores per refseq ID
        self.refSeqToMinScore = defaultdict(float)  # min context+ score per refseq ID
        self.refSeqToNumSites = defaultdict(int)  # number of sites per refseq ID

        # Rows with at least one conserved site.
        self.refSeqToCnsvScores = defaultdict(list)  # list of (miRNA, score) for conserved rows
        self.refSeqToCnsvScoreSum = defaultdict(float)  # sum of scores for conserved rows
        self.refSeqToCnsvMinScore = defaultdict(float)  # min score among conserved rows
        self.refSeqToCnsvNumSites = defaultdict(int)  # number of conserved sites

        # Rows without any conserved site.
        self.refSeqToNoncnsvNumSites = defaultdict(int)  # number of nonconserved sites
        self.refSeqToNoncnsvScores = defaultdict(list)  # list of (miRNA, score) for nonconserved rows
        self.refSeqToNoncnsvScoreSum = defaultdict(float)  # sum of scores for nonconserved rows
        self.refSeqToNoncnsvMinScore = defaultdict(float)  # min score among nonconserved rows

        # Per-site-type counts.
        self.refSeqToCnsv8merSites = defaultdict(int)  # conserved 8mer sites
        self.refSeqToCnsv7merm8Sites = defaultdict(int)  # conserved 7mer-m8 sites
        self.refSeqToCnsv7mer1aSites = defaultdict(int)  # conserved 7mer-1a sites
        self.refSeqToNoncnsv8merSites = defaultdict(int)  # nonconserved 8mer sites
        self.refSeqToNoncnsv7merm8Sites = defaultdict(int)  # nonconserved 7mer-m8 sites
        self.refSeqToNoncnsv7mer1aSites = defaultdict(int)  # nonconserved 7mer-1a sites

        # skip the header
        targetscanScoreFile.readline()

        for line in targetscanScoreFile:
            if not line.strip():
                continue

            fields = line.split()
            if fields[13] == "NULL":
                continue

            refSeq = fields[0]
            numCnsv = int(fields[4])
            numNoncnsv = int(fields[8])
            score = float(fields[13])
            site = (fields[12], score)

            self.refSeqToScores[refSeq].append(site)
            self.refSeqToScoreSum[refSeq] += score
            self.refSeqToNumSites[refSeq] += numCnsv + numNoncnsv

            if numCnsv:
                # Any conserved site puts the whole row in the conserved bin.
                self.refSeqToCnsvScores[refSeq].append(site)
                self.refSeqToCnsvScoreSum[refSeq] += score
                self.refSeqToCnsv8merSites[refSeq] += int(fields[5])
                self.refSeqToCnsv7merm8Sites[refSeq] += int(fields[6])
                self.refSeqToCnsv7mer1aSites[refSeq] += int(fields[7])
                self.refSeqToCnsvNumSites[refSeq] += numCnsv
            else:
                self.refSeqToNoncnsvScores[refSeq].append(site)
                self.refSeqToNoncnsvScoreSum[refSeq] += score
                self.refSeqToNoncnsv8merSites[refSeq] += int(fields[9])
                self.refSeqToNoncnsv7merm8Sites[refSeq] += int(fields[10])
                self.refSeqToNoncnsv7mer1aSites[refSeq] += int(fields[11])
                self.refSeqToNoncnsvNumSites[refSeq] += numNoncnsv

        # compute meta properties here (min score, etc)
        # .items() is used instead of .iteritems() so this runs on both
        # Python 2 and Python 3.
        for scoreTable, minTable in (
            (self.refSeqToScores, self.refSeqToMinScore),
            (self.refSeqToCnsvScores, self.refSeqToCnsvMinScore),
            (self.refSeqToNoncnsvScores, self.refSeqToNoncnsvMinScore),
        ):
            for refSeq, sites in scoreTable.items():
                minTable[refSeq] = min(siteScore for _, siteScore in sites)

    def getScores(self, refSeqGene):
        """Return the list of (miRNA, score) tuples for all rows of refSeqGene."""
        return self.refSeqToScores[refSeqGene]

    def getScoreSum(self, refSeqGene):
        """Return the sum of scores over all rows of refSeqGene."""
        return self.refSeqToScoreSum[refSeqGene]

    def getMinScore(self, refSeqGene):
        """Return the minimum score over all rows of refSeqGene (0.0 if unseen)."""
        return self.refSeqToMinScore[refSeqGene]

    def getNumSites(self, refSeqGene):
        """Return the total number of sites (conserved + nonconserved)."""
        return self.refSeqToNumSites[refSeqGene]

    def getCnsvScores(self, refSeqGene):
        """Return the list of (miRNA, score) tuples for conserved rows."""
        return self.refSeqToCnsvScores[refSeqGene]

    def getCnsvScoreSum(self, refSeqGene):
        """Return the sum of scores over conserved rows."""
        return self.refSeqToCnsvScoreSum[refSeqGene]

    def getCnsvMinScore(self, refSeqGene):
        """Return the minimum score over conserved rows (0.0 if none)."""
        return self.refSeqToCnsvMinScore[refSeqGene]

    def getCnsvNumSites(self, refSeqGene):
        """Return the number of conserved sites."""
        return self.refSeqToCnsvNumSites[refSeqGene]

    def getNoncnsvNumSites(self, refSeqGene):
        """Return the number of nonconserved sites (from nonconserved rows)."""
        return self.refSeqToNoncnsvNumSites[refSeqGene]

    def getNoncnsvScores(self, refSeqGene):
        """Return the list of (miRNA, score) tuples for nonconserved rows."""
        return self.refSeqToNoncnsvScores[refSeqGene]

    def getNoncnsvScoreSum(self, refSeqGene):
        """Return the sum of scores over nonconserved rows."""
        return self.refSeqToNoncnsvScoreSum[refSeqGene]

    def getNoncnsvMinScore(self, refSeqGene):
        """Return the minimum score over nonconserved rows (0.0 if none)."""
        return self.refSeqToNoncnsvMinScore[refSeqGene]

    def getCnsv8merSites(self, refSeqGene):
        """Return the number of conserved 8mer sites."""
        return self.refSeqToCnsv8merSites[refSeqGene]

    def getCnsv7merm8Sites(self, refSeqGene):
        """Return the number of conserved 7mer-m8 sites."""
        return self.refSeqToCnsv7merm8Sites[refSeqGene]

    def getCnsv7mer1aSites(self, refSeqGene):
        """Return the number of conserved 7mer-1a sites."""
        return self.refSeqToCnsv7mer1aSites[refSeqGene]

    def getNoncnsv8merSites(self, refSeqGene):
        """Return the number of nonconserved 8mer sites."""
        return self.refSeqToNoncnsv8merSites[refSeqGene]

    def getNoncnsv7merm8Sites(self, refSeqGene):
        """Return the number of nonconserved 7mer-m8 sites."""
        return self.refSeqToNoncnsv7merm8Sites[refSeqGene]

    def getNoncnsv7mer1aSites(self, refSeqGene):
        """Return the number of nonconserved 7mer-1a sites."""
        return self.refSeqToNoncnsv7mer1aSites[refSeqGene]
# print self.refSeqToScores
# print self.refSeqToScoreSum
# print self.refSeqToMinScore
# print "self.refSeqToNumSites"
# print self.refSeqToNumSites
# print "self.refSeqToCnsvScores"
# print self.refSeqToCnsvScores
# print "self.refSeqToCnsvScoreSum"
# print self.refSeqToCnsvScoreSum
# print "self.refSeqToCnsvMinScore"
# print self.refSeqToCnsvMinScore
# print "self.refSeqToNumCnsvSites"
# print self.refSeqToCnsvNumSites
# print "self.refSeqToNoncnsvNumSites"
# print self.refSeqToNoncnsvNumSites
# print "self.refSeqToNoncnsvScores"
# print self.refSeqToNoncnsvScores
# print "self.refSeqToNoncnsvScoreSum"
# print self.refSeqToNoncnsvScoreSum
# print "self.refSeqToNoncnsvMinScore"
# print self.refSeqToNoncnsvMinScore
# print "self.refSeqToCnsv8merSites"
# print self.refSeqToCnsv8merSites
# print "self.refSeqToCnsv7merm8Sites"
# print self.refSeqToCnsv7merm8Sites
# print "self.refSeqToCnsv7mer1aSites"
# print self.refSeqToCnsv7mer1aSites
# print "self.refSeqToNoncnsv8merSites"
# print self.refSeqToNoncnsv8merSites
# print "self.refSeqToNoncnsv7merm8Sites"
# print self.refSeqToNoncnsv7merm8Sites
# print "self.refSeqToNoncnsv7mer1aSites"
# print self.refSeqToNoncnsv7mer1aSites