count-matrix.py
import pandas as pd
def get_column(strandedness):
if pd.isnull(strandedness) or strandedness == "none":
return 1 #non stranded protocol
elif strandedness == "yes":
return 2 #3rd column
elif strandedness == "reverse":
return 3 #4th column, usually for Illumina truseq
else:
raise ValueError(("'strandedness' column should be empty or have the "
"value 'none', 'yes' or 'reverse', instead has the "
"value {}").format(repr(strandedness)))
counts = [pd.read_table(f, index_col=0, usecols=[0, get_column(strandedness)],
header=None, skiprows=4)
for f, strandedness in zip(snakemake.input, snakemake.params.strand)]
for t, sample in zip(counts, snakemake.params.samples):
t.columns = [sample]
matrix = pd.concat(counts, axis=1)
matrix.index.name = "gene"
# collapse technical replicates
matrix = matrix.groupby(matrix.columns, axis=1).sum()
matrix.to_csv(snakemake.output[0], sep="\t")