Skip to main content
  • Home
  • Development
  • Documentation
  • Donate
  • Operational login
  • Browse the archive

swh logo
SoftwareHeritage
Software
Heritage
Archive
Features
  • Search

  • Downloads

  • Save code now

  • Add forge now

  • Help

https://github.com/kussell-lab/ReferenceAlignmentGenerator
18 February 2022, 11:37:47 UTC
  • Code
  • Branches (3)
  • Releases (0)
  • Visits
    • Branches
    • Releases
    • HEAD
    • refs/heads/2019natmethods
    • refs/heads/2021biorxiv
    • refs/heads/master
    No releases to show
  • c9e6ce9
  • /
  • GenomicConsensus
  • /
  • main.go
Raw File Download
Take a new snapshot of a software origin

If the archived software origin currently browsed is not synchronized with its upstream version (for instance when new commits have been issued), you can explicitly request Software Heritage to take a new snapshot of it.

Use the form below to proceed. Once a request has been submitted and accepted, it will be processed as soon as possible. You can then check its processing state by visiting this dedicated page.
swh spinner

Processing "take a new snapshot" request ...

Permalinks

To reference or cite the objects present in the Software Heritage archive, permalinks based on SoftWare Hash IDentifiers (SWHIDs) must be used.
Select below a type of object currently browsed in order to display its associated SWHID and permalink.

  • content
  • directory
  • revision
  • snapshot
origin badgecontent badge Iframe embedding
swh:1:cnt:7e8e30667fa624202890998a59028a4a2f12192e
origin badgedirectory badge Iframe embedding
swh:1:dir:c16a46936fdbcb811d2df27bfcea5befb7100b7b
origin badgerevision badge
swh:1:rev:49bf399a9d1ceae722c0c8c4aeb8376f2644d40f
origin badgesnapshot badge
swh:1:snp:14ae65545ac5d89be3e3e69d3ac6b8b27db9c01e
Citations

This interface enables to generate software citations, provided that the root directory of browsed objects contains a citation.cff or codemeta.json file.
Select below a type of object currently browsed in order to generate citations for them.

  • content
  • directory
  • revision
  • snapshot
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Generate software citation in BibTex format (requires biblatex-software package)
Generating citation ...
Tip revision: 49bf399a9d1ceae722c0c8c4aeb8376f2644d40f authored by Asher Preska Steinberg on 07 February 2022, 16:12:20 UTC
Commit for ReduceCoreGenome
Tip revision: 49bf399
main.go
// GenomicConsensus converts a samtools pileup file to the genomic sequence.
// Created by Mingzhi Lin (mingzhi9@gmail.com).
package main

import (
	"bytes"
	"io"
	"log"
	"os"

	"github.com/alecthomas/kingpin"
	"github.com/kussell-lab/biogo/pileup"
	"github.com/kussell-lab/biogo/seq"
)

func main() {
	var pileFile = kingpin.Flag("pileup_file", "pileup file (default reading from stdin).").Default("").String()
	var baseQual = kingpin.Flag("base_qual", "base quality").Default("30").Int8()
	var outFile = kingpin.Flag("out_file", "output sequence file in FASTA format (default writing to stdout).").Default("").String()
	var fnaFile = kingpin.Flag("fna_file", "reference sequences file").Required().String()
	kingpin.Parse()

	ss := readFastas(*fnaFile)
	seqLens := getSequenceLengths(ss)
	snpChan := readPileup(*pileFile)
	baseChan := pileupSupport(snpChan, *baseQual)
	seqChan := generateSequence(baseChan, seqLens)
	writeFasta(seqChan, *outFile)
}

// openFile is a helper to open a file.
func openFile(fileName string) (f *os.File) {
	var err error
	if fileName == "" {
		f = os.Stdin
		return
	}

	f, err = os.Open(fileName)
	if err != nil {
		log.Fatalf("Error when openning file %s: %v", fileName, err)
	}
	return
}

// createFile is a helper to open a file.
func createFile(fileName string) (f *os.File) {
	var err error
	if fileName == "" {
		f = os.Stdout
		return
	}

	f, err = os.Create(fileName)
	if err != nil {
		log.Fatalf("Error when openning file %s: %v", fileName, err)
	}
	return
}

// readPileup returns a channel of *pileup.SNP.
func readPileup(fileName string) chan *pileup.SNP {
	c := make(chan *pileup.SNP)
	go func() {
		defer close(c)

		f := openFile(fileName)
		defer f.Close()

		r := pileup.NewReader(f)
		for {
			snp, err := r.Read()
			if err != nil {
				if err != io.EOF {
					log.Fatalf("Error when parsing pileup file: %v", err)
				}
				break
			}
			c <- snp
		}
	}()
	return c
}

// Base is a record store the position, ref, alt base and its supports.
type Base struct {
	Ref     string
	Pos     int
	RefBase byte
	AltBase byte
}

// pileupSupport filter and return SNP calls from a pileup source.
func pileupSupport(c chan *pileup.SNP, baseQual int8) chan Base {
	baseChan := make(chan Base)

	go func() {
		defer close(baseChan)
		letters := []byte{'a', 'A', 'c', 'C', 'g', 'G', 't', 'T'}

		for s := range c {
			cc := make([]int, len(letters))
			for i, b := range s.Bases {
				if int8(s.Quals[i]) >= baseQual {
					index := bytes.IndexByte(letters, b)
					if index >= 0 {
						cc[index]++
					}
				}
			}

			dp := 0
			for _, c := range cc {
				dp += c
			}

			var baseCalled byte = '-'
			for i := 0; i < 4; i++ {
				n := cc[i*2] + cc[i*2+1]
				if float64(n)/float64(dp) >= 0.75 && cc[i*2] >= 2 && cc[i*2+1] >= 2 {
					baseCalled = byte(letters[i*2+1])
				}
			}

			if baseCalled != '-' {
				refBase := bytes.ToUpper([]byte{s.RefBase})[0]
				altBase := bytes.ToUpper([]byte{baseCalled})[0]
				b := Base{
					Ref:     s.Reference,
					Pos:     s.Position,
					RefBase: refBase,
					AltBase: altBase,
				}
				baseChan <- b
			}
		}
	}()
	return baseChan
}

// generateSequence generates a sequence from the SNP chan.
func generateSequence(baseChan chan Base, seqLens map[string]int) chan seq.Sequence {
	seqChan := make(chan seq.Sequence)
	go func() {
		defer close(seqChan)
		currentRef := ""
		ss := []byte{}
		for b := range baseChan {
			ref := b.Ref
			if ref != currentRef {
				if len(ss) > 0 {
					l := seqLens[currentRef]
					for len(ss) < l {
						ss = append(ss, '-')
					}
					s := seq.Sequence{}
					s.Id = currentRef
					s.Name = currentRef
					s.Seq = ss
					seqChan <- s
				}
				currentRef = ref
				ss = []byte{}
			}
			pos := b.Pos
			for len(ss) < pos-1 {
				ss = append(ss, '-')
			}
			ss = append(ss, b.AltBase)
		}

		if len(ss) > 0 {
			l := seqLens[currentRef]
			for len(ss) < l {
				ss = append(ss, '-')
			}
			s := seq.Sequence{}
			s.Id = currentRef
			s.Name = currentRef
			s.Seq = ss
			seqChan <- s
		}
	}()
	return seqChan
}

// writeFasta write genomes into a fasta file.
func writeFasta(seqChan chan seq.Sequence, file string) {
	f := createFile(file)
	defer f.Close()

	for s := range seqChan {
		f.WriteString(">" + s.Id + "\n")
		bases := s.Seq
		numOfLines := len(bases)/80 + 1
		for i := 0; i < numOfLines; i++ {
			start := i * 80
			end := (i + 1) * 80
			if end > len(bases) {
				end = len(bases)
			}

			if start < len(bases) {
				f.WriteString(string(bases[start:end]) + "\n")
			}
		}
	}
}

// readFastas returns a genome sequence.
func readFastas(file string) []*seq.Sequence {
	f, err := os.Open(file)
	if err != nil {
		panic(err)
	}
	defer f.Close()
	fastaReader := seq.NewFastaReader(f)
	s, err := fastaReader.ReadAll()
	if err != nil {
		log.Fatalf("Error when reading fasta file: %v", err)
	}
	return s
}

// getSequenceLengths returns a map of sequence name to its length.
func getSequenceLengths(ss []*seq.Sequence) map[string]int {
	m := make(map[string]int)
	for _, s := range ss {
		m[s.Id] = len(s.Seq)
	}
	return m
}

Software Heritage — Copyright (C) 2015–2025, The Software Heritage developers. License: GNU AGPLv3+.
The source code of Software Heritage itself is available on our development forge.
The source code files archived by Software Heritage are available under their own copyright and licenses.
Terms of use: Archive access, API— Contact— JavaScript license information— Web API

back to top