<!--
  Source: https://github.com/galaxyproject/galaxy (raw file: codingSnps.xml)
  Tip revision: 95036639dd4f7bd986e93acc99d5dd1be7823286 authored by Dannon Baker on 11 April 2023, 13:10:41 UTC
  Tip revision message: Update version to 23.0.
  Tip revision (short): 9503663
-->
<tool id="hgv_codingSnps" name="aaChanges" version="1.0.1">
  <description>amino-acid changes caused by a set of SNPs</description>

  <!-- Packages the tool depends on; each one is pinned to an exact version. -->
  <requirements>
    <requirement version="8.25" type="package">coreutils</requirement>
    <requirement version="357" type="package">ucsc-twobittofa</requirement>
    <requirement version="357" type="package">ucsc-nibfrag</requirement>
  </requirements>

  <!-- Code hook: attaches codingSnps_filter.py to this tool.
       NOTE(review): the hook's contents are not visible here; confirm what it filters. -->
  <code file="codingSnps_filter.py" />

  <!--
    Runs codingSnps.pl from the tool directory and redirects its standard
    output into the result dataset.

    Positional arguments: the SNP dataset, the gene dataset, and the literal
    word "Galaxy".  The remaining arguments are key=value pairs:
      build        dbkey of the SNP dataset
      loc          path to codingSnps.loc inside the Galaxy data index directory
      chr/start/end  interval column numbers taken from the SNP dataset metadata
      snp          column holding the SNP alleles
      keepColumns  value of the "Keep columns from SNP dataset" select (0 or 1)
      strand       a strand column number, or the literal "+" / "-"
      unique       value of the "Only report each SNP position once" select (0 or 1)

    Every interpolated argument is single-quoted so that a data index path
    containing whitespace, or an unexpected metadata value, cannot split or
    alter the shell command.  Galaxy collapses the newlines below into spaces
    before the command is executed.
  -->
  <command><![CDATA[
perl '$__tool_directory__/codingSnps.pl'
    '$input1'
    '$input2'
    Galaxy
    'build=${input1.metadata.dbkey}'
    'loc=${GALAXY_DATA_INDEX_DIR}/codingSnps.loc'
    'chr=${input1.metadata.chromCol}'
    'start=${input1.metadata.startCol}'
    'end=${input1.metadata.endCol}'
    'snp=$col1'
    'keepColumns=$keep'
    'strand=${strand_source.strand_col}'
    'unique=$uniqpos'
    > '$out_file1'
  ]]></command>

  <inputs>
    <!-- SNP dataset (interval). Its dbkey must be listed in column 0 of codingSnps.loc. -->
    <param name="input1" type="data" format="interval" label="SNP dataset">
      <validator type="dataset_metadata_in_file"
                 filename="codingSnps.loc"
                 metadata_name="dbkey"
                 metadata_column="0"
                 split="\t"
                 message="Sequences are not currently available for the specified build." />
    </param>

    <!-- Column of the SNP dataset that holds the SNP alleles. -->
    <param name="col1" type="data_column" data_ref="input1" label="Column with SNPs" />

    <!-- Gene dataset (interval). Same dbkey check as for the SNP dataset. -->
    <param name="input2" type="data" format="interval" label="Gene dataset">
      <validator type="dataset_metadata_in_file"
                 filename="codingSnps.loc"
                 metadata_name="dbkey"
                 metadata_column="0"
                 split="\t"
                 message="Sequences are not currently available for the specified build." />
    </param>

    <!-- Passed to the script as keepColumns=0|1; defaults to 0 (No). -->
    <param name="keep" type="select" label="Keep columns from SNP dataset">
      <option selected="true" value="0">No</option>
      <option value="1">Yes</option>
    </param>

    <!-- Passed to the script as unique=0|1; defaults to 1 (Yes). -->
    <param name="uniqpos" type="select" label="Only report each SNP position once">
      <option selected="true" value="1">Yes</option>
      <option value="0">No</option>
    </param>

    <!--
      Strand source. Every branch defines strand_col, so the command can always
      read strand_source.strand_col: it is either a column of the SNP dataset
      or the fixed value "+" / "-".
    -->
    <conditional name="strand_source">
      <param name="strand_choice" type="select" label="Strand info">
        <option value="data_column">a column in the dataset</option>
        <option selected="true" value="all_pos">all on sense/forward/+ strand</option>
        <option value="all_neg">all on antisense/reverse/- strand</option>
      </param>
      <when value="data_column">
        <param name="strand_col" type="data_column" data_ref="input1" label="Column with strand" />
      </when>
      <when value="all_pos">
        <param name="strand_col" type="hidden" value="+" />
      </when>
      <when value="all_neg">
        <param name="strand_col" type="hidden" value="-" />
      </when>
    </conditional>
  </inputs>

  <!-- Single interval result; the command redirects the script's standard output into it. -->
  <outputs>
    <data name="out_file1" format="interval" />
  </outputs>

  <tests>
    <!-- Forward strand, SNPs in column 6, every matching line reported (uniqpos off). -->
    <test>
      <param name="input1" value="codingSnps_input1.interval" ftype="interval" dbkey="hg18" />
      <param name="input2" value="codingSnps_inputGenes1.bed" ftype="interval" dbkey="hg18" />
      <param name="col1" value="6" />
      <param name="strand_choice" value="all_pos" />
      <param name="strand_col" value="+" />
      <param name="uniqpos" value="0" />
      <output name="out_file1" file="codingSnps_output1.interval" />
    </test>

    <!-- Forward strand, SNPs in column 4, every matching line reported (uniqpos off). -->
    <test>
      <param name="input1" value="codingSnps_input2.interval" ftype="interval" dbkey="hg18" />
      <param name="input2" value="codingSnps_inputGenes2.bed" ftype="interval" dbkey="hg18" />
      <param name="col1" value="4" />
      <param name="strand_choice" value="all_pos" />
      <param name="strand_col" value="+" />
      <param name="uniqpos" value="0" />
      <output name="out_file1" file="codingSnps_output2.interval" />
    </test>

    <!-- Same inputs as the previous test on the reverse strand; uniqpos keeps its default. -->
    <test>
      <param name="input1" value="codingSnps_input2.interval" ftype="interval" dbkey="hg18" />
      <param name="input2" value="codingSnps_inputGenes2.bed" ftype="interval" dbkey="hg18" />
      <param name="col1" value="4" />
      <param name="strand_choice" value="all_neg" />
      <param name="strand_col" value="-" />
      <output name="out_file1" file="codingSnps_output3.interval" />
    </test>
  </tests>

  <help>
.. class:: infomark

The genome build must be set for both input datasets, and it must be the same build for both.
If a dataset has no build assigned, use the pencil icon to set it.

-----

**Dataset formats**

The SNP dataset is in interval_ format, with a column of SNPs as described below.
The gene dataset is in BED_ format with 12 columns.  The output dataset is also interval.
(`Dataset missing?`_)

.. _interval: ${static_path}/formatHelp.html#interval
.. _BED: ${static_path}/formatHelp.html#bed
.. _Dataset missing?: ${static_path}/formatHelp.html

-----

**What it does**

This tool identifies which SNPs create amino-acid changes in the specified
coding regions.  The first input file contains the SNPs and must be an interval
file.  It needs the chromosome, start, and end position as well as the SNP.
The SNP can be given using ambiguous-nucleotide symbols or as a list of two to
four alleles separated by '/'.  Any other columns in the first input file are
not used by the analysis; whether they are kept in the output is controlled by
the "Keep columns from SNP dataset" option.  The second input file contains the
genes to be used for defining the coding regions.  This file must be a BED file
whose first 12 columns are the standard BED columns.  The output contains the
SNP lines from the first input file with several columns added: the name field
from the line of the gene input file that was used, the amino acids, the codon
number, the reference nucleotide that is changed (given on the same strand as
the gene), and the codons that go with the amino acids.
The amino acids are listed with the reference amino acid first, then a colon,
and then the amino acids for the alleles.  If a SNP is not in a coding region,
or is synonymous, then it is not included in the output file.

-----

**Example**

- first input file, with SNPs::

    chr22  15660821  15660822  A/G
    chr22  15825725  15825726  G/T
    chr22  15827035  15827036  G
    chr22  15827135  15827136  C/G
    chr22  15830928  15830929  A/G
    chr22  15830951  15830952  G
    chr22  15830955  15830956  C/T
    chr22  15848885  15848886  C/T
    chr22  15849048  15849049  A/C
    chr22  15919711  15919712  A/G
    etc.

  or, indicating polymorphisms using ambiguous-nucleotide symbols::

    chr22  15660821  15660822  R
    chr22  15825725  15825726  K
    chr22  15827035  15827036  G
    chr22  15827135  15827136  S
    chr22  15830928  15830929  R
    chr22  15830951  15830952  G
    chr22  15830955  15830956  Y
    chr22  15848885  15848886  Y
    chr22  15849048  15849049  M
    chr22  15919711  15919712  R
    etc.

- second input file, with UCSC annotations for human genes::

    chr22  15688363  15690225  uc010gqr.1  0  +  15688363  15688363  0  2   587,794,  0,1068,
    chr22  15822826  15869112  uc002zlw.1  0  -  15823622  15869004  0  10  940,105,97,91,265,86,251,208,304,282,  0,1788,2829,3241,4163,6361,8006,26023,29936,46004,
    chr22  15826991  15869112  uc010gqs.1  0  -  15829218  15869004  0  5   1380,86,157,304,282,  0,2196,21858,25771,41839,
    chr22  15897459  15919682  uc002zlx.1  0  +  15897459  15897459  0  4   775,128,103,1720,  0,8303,10754,20503,
    chr22  15945848  15971389  uc002zly.1  0  +  15945981  15970710  0  13  271,25,147,113,127,48,164,84,85,12,102,42,2193,  0,12103,12838,13816,15396,17037,17180,18535,19767,20632,20894,22768,23348,
    etc.

- output file, showing non-synonymous substitutions in coding regions::

    chr22  15825725  15825726  G/T  uc002zlw.1  Gln:Pro/Gln   469  A  CAA:CCA/CAA
    chr22  15827035  15827036  G    uc002zlw.1  Glu:Asp       414  G  GAG:GAC
    chr22  15827135  15827136  C/G  uc002zlw.1  Gly:Gly/Ala   381  G  GGT:GGT/GCT
    chr22  15830928  15830929  A/G  uc002zlw.1  Ala:Ser/Pro   281  G  GCA:TCA/CCA
    chr22  15830951  15830952  G    uc002zlw.1  Leu:Pro       273  T  CTT:CCT
    chr22  15830955  15830956  C/T  uc002zlw.1  Ser:Gly/Ser   272  A  AGC:GGC/AGC
    chr22  15848885  15848886  C/T  uc002zlw.1  Ser:Trp/Stop  217  C  TCG:TGG/TAG
    chr22  15848885  15848886  C/T  uc010gqs.1  Ser:Trp/Stop  200  C  TCG:TGG/TAG
    chr22  15849048  15849049  A/C  uc002zlw.1  Gly:Stop/Gly  163  G  GGA:TGA/GGA
    etc.
  </help>
</tool>
<!-- back to top -->