Configuration

The following is the example configuration file used for running tests:

# Top-level settings for the test run: sample table, sample directory,
# assembly name, and the directory holding reference data.
sampletable: 'pasilla_sampletable.tsv'
sample_dir: 'pasilla'
assembly: 'dm6'
data_dir: 'references-test'

# Path patterns containing {sampleID} / {treatment} placeholders.
# NOTE(review): rawLevel and runLevel are set to the same pattern here;
# presumably intentional for this test data set -- confirm.
rawLevel: 'pasilla/{sampleID}/{sampleID}_R1'
runLevel: 'pasilla/{sampleID}/{sampleID}_R1'
sampleLevel: 'pasilla/{sampleID}/{sampleID}'
aggLevel: 'pasilla/agg/{treatment}'

# The trailing space after the semicolon is part of the value.
shell_prefix: 'source lcdb-workflows.bashrc; '
# Default cluster arguments; individual rules below may carry their own
# `cluster.args` (how the two combine is not shown here -- confirm).
cluster_default:
  args: '--time=04:00:00'

# Per-rule settings. Entries may carry `params.extra` (a string of extra
# command-line arguments, presumably passed through to the tool -- confirm),
# plus optional `threads` and `cluster.args` values.
rules:
  cutadapt:
    params:
      extra: '-a file:adapters.fa --quality-cutoff 20 --minimum-length 25 --overlap 10'

  align:
    aligner: 'hisat2'
    # Index prefix; presumably relative to `data_dir` -- confirm.
    prefix: 'dm6/hisat2/dm6_default'
    params:
      extra: '--max-intronlen=5000'
    threads: 8
    cluster:
      args: '--mem=32g'

  fastqc:
    params:
      # Explicit empty string (not null).
      extra: ''

  fastq_screen:
    # Label -> index prefix, one entry per genome to screen against.
    indexes:
      ecoli: 'ecoli/bowtie2/ecoli_default'
      wolbachia: 'wolbachia/bowtie2/wolbachia_default'
      dm6: 'dm6/bowtie2/dm6_default'
      sacCer3: 'sacCer3/bowtie2/sacCer3_default'
    aligner: 'bowtie2'
    cluster:
      args: '--mem=32g'

  samtools_unique:
    params:
      extra: '-q 20 -b'

  picard_MarkDuplicates:
    params:
      extra: 'REMOVE_DUPLICATES=true VALIDATION_STRINGENCY=SILENT'
    cluster:
      args: '--mem=32g'

# Reference data to download. Each entry names an `assembly`, a `type`
# ('fasta' or 'gtf' in this file) and a `url`; `url` is either a single
# string or a list of strings (see the hg19 transcriptome entry).
# Optional keys used below: `tag`, `postprocess`, `indexes`, `conversions`.
references:

  # sacCer3: FASTA
  - assembly: 'sacCer3'
    type: 'fasta'
    url: 'http://hgdownload.cse.ucsc.edu/goldenPath/sacCer3/bigZips/chromFa.tar.gz'
    postprocess: 'sacCer3.fasta_postprocess'
    indexes:
      - 'bowtie2'

  # dm6: GTF annotation
  - assembly: 'dm6'
    type: 'gtf'
    url: 'ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/current/gtf/dmel-all-r6.11.gtf.gz'
    postprocess: 'dm6.gtf_postprocess'
    tag: 'r6-11'
    conversions:
      - 'intergenic'

  # dm6: chromosome FASTA (no `tag` given for this entry)
  - assembly: 'dm6'
    url: 'ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/current/fasta/dmel-all-chromosome-r6.11.fasta.gz'
    postprocess: 'dm6.fasta_postprocess'
    type: 'fasta'
    indexes:
      - 'bowtie2'
      - 'hisat2'

  # dm6: transcript FASTA
  - assembly: 'dm6'
    type: 'fasta'
    tag: 'r6-11_transcriptome'
    url: 'ftp://ftp.flybase.net/genomes/Drosophila_melanogaster/current/fasta/dmel-all-transcript-r6.11.fasta.gz'
    indexes:
      - 'kallisto'

  # hg19: genome FASTA
  - assembly: 'hg19'
    type: 'fasta'
    url: 'ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/GRCh37.p13.genome.fa.gz'
    indexes:
      - 'bowtie2'
      - 'hisat2'

  # hg19: GTF annotation
  - assembly: 'hg19'
    type: 'gtf'
    url: 'ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.annotation.gtf.gz'
    tag: 'gencode-v19'

  # hg19: transcript FASTA assembled from two source URLs
  - assembly: 'hg19'
    type: 'fasta'
    tag: 'gencode_v19_plus_lncRNA_transcriptome'
    url:
      - 'ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.pc_transcripts.fa.gz'
      - 'ftp://ftp.sanger.ac.uk/pub/gencode/Gencode_human/release_19/gencode.v19.lncRNA_transcripts.fa.gz'
    postprocess: 'hg19.plus_lncrna_fasta_postprocess'
    indexes:
      - 'kallisto'

  # ecoli: FASTA
  - assembly: 'ecoli'
    type: 'fasta'
    url: 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF_000005845.2_ASM584v2/GCF_000005845.2_ASM584v2_genomic.fna.gz'
    indexes:
      - 'bowtie2'

  # wolbachia: FASTA
  - assembly: 'wolbachia'
    type: 'fasta'
    url: 'ftp://ftp.ncbi.nlm.nih.gov/genomes/all/GCF_000008025.1_ASM802v1/GCF_000008025.1_ASM802v1_genomic.fna.gz'
    indexes:
      - 'bowtie2'