@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix npx: <http://purl.org/nanopub/x/> .
@prefix ns1: <https://w3id.org/np/snakemake/> .
@prefix orcid: <https://orcid.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sub1: <http://purl.org/np/RABclDqteji-4kj5nx9JM9QvuMB5UID5F_FNZESKNjUps#> .
@prefix this: <http://purl.org/np/RABclDqteji-4kj5nx9JM9QvuMB5UID5F_FNZESKNjUps> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# Head graph: declares the resource `this:` to be an np:Nanopublication and
# points it at the three other named graphs in this file (assertion,
# provenance and publication info).
sub1:Head {
  this: np:hasAssertion sub1:assertion;
    np:hasProvenance sub1:provenance;
    np:hasPublicationInfo sub1:pubinfo;
    a np:Nanopublication .
}

# Assertion graph: the payload of the nanopublication. It describes three
# resources:
#   sub1:config-1               - one configuration file (identifier + full text)
#   sub1:dataset                - a schema:Dataset linked to a workflow nanopub
#   sub1:workflow-configuration - a configuration section that groups config-1
#
# NOTE(review): the pubinfo graph of this file carries a signature whose target
# is `this:`. Editing any literal below (including fixing typos in the embedded
# text) would presumably invalidate that signature - confirm with the nanopub
# tooling before changing anything other than comments.
sub1:assertion {
  # Configuration file record. dcterms:identifier holds an absolute filesystem
  # path ending in config.yml; schema:text holds a multi-line string that reads
  # as YAML. Every line inside the triple-quoted literal - including those that
  # start with '#' - is part of the string value, not a TriG comment.
  # NOTE(review): the embedded text appears to be a verbatim snapshot of that
  # file, so oddities in it (for example the col_opts value versus the option
  # spellings in the comments just above it) belong to the recorded file and
  # should be fixed at the source, not here - TODO confirm.
  sub1:config-1 dcterms:identifier "/fshpc/meesters/projects/snakemake-workflows/rna-longseq-de-isoform/config/Drosophila_trl/config.yml";
    schema:text """## General Workflow Parameters:

# sample information and experimental design
samples: samples.csv

## Workflow-specific Parameters:
# Define reference genome/transcriptome
ref:
    species: \"Drosophila melanogaster\"
    # Local reference data
    # Genome file path (supported extensions: .fa, .fna, .fasta, case-insensitive)
    # may be a path or left empty, if download using an accession number is preferred
    genome: \"\"
    # Annotation file (supported extensions: .gff, .gtf, case-insensitive)
    # may be a path or left empty, if download using an accession number is preferred
    annotation: \"\"
    # Remote reference data
    # NCBI accession number of the reference data set; can be left empty if both reference files are available locally\"
    #accession: \"GCF_000001215.2\"
    accession: \"GCF_000001215.4\"
    ensembl_species: \"\"  # e.g., \"homo_sapiens\"
    build: \"\"  # e.g., \"GRCh38\"
    release: \"\"  # e.g., \"105\"

read_filter:
    # Minimum read length; set 0 to keep all reads.
    min_length: 200

# minimap2 alignment parameters
minimap2:
    # Minimap2 indexing options
    index_opts: \"\"
    # Minimap2 mapping options
    opts: \"\"
    # Maximum secondary alignments
    maximum_secondary: 100
    # Secondary score ratio (-p for minimap2)
    secondary_score_ratio: 1.0

# samtools processing parameters
samtools:
    # Samtools view opts, \"-b\" creates BAM from SAM.
    samtobam_opts: \"-b\"
    # Samtools sort opts,
    bamsort_opts: \"\"
    # Samtools index opts,
    bamindex_opts: \"\"
    # Samtools stats opts
    bamstats_opts: \"\"

# salmon quantification parameters
quant:
    # Salmon library type (Default: U)
    salmon_libtype: \"U\"

# This section defines the pyDESeq2 plot and data handling parameters
deseq2:
    # normalization fit type, must be 'parametric' or 'mean'
    fit_type: \"\"
    # the \"design factors\" are the confounding variables to be adjusted for
    # during normalization. They must be given in the configuration (samples.csv).
    design_factors:
        - \"condition\"
    #
    # the \"continuous factors\" are non-categorial factors to be considered
    #continuous_factors:
    #    -
    #
    # The (log2) log fold change under the null hypothesis. (default: 0).
    lfc_null: 1.0
    #
    # The alternative hypothesis for computing wald p-values. By default,
    # the normal Wald test assesses deviation of the estimated log fold
    # change from the null hypothesis, as given by lfc_null.
    # One of [\"greaterAbs\", \"lessAbs\", \"greater\", \"less\"] or None.
    # The alternative hypothesis corresponds to what the user wants to
    # find rather than the null hypothesis. (default: None).
    alt_hypothesis: \"greaterAbs\"
    #
    # The marker size in points**2 (typographic points are 1/72 in.).
    # Default is rcParams['lines.markersize'] ** 2.# minimum count to
    # be considered for subsequent analysis
    point_width: 20
    #
    # we disregard loci with count number lower 'mincount'
    mincount: 10
    #
    # Type I error cutoff value:
    alpha: 0.05
    #
    # in addition to the full heatmap, plot the top number of different
    # values, ranked by the top ratio between the two traits
    threshold_plot: 10
    #
    # the heatmap color map
    # see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview
    colormap: \"Blues\"
    #plot figure type
    figtype: \"png\"
    batch_effect: 
      - \"\"
    #
## Differential Isoform Analysis

# The FLAIR splice-isoform analysis pipeline includes resource-intensive computations and only works with additional constraints.
#     1. In 'samples.csv: The 'condition' column must contain exactly two distinct values. For example 'control' and 'treated'.
#     2. In 'samples.csv: Refrain from using underscores when naming samples. The 'sample' column may contain underscores, but be aware that underscores will be removed from the name for isoform quantification steps.
#     3. In this file: the variable 'FLAIR' below must be:'true'. This is a check to determine if users are aware of the constraints and wish to proceed.
isoform_analysis:
    # Enables FLAIR Isoform Analysis if 'true'
    FLAIR: true
    # Minimum MAPQ of read assignment to an isoform (default: 1).
    qscore: 1
    # min read count expression threshold. Isoforms which contain fewer than 'exp_thresh' (Default=10) reads in both conditions are filtered out.
    exp_thresh: 10
    # 'flair_collapse' options
    # '--annotation-reliant' makes FLAIR align reads to the annotation before identifying novel transcripts for the remaining reads
    # '--generate-map' to generate a txt file of read-isoform assignments
    # '--stringent' for full-length supporting reads (>=80% coverage)
    col_opts: \"--annotation_reliant generate --generate_map --stringent\"


# Query genes to identify similar proteins using \"lambda\"
protein_annotation:
    # Enables lambda sequence alignment if 'true'
    lambda: false
    # Pre-formatted UniProt Reference Cluster database (default: UniRef50)
    uniref: \"https://ftp.imp.fu-berlin.de/pub/lambda/index/lambda3/gen_0/uniref50_20230713.lba.gz\"
    # maximum number of protein matches returned per sequence (default: 3)
    num_matches: 3

# Enrichment Analysis Parameters
#enrichment:
#    # Enable enrichment analysis if 'true'
#    perform_enrichment: true
#    # minimum number of genes to consider per pathway (default: 3)
#    min_genes: 3
""" .
  
  # Dataset record: typed schema:Dataset, linked via ns1:describesWorkflow to
  # another nanopublication IRI, with an HTML fragment as ns1:description, a
  # generation timestamp (with an explicit +00:00 offset) and a pointer to the
  # configuration section defined further down in this graph.
  sub1:dataset a schema:Dataset;
    ns1:describesWorkflow <https://w3id.org/np/RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE>;
    ns1:description """<div class=\"document\">
<blockquote>
This workflow performs differential expression analysis of RNA-seq data obtained from Oxford Nanopore long-read sequencing technology.
First a transcriptome FASTA is constructed using <a class=\"reference external\" href=\"https://github.com/gpertea/gffread\">gffread</a>. Reads are then mapped to the transcriptome with the long-read optimized alignment tool <a class=\"reference external\" href=\"https://github.com/lh3/minimap2\">minimap2</a>.
Next quantification is performed using <a class=\"reference external\" href=\"https://github.com/COMBINE-lab/salmon\">salmon</a> before normalization and differential expression analysis are conducted by <a class=\"reference external\" href=\"https://github.com/owkin/PyDESeq2\">PyDESeq2</a>.
The workflow can optionally analyze splice-isoforms through integrating the <a class=\"reference external\" href=\"https://github.com/BrooksLabUCSC/flair\">FLAIR</a> workflow.
Additionaly, <a class=\"reference external\" href=\"https://github.com/wdecoster/NanoPlot\">NanoPlot</a> is employed to analyze initial sequencing data and <a class=\"reference external\" href=\"https://github.com/EagleGenomics-cookbooks/QualiMap\">QualiMap</a> is used to evaluate mapping results.</blockquote>
</div>
""";
    ns1:generatedAt "2026-04-16T11:36:17.403465+00:00"^^xsd:dateTime;
    ns1:hasConfigurationSection sub1:workflow-configuration .
  
  # Configuration section: a labelled grouping node that ties the dataset to
  # the single configuration file record (sub1:config-1).
  sub1:workflow-configuration rdfs:label "from workflow configuration";
    ns1:hasConfigurationFile sub1:config-1 .
}

# Provenance graph: states when the assertion graph was generated and which
# ORCID-identified agent it is attributed to.
# NOTE(review): this xsd:dateTime literal has no timezone offset, whereas the
# ns1:generatedAt value in the assertion graph carries "+00:00" and reads about
# two hours earlier. Presumably this one is local wall-clock time - confirm
# with the generating tool.
sub1:provenance {
  sub1:assertion prov:generatedAtTime "2026-04-16T13:36:18.795233"^^xsd:dateTime;
    prov:wasAttributedTo orcid:0000-0003-2408-7588 .
}

# Publication-info graph: metadata about the nanopublication itself.
#   sub1:sig - signature record: algorithm name, public key, signature value,
#              and the signed target (`this:`).
#   this:    - creation time, creator, nanopub type, human-readable label,
#              plus generation time and attribution.
# NOTE(review): the key and signature strings must stay byte-identical; they
# look like base64 text and any change would presumably make verification
# fail - confirm with the nanopub tooling.
# NOTE(review): dcterms:created carries a "+00:00" offset while
# prov:generatedAtTime below has none and reads about two hours later;
# presumably the latter is local time - confirm with the generating tool.
sub1:pubinfo {
  sub1:sig npx:hasAlgorithm "RSA";
    npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAkOFUnnRCp/k9/0ugvx8zQJ+Qc675W1Ug6F839+xvJ2QsSBu4iOJ1O2kJCmb3tALp3gJOt8sffRot3VrfY1hbgXxL7BVtDsfHHmXVff4YCeg5Ycdn5cDpLawDpAdYwMhK0LwIkZ3fwH9/o9JniYKXLV/jpF9bMKyiw/6tqlCHaMW1r8gzZzoxVIAakwvlABoY0iNoToLTlBRXEI4mLUNjDMnMwQgfh1KXMxMruNjW3wJyeDEIfa2ooAt0E4CRM9pkrEb37NzD9Jz8aSUFFY6BvIxF4ixK7rm6IUDvQ76LqXkEmgSeRv1kw7gnCe9wV/wHd0ZeW4heoBXmLHX3MvHfjwIDAQAB";
    npx:hasSignature "GILSOm9j4/YZbOPETN7Q8HnmVlg4JkpZwkxxbSGrZU7K9xcaeO95D2Rxs1eyByTKLvbzOr8pIT7Qy6fsHPWyXjotR2inWRfrJtlgMWGe9aacFbs6KQSIQQNkOHgf0hn2bXjXu1DQqi2p1n/GkzvfFoRumRnjeQbEYZ8vbcXsdVobO7dDYAo7seYfYrBUocniDE70hHpEQHd3kBeBLuHEz5JkKAkcv8BK/tmAarwkGXArPGg0WfHmryivyl7sN3bHtVoU9P7mmsdmpXBC4xhQWxLPvqxT1PA5QcMk2+V6zraEh0eXcnzG9Z33O48mpDoHlOxySPEMgMtWCTbYro8h6Q==";
    npx:hasSignatureTarget this: .
  
  this: dcterms:created "2026-04-16T11:36:17.403465+00:00"^^xsd:dateTime;
    dcterms:creator orcid:0000-0003-2408-7588;
    npx:hasNanopubType schema:Dataset;
    rdfs:label "Snakemake workflow metadata: https://w3id.org/np/RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE";
    prov:generatedAtTime "2026-04-16T13:36:18.795233"^^xsd:dateTime;
    prov:wasAttributedTo orcid:0000-0003-2408-7588 .
}