@prefix dcterms: . @prefix np: . @prefix npx: . @prefix ns1: . @prefix orcid: . @prefix prov: . @prefix rdfs: . @prefix schema: . @prefix sub1: . @prefix this: . @prefix xsd: . sub1:Head { this: np:hasAssertion sub1:assertion; np:hasProvenance sub1:provenance; np:hasPublicationInfo sub1:pubinfo; a np:Nanopublication . } sub1:assertion { sub1:config-1 dcterms:identifier "/fshpc/meesters/projects/snakemake-workflows/rna-longseq-de-isoform/config/Drosophila_trl/config.yml"; schema:text """## General Workflow Parameters: # sample information and experimental design samples: samples.csv ## Workflow-specific Parameters: # Define reference genome/transcriptome ref: species: \"Drosophila melanogaster\" # Local reference data # Genome file path (supported extensions: .fa, .fna, .fasta, case-insensitive) # may be a path or left empty, if download using an accession number is preferred genome: \"\" # Annotation file (supported extensions: .gff, .gtf, case-insensitive) # may be a path or left empty, if download using an accession number is preferred annotation: \"\" # Remote reference data # NCBI accession number of the reference data set; can be left empty if both reference files are available locally\" #accession: \"GCF_000001215.2\" accession: \"GCF_000001215.4\" ensembl_species: \"\" # e.g., \"homo_sapiens\" build: \"\" # e.g., \"GRCh38\" release: \"\" # e.g., \"105\" read_filter: # Minimum read length; set 0 to keep all reads. min_length: 200 # minimap2 alignment parameters minimap2: # Minimap2 indexing options index_opts: \"\" # Minimap2 mapping options opts: \"\" # Maximum secondary alignments maximum_secondary: 100 # Secondary score ratio (-p for minimap2) secondary_score_ratio: 1.0 # samtools processing parameters samtools: # Samtools view opts, \"-b\" creates BAM from SAM. 
samtobam_opts: \"-b\" # Samtools sort opts, bamsort_opts: \"\" # Samtools index opts, bamindex_opts: \"\" # Samtools stats opts bamstats_opts: \"\" # salmon quantification parameters quant: # Salmon library type (Default: U) salmon_libtype: \"U\" # This section defines the pyDESeq2 plot and data handling parameters deseq2: # normalization fit type, must be 'parametric' or 'mean' fit_type: \"\" # the \"design factors\" are the confounding variables to be adjusted for # during normalization. They must be given in the configuration (samples.csv). design_factors: - \"condition\" # # the \"continuous factors\" are non-categorial factors to be considered #continuous_factors: # - # # The (log2) log fold change under the null hypothesis. (default: 0). lfc_null: 1.0 # # The alternative hypothesis for computing wald p-values. By default, # the normal Wald test assesses deviation of the estimated log fold # change from the null hypothesis, as given by lfc_null. # One of [\"greaterAbs\", \"lessAbs\", \"greater\", \"less\"] or None. # The alternative hypothesis corresponds to what the user wants to # find rather than the null hypothesis. (default: None). alt_hypothesis: \"greaterAbs\" # # The marker size in points**2 (typographic points are 1/72 in.). # Default is rcParams['lines.markersize'] ** 2.# minimum count to # be considered for subsequent analysis point_width: 20 # # we disregard loci with count number lower 'mincount' mincount: 10 # # Type I error cutoff value: alpha: 0.05 # # in addition to the full heatmap, plot the top number of different # values, ranked by the top ratio between the two traits threshold_plot: 10 # # the heatmap color map # see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview colormap: \"Blues\" #plot figure type figtype: \"png\" batch_effect: - \"\" # ## Differential Isoform Analysis # The FLAIR splice-isoform analysis pipeline includes resource-intensive computations and only works with additional constraints. # 1. 
In 'samples.csv: The 'condition' column must contain exactly two distinct values. For example 'control' and 'treated'. # 2. In 'samples.csv: Refrain from using underscores when naming samples. The 'sample' column may contain underscores, but be aware that underscores will be removed from the name for isoform quantification steps. # 3. In this file: the variable 'FLAIR' below must be:'true'. This is a check to determine if users are aware of the constraints and wish to proceed. isoform_analysis: # Enables FLAIR Isoform Analysis if 'true' FLAIR: true # Minimum MAPQ of read assignment to an isoform (default: 1). qscore: 1 # min read count expression threshold. Isoforms which contain fewer than 'exp_thresh' (Default=10) reads in both conditions are filtered out. exp_thresh: 10 # 'flair_collapse' options # '--annotation-reliant' makes FLAIR align reads to the annotation before identifying novel transcripts for the remaining reads # '--generate-map' to generate a txt file of read-isoform assignments # '--stringent' for full-length supporting reads (>=80% coverage) col_opts: \"--annotation_reliant generate --generate_map --stringent\" # Query genes to identify similar proteins using \"lambda\" protein_annotation: # Enables lambda sequence alignment if 'true' lambda: false # Pre-formatted UniProt Reference Cluster database (default: UniRef50) uniref: \"https://ftp.imp.fu-berlin.de/pub/lambda/index/lambda3/gen_0/uniref50_20230713.lba.gz\" # maximum number of protein matches returned per sequence (default: 3) num_matches: 3 # Enrichment Analysis Parameters #enrichment: # # Enable enrichment analysis if 'true' # perform_enrichment: true # # minimum number of genes to consider per pathway (default: 3) # min_genes: 3 """ . sub1:dataset a schema:Dataset; ns1:describesWorkflow ; ns1:description """
This workflow performs differential expression analysis of RNA-seq data obtained from Oxford Nanopore long-read sequencing technology. First, a transcriptome FASTA is constructed using gffread. Reads are then mapped to the transcriptome with the long-read optimized alignment tool minimap2. Next, quantification is performed using salmon before normalization and differential expression analysis are conducted by PyDESeq2. The workflow can optionally analyze splice-isoforms by integrating the FLAIR workflow. Additionally, NanoPlot is employed to analyze initial sequencing data and QualiMap is used to evaluate mapping results.
"""; ns1:generatedAt "2026-04-16T11:36:17.403465+00:00"^^xsd:dateTime; ns1:hasConfigurationSection sub1:workflow-configuration . sub1:workflow-configuration rdfs:label "from workflow configuration"; ns1:hasConfigurationFile sub1:config-1 . } sub1:provenance { sub1:assertion prov:generatedAtTime "2026-04-16T13:36:18.795233"^^xsd:dateTime; prov:wasAttributedTo orcid:0000-0003-2408-7588 . } sub1:pubinfo { sub1:sig npx:hasAlgorithm "RSA"; npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAkOFUnnRCp/k9/0ugvx8zQJ+Qc675W1Ug6F839+xvJ2QsSBu4iOJ1O2kJCmb3tALp3gJOt8sffRot3VrfY1hbgXxL7BVtDsfHHmXVff4YCeg5Ycdn5cDpLawDpAdYwMhK0LwIkZ3fwH9/o9JniYKXLV/jpF9bMKyiw/6tqlCHaMW1r8gzZzoxVIAakwvlABoY0iNoToLTlBRXEI4mLUNjDMnMwQgfh1KXMxMruNjW3wJyeDEIfa2ooAt0E4CRM9pkrEb37NzD9Jz8aSUFFY6BvIxF4ixK7rm6IUDvQ76LqXkEmgSeRv1kw7gnCe9wV/wHd0ZeW4heoBXmLHX3MvHfjwIDAQAB"; npx:hasSignature "GILSOm9j4/YZbOPETN7Q8HnmVlg4JkpZwkxxbSGrZU7K9xcaeO95D2Rxs1eyByTKLvbzOr8pIT7Qy6fsHPWyXjotR2inWRfrJtlgMWGe9aacFbs6KQSIQQNkOHgf0hn2bXjXu1DQqi2p1n/GkzvfFoRumRnjeQbEYZ8vbcXsdVobO7dDYAo7seYfYrBUocniDE70hHpEQHd3kBeBLuHEz5JkKAkcv8BK/tmAarwkGXArPGg0WfHmryivyl7sN3bHtVoU9P7mmsdmpXBC4xhQWxLPvqxT1PA5QcMk2+V6zraEh0eXcnzG9Z33O48mpDoHlOxySPEMgMtWCTbYro8h6Q=="; npx:hasSignatureTarget this: . this: dcterms:created "2026-04-16T11:36:17.403465+00:00"^^xsd:dateTime; dcterms:creator orcid:0000-0003-2408-7588; npx:hasNanopubType schema:Dataset; rdfs:label "Snakemake workflow metadata: https://w3id.org/np/RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE"; prov:generatedAtTime "2026-04-16T13:36:18.795233"^^xsd:dateTime; prov:wasAttributedTo orcid:0000-0003-2408-7588 . }