# Nanopublications

@prefix dcterms: <http://purl.org/dc/terms/> .
@prefix np: <http://www.nanopub.org/nschema#> .
@prefix npx: <http://purl.org/nanopub/x/> .
@prefix ns1: <https://w3id.org/np/snakemake/> .
@prefix orcid: <https://orcid.org/> .
@prefix prov: <http://www.w3.org/ns/prov#> .
@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .
@prefix schema: <https://schema.org/> .
@prefix sub1: <http://purl.org/np/RAZOCV8jrA0_J2BBQA7lNn9Tge0JyBY0huS1hd4FJ3hEw#> .
@prefix this: <http://purl.org/np/RAZOCV8jrA0_J2BBQA7lNn9Tge0JyBY0huS1hd4FJ3hEw> .
@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .

# Head graph: types the resource `this:` as a nanopublication and links it to
# its three named graphs (assertion, provenance, publication info).
sub1:Head {
  this: a np:Nanopublication ;
    np:hasAssertion sub1:assertion ;
    np:hasProvenance sub1:provenance ;
    np:hasPublicationInfo sub1:pubinfo .
}

# Assertion graph: the metadata payload of this nanopublication. It describes
# three resources: sub1:config-1 (a configuration file and its text),
# sub1:dataset (the schema:Dataset record) and sub1:workflow-configuration
# (the section that links the dataset to the configuration file).
#
# NOTE(review): the pubinfo graph carries a signature whose target is `this:`.
# Presumably any change to a literal in this graph invalidates it, so typos
# inside the literals (e.g. "Additionaly", "non-categorial") should be fixed
# upstream and re-published rather than edited here -- confirm.
sub1:assertion {
  # sub1:config-1: dcterms:identifier holds the file name and schema:text holds
  # the file content as one triple-quoted literal. Lines starting with '#'
  # inside that literal are part of the literal, not TriG comments.
  #
  # NOTE(review): nested keys in the literal (e.g. `species:` following `ref:`)
  # have no leading indentation, so the text as stored does not express the
  # nesting its section headers suggest -- verify against the original config.yml.
  # NOTE(review): the `col_opts` value uses `--annotation_reliant generate` and
  # `--generate_map` (underscores, plus a bare `generate` token), while the
  # comments directly above it in the literal spell the flags
  # `--annotation-reliant` and `--generate-map` -- confirm which form is intended.
  sub1:config-1 dcterms:identifier "config.yml" ;
    schema:text """## General Workflow Parameters:

# sample information and experimental design
samples: samples.csv

## Workflow-specific Parameters:
# Define reference genome/transcriptome
ref:
species: \"Drosophila melanogaster\"
# Local reference data
# Genome file path (supported extensions: .fa, .fna, .fasta, case-insensitive)
# may be a path or left empty, if download using an accession number is preferred
genome: \"\"
# Annotation file (supported extensions: .gff, .gtf, case-insensitive)
# may be a path or left empty, if download using an accession number is preferred
annotation: \"\"
# Remote reference data
# NCBI accession number of the reference data set; can be left empty if both reference files are available locally\"
#accession: \"GCF_000001215.2\"
accession: \"GCF_000001215.4\"
ensembl_species: \"\" # e.g., \"homo_sapiens\"
build: \"\" # e.g., \"GRCh38\"
release: \"\" # e.g., \"105\"

read_filter:
# Minimum read length; set 0 to keep all reads.
min_length: 200

# minimap2 alignment parameters
minimap2:
# Minimap2 indexing options
index_opts: \"\"
# Minimap2 mapping options
opts: \"\"
# Maximum secondary alignments
maximum_secondary: 100
# Secondary score ratio (-p for minimap2)
secondary_score_ratio: 1.0

# samtools processing parameters
samtools:
# Samtools view opts, \"-b\" creates BAM from SAM.
samtobam_opts: \"-b\"
# Samtools sort opts,
bamsort_opts: \"\"
# Samtools index opts,
bamindex_opts: \"\"
# Samtools stats opts
bamstats_opts: \"\"

# salmon quantification parameters
quant:
# Salmon library type (Default: U)
salmon_libtype: \"U\"

# This section defines the pyDESeq2 plot and data handling parameters
deseq2:
# normalization fit type, must be 'parametric' or 'mean'
fit_type: \"\"
# the \"design factors\" are the confounding variables to be adjusted for
# during normalization. They must be given in the configuration (samples.csv).
design_factors:
- \"condition\"
#
# the \"continuous factors\" are non-categorial factors to be considered
#continuous_factors:
# -
#
# The (log2) log fold change under the null hypothesis. (default: 0).
lfc_null: 1.0
#
# The alternative hypothesis for computing wald p-values. By default,
# the normal Wald test assesses deviation of the estimated log fold
# change from the null hypothesis, as given by lfc_null.
# One of [\"greaterAbs\", \"lessAbs\", \"greater\", \"less\"] or None.
# The alternative hypothesis corresponds to what the user wants to
# find rather than the null hypothesis. (default: None).
alt_hypothesis: \"greaterAbs\"
#
# The marker size in points**2 (typographic points are 1/72 in.).
# Default is rcParams['lines.markersize'] ** 2.# minimum count to
# be considered for subsequent analysis
point_width: 20
#
# we disregard loci with count number lower 'mincount'
mincount: 10
#
# Type I error cutoff value:
alpha: 0.05
#
# in addition to the full heatmap, plot the top number of different
# values, ranked by the top ratio between the two traits
threshold_plot: 10
#
# the heatmap color map
# see https://seaborn.pydata.org/tutorial/color_palettes.htm for an overview
colormap: \"Blues\"
#plot figure type
figtype: \"png\"
batch_effect:
- \"\"
#
## Differential Isoform Analysis

# The FLAIR splice-isoform analysis pipeline includes resource-intensive computations and only works with additional constraints.
# 1. In 'samples.csv: The 'condition' column must contain exactly two distinct values. For example 'control' and 'treated'.
# 2. In 'samples.csv: Refrain from using underscores when naming samples. The 'sample' column may contain underscores, but be aware that underscores will be removed from the name for isoform quantification steps.
# 3. In this file: the variable 'FLAIR' below must be:'true'. This is a check to determine if users are aware of the constraints and wish to proceed.
isoform_analysis:
# Enables FLAIR Isoform Analysis if 'true'
FLAIR: true
# Minimum MAPQ of read assignment to an isoform (default: 1).
qscore: 1
# min read count expression threshold. Isoforms which contain fewer than 'exp_thresh' (Default=10) reads in both conditions are filtered out.
exp_thresh: 10
# 'flair_collapse' options
# '--annotation-reliant' makes FLAIR align reads to the annotation before identifying novel transcripts for the remaining reads
# '--generate-map' to generate a txt file of read-isoform assignments
# '--stringent' for full-length supporting reads (>=80% coverage)
col_opts: \"--annotation_reliant generate --generate_map --stringent\"

# Query genes to identify similar proteins using \"lambda\"
protein_annotation:
# Enables lambda sequence alignment if 'true'
lambda: false
# Pre-formatted UniProt Reference Cluster database (default: UniRef50)
uniref: \"https://ftp.imp.fu-berlin.de/pub/lambda/index/lambda3/gen_0/uniref50_20230713.lba.gz\"
# maximum number of protein matches returned per sequence (default: 3)
num_matches: 3

# Enrichment Analysis Parameters
#enrichment:
# # Enable enrichment analysis if 'true'
# perform_enrichment: true
# # minimum number of genes to consider per pathway (default: 3)
# min_genes: 3
""" .
  # sub1:dataset: the schema:Dataset record. ns1:description is an HTML
  # fragment stored as a literal; ns1:generatedAt is an xsd:dateTime with an
  # explicit +00:00 offset.
  # NOTE(review): ns1:describesWorkflow is a plain string literal, not an IRI;
  # the same string appears in the rdfs:label in the pubinfo graph. Presumably
  # it identifies another nanopublication -- confirm whether an IRI was intended.
  sub1:dataset a schema:Dataset ;
    ns1:describesWorkflow "RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE" ;
    ns1:description """<div class=\"document\">
<blockquote>
This workflow performs differential expression analysis of RNA-seq data obtained from Oxford Nanopore long-read sequencing technology.
First a transcriptome FASTA is constructed using <a class=\"reference external\" href=\"https://github.com/gpertea/gffread\">gffread</a>. Reads are then mapped to the transcriptome with the long-read optimized alignment tool <a class=\"reference external\" href=\"https://github.com/lh3/minimap2\">minimap2</a>.
Next quantification is performed using <a class=\"reference external\" href=\"https://github.com/COMBINE-lab/salmon\">salmon</a> before normalization and differential expression analysis are conducted by <a class=\"reference external\" href=\"https://github.com/owkin/PyDESeq2\">PyDESeq2</a>.
The workflow can optionally analyze splice-isoforms through integrating the <a class=\"reference external\" href=\"https://github.com/BrooksLabUCSC/flair\">FLAIR</a> workflow.
Additionaly, <a class=\"reference external\" href=\"https://github.com/wdecoster/NanoPlot\">NanoPlot</a> is employed to analyze initial sequencing data and <a class=\"reference external\" href=\"https://github.com/EagleGenomics-cookbooks/QualiMap\">QualiMap</a> is used to evaluate mapping results.</blockquote>
</div>
""" ;
    ns1:generatedAt "2026-04-16T12:39:53.038569+00:00"^^xsd:dateTime ;
    ns1:hasConfigurationSection sub1:workflow-configuration .
  # sub1:workflow-configuration: labelled section that points at the
  # configuration file resource sub1:config-1 declared at the top of this graph.
  sub1:workflow-configuration rdfs:label "from workflow configuration" ;
    ns1:hasConfigurationFile sub1:config-1 .
}

# Provenance graph: states who the assertion graph is attributed to and when
# it was generated.
# NOTE(review): the generatedAtTime literal has no timezone offset, whereas
# dcterms:created in the pubinfo graph uses an explicit +00:00 -- confirm
# which timezone this value is expressed in.
sub1:provenance {
  sub1:assertion prov:wasAttributedTo orcid:0000-0003-2408-7588 ;
    prov:generatedAtTime "2026-04-16T14:40:02.674392"^^xsd:dateTime .
}

# Publication-info graph: metadata about the nanopublication itself, in two
# parts -- the signature element sub1:sig and the descriptive statements on
# `this:`.
sub1:pubinfo {
  # sub1:sig: signature record. It names the algorithm ("RSA"), carries the
  # public key and the signature value as string literals, and declares `this:`
  # as the signature target. Do not reformat or re-wrap the two long literals.
  sub1:sig npx:hasAlgorithm "RSA" ;
    npx:hasPublicKey "MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAkOFUnnRCp/k9/0ugvx8zQJ+Qc675W1Ug6F839+xvJ2QsSBu4iOJ1O2kJCmb3tALp3gJOt8sffRot3VrfY1hbgXxL7BVtDsfHHmXVff4YCeg5Ycdn5cDpLawDpAdYwMhK0LwIkZ3fwH9/o9JniYKXLV/jpF9bMKyiw/6tqlCHaMW1r8gzZzoxVIAakwvlABoY0iNoToLTlBRXEI4mLUNjDMnMwQgfh1KXMxMruNjW3wJyeDEIfa2ooAt0E4CRM9pkrEb37NzD9Jz8aSUFFY6BvIxF4ixK7rm6IUDvQ76LqXkEmgSeRv1kw7gnCe9wV/wHd0ZeW4heoBXmLHX3MvHfjwIDAQAB" ;
    npx:hasSignature "i5nNzwIVs4+Wc6YIJ5T0TPYAkKgaBptDSvfG1UaARi6MHHMQFxAvVAPhlmL5P6DJy3JNambG/HG33A5r8I83lE/qPeHOuy2Wv04QjUY9HCRMpmqWA6J4c8daxyml5nTCirqSA2Lus3pDggLdg6namIPGLxDP9LM7+M6ujF27zjtRDwL+aWytPqWQy15nkZrZJqQEUQGCUlQ6ODENHM70nhBDmoT51VzGBtvTAKVyKwS+Bif4j8dzQb+ZNsPkgf9tky5knmNAWZtH/dUkldgulLq/He4+H7ZaW4wtjqcr9wNBM2YTUEe2X5YwRhPlmugkpPfxHNcw0V0BGYERnDRggQ==" ;
    npx:hasSignatureTarget this: .
  # Descriptive metadata for the nanopublication: creation time, creator
  # (an ORCID IRI), nanopublication type, human-readable label and attribution.
  # NOTE(review): dcterms:created carries an explicit +00:00 offset, while
  # prov:generatedAtTime has no offset and reads about two hours later on the
  # clock -- possibly a local-time value; confirm with the producing tool.
  this: dcterms:created "2026-04-16T12:39:53.038569+00:00"^^xsd:dateTime ;
    dcterms:creator orcid:0000-0003-2408-7588 ;
    npx:hasNanopubType schema:Dataset ;
    rdfs:label "Snakemake workflow metadata: RAjHDlPDghZzc9ZvQ3uJQNJ9Jd_KAYzZt7dk5PXKgjRyE" ;
    prov:generatedAtTime "2026-04-16T14:40:02.674392"^^xsd:dateTime ;
    prov:wasAttributedTo orcid:0000-0003-2408-7588 .
}