My sequencing files are named according to the following pattern: lane5651_AAGAGGCA_00h_Cell_WT3_L008_R1.fastq.gz. I would like to capture the sample ID as 00h_Cell_WT3 in order to name all downstream files accordingly. To this end, I wrote the following snippet:

 1#!/usr/bin/env nextflow
 2
 3// fastq files are stored in reads as paired ends R1 and R4
 4params.reads = 'reads/lane*_*_*_*_R{1,4}.fastq.gz'
 5
 6Channel
 7     .fromFilePairs(params.reads, flat: true)
 8     .map { prefix, file1, file2 -> tuple(getSampleID(prefix), file1, file2) }
 9     .set { samples_ch }
10
/**
 * Extract the sample ID (e.g. `00h_Cell_WT3`) from a paired-end file prefix.
 *
 * With `flat: true`, fromFilePairs emits a triplet whose first item is the
 * file name without the `R{1,4}.fastq.gz` tail, so the pattern stops at the
 * lane field (e.g. `L008`) instead of the read suffix.
 *
 * @param file prefix such as `lane5651_AAGAGGCA_00h_Cell_WT3_L008`
 * @return the third capture group, i.e. the text between the 8-letter
 *         barcode and the lane field
 * @throws IllegalArgumentException if the prefix does not match the naming pattern
 */
def getSampleID( file ) {
    // `def` keeps these variables local; an undeclared name assigned inside a
    // script method is stored in the script binding and becomes global.
    def regexpPE = /([a-z]{4}[0-9]{4})_([A-Z]{8})_(.+)_(L[0-9]{3})/
    def matcher = (file =~ regexpPE)
    if( !matcher.find() ) {
        // fail with the offending name instead of an opaque index error
        throw new IllegalArgumentException("Cannot extract the sample ID from '${file}': expected <lane>_<barcode>_<sampleID>_<Lnnn>")
    }
    // group 3 is the sample ID
    matcher.group(3)
}

// Print each sample ID together with the two read files of its pair.
process printNames {
    input:
    // one (sampleId, file1, file2) tuple per pair, read from samples_ch
    set sampleId, forward_reads, reverse_reads from samples_ch

    output:
    // the standard output of the script below is sent to the `result` channel
    stdout result

    """
    echo $sampleId 'and' $forward_reads 'and' $reverse_reads
    """
}

// Print every item emitted on the `result` channel.
result.subscribe { println it }

For single-end experiments, the following snippet can be used:

 1#!/usr/bin/env nextflow
 2
 3// fastq files are stored in reads as single ends R1
 4params.reads = 'reads/lane*_*_*_*_R1.fastq.gz'
 5
 6Channel
 7     .fromPath(params.reads)
 8     .map { sample -> tuple(getLibraryId(sample), sample) }
 9     .set { samples_ch }
10
/**
 * Extract the sample ID (e.g. `00h_Cell_WT3`) from a single-end FASTQ file.
 *
 * The pattern is applied to the file name only (`file.name`), which is
 * expected to look like `<lane>_<barcode>_<sampleID>_<Lnnn>_<R1-4>.fastq.gz`.
 *
 * @param file file object exposing a `name` property, e.g.
 *             `lane5651_AAGAGGCA_00h_Cell_WT3_L008_R1.fastq.gz`
 * @return the third capture group, i.e. the text between the 8-letter
 *         barcode and the lane field
 * @throws IllegalArgumentException if the file name does not match the naming pattern
 */
def getLibraryId( file ) {
    // `def` keeps these variables local; an undeclared name assigned inside a
    // script method is stored in the script binding and becomes global.
    // The dots of `.fastq.gz` are escaped so they match literal dots only.
    def regexp = /([a-z]{4}[0-9]{4})_([A-Z]{8})_(.+)_(L[0-9]{3})_(R[1234])(\.fastq\.gz)/
    def matcher = (file.name =~ regexp)
    if( !matcher.find() ) {
        // fail with the offending name instead of an opaque index error
        throw new IllegalArgumentException("Cannot extract the sample ID from '${file.name}': expected <lane>_<barcode>_<sampleID>_<Lnnn>_<R1-4>.fastq.gz")
    }
    // group 3 is the sample ID
    matcher.group(3)
}

// Print each sample ID together with its read file.
process printNames {
    input:
    // one (sampleId, file) tuple per FASTQ file, read from samples_ch
    // NOTE(review): the input variable `file` shares its name with Nextflow's
    // `file` input qualifier; consider renaming it, after confirming that
    // nothing else relies on the name.
    set sampleId, file from samples_ch

    output:
    // the standard output of the script below is sent to the `result` channel
    stdout result

    """
    echo $sampleId 'and' $file
    """
}

// Print every item emitted on the `result` channel.
result.subscribe { println it }