/* Anduril Case study Array CGH analysis pipeline author: Riku Louhimo */ //datadir = INPUT(path="/storageExt/tcga-glioblastoma/acgh/acgh-data/cangem/DataFiles/") function Segmentation(CSV normalized, CSV samples, AnnotationTable annotation, int chr ) -> (Latex report, CSV segments, CSV GTSgain, CSV GTSloss, CSV tholds) { chrFilter = CSVFilter(annotation, regexp="Chr="+chr) filtNormCaseChan = CSVFilter(csv = normalized, auxiliary = chrFilter, matchColumn = "ProbeName") upperLimit = 0.631759089316322 lowerLimit = -0.631759089316322 // Segment aCGH data and call copy number changes segmented = ACGHsegment(caseChan = filtNormCaseChan, force geneAnnotation = chrFilter, analysisAlgorithm = 0, upperLimit = upperLimit, lowerLimit = lowerLimit) return record( report = segmented.report, segments = segmented.segments, GTSgain = segmented.GTSgain, GTSloss = segmented.GTSloss, tholds = segmented.tholds) } function ACGHAnalysis(string datadir) -> (CSV aberrations, CSV tholds, CSV association, CSV loclist, CSV segmentedVals, Latex report) { /** Physical location of array CGH data in the file system. */ dataLoc = INPUT(path=datadir) /** Sample Annotations for array CGH data. */ samples = INPUT(path="csv/ACGHsamples.lst") // Read Agilent files and output annotations and log2 expression ratios aCGH = AgilentReader(sampleNames = samples, agilent = dataLoc, filter = "ControlType!=0 || (gIsSaturated==1 && rIsSaturated==1) || gIsFeatNonUnifOL!=0 || "+ "rIsFeatNonUnifOL!=0 || gIsBGNonUnifOL!=0 || rIsBGNonUnifOL!=0 || gIsFeatPopnOL!=0 || "+ "rIsFeatPopnOL!=0 || IsManualFlag!=0 || gIsPosAndSignif!=1 || rIsPosAndSignif!=1 || "+ "SystematicName==\"unmapped\" ", channelColumns = "gProcessedSignal,gMedianSignal,rProcessedSignal,rMedianSignal", probeAnnotation = "SystematicName,Row,Col") /** Filter out bad quality probes from raw data channels. */ fChannels = QuantileFilter(matrix = aCGH.green, matrix2 = aCGH.red, lowQuantile = 0.05) /** Filter out probeIDs from the geneAnnotation table. */ probeIDs = CSV2IDList(table1 = fChannels.matrix2, columnIn = "ProbeName") fAnnot = CSVFilter(csv = aCGH.probeAnnotation, auxiliary = probeIDs) // Normalize aCGH data within arrays using LOESS normalized = ACGHnorm(casechannel = fChannels.matrix2, control = fChannels.matrix) /** Agilent probe mappings from human genome 36 to 37. */ agil36to37map = INPUT(path=dataDir+"/acgh/agilentProbes37.csv") reMappedAnnot = TableQuery(table1 = agil36to37map, table2 = fAnnot, query = "SELECT table2.\"ProbeName\", "+ " table1.\"Chr\", "+ " table1.\"start\", "+ " table1.\"end\" "+ "FROM table1, table2 "+ "WHERE table1.\"ProbeName\" = table2.\"ProbeName\" ") /** Rules for search and replace of DNA region annotations. */ rules = INPUT(path="csv/ACGHchr2.rules") srFAnnot = SearchReplace(force reMappedAnnot.table, rules = rules) include "ACGHSegments.and" /** Filename to patient id mappings. */ sampFileMap = INPUT(path="csv/ACGHsample2file-map.lst") /** SQL query for aberration pattern extraction. */ abberSQL = INPUT(path="sql/ACGHabber.sql") /** Print out aberration patterns. */ aberrations = QueryReport(table1 = segmented, table2 = sampFileMap, query = abberSQL, showQuery = false, showCols = "ID,chr,start,end,nroOfprobes,segmean,call", colFormat = "lp{1cm}llp{2cm}p{1cm}p{1cm}", section = "Copy number abberations") include "ACGHSegmentAnalysis.and" /** Analyze interesting regions from segmented Array CGH regions exhibiting copy number gain. */ gains = ACGHSegmentAnalysis(segments = aberrations.data, threshold=0.1, gainOrLoss= 1, annotateRegs=true) /** Analyze interesting regions from segmented Array CGH regions exhibiting copy number loss. */ losses = ACGHSegmentAnalysis(segments = aberrations.data, threshold=0.1, gainOrLoss=-1, annotateRegs=true) /** Analyze interesting regions from segmented Array CGH regions exhibiting copy number gain and loss. */ both = ACGHSegmentAnalysis(segments = aberrations.data, threshold=0.2, gainOrLoss= 0, annotateRegs=false) segplot = SegmentPlot(aberrations.data, lineWidth = 1, sampleColumn = "Sample", sThold = 0.1) // Quality Control boxPlot = BoxPlot(normalized.casechannel) maPlot = Plot2D(@enabled = false, y = normalized.casechannel, x = normalized.controlchannel, xLabel = "Fit curve", yLabel = "Residuals", title="%s vs %s", caption = "MA plot for normalized values (%s vs %s)") // Final output qc = LatexCombiner(boxPlot.report, maPlot.plot, sectionTitle = "Quality Control") analysis = LatexCombiner(segmentedReport, segplot.plot, gains.report, losses.report, aberrations.report, qc, sectionTitle = "ArrayCGH: Segmentation Results") template = LatexTemplate(authors = "Riku Louhimo, Sampsa Hautaniemi", title = "Glioblastoma Array CGH analysis") summaryReport = LatexPDF(analysis, header = template.header, footer = template.footer, useRefs = false) return record( aberrations = aberrations.data, tholds = chr1.tholds, association = both.association, loclist = loclist, force segmentedVals = segmentedVals.stdout, report = analysis ) }