// ------------------------------------------------------------ // Andril Case Study - Determining interesting ArrayCGH regions // based on segmentation results. // author: Riku Louhimo // September 2009 // Revised and refactored on 17/2/2010 // ------------------------------------------------------------ // ---------- // Functions // ---------- function associations(CSV segplot, CSV regions, CSV segments) -> (CSV regionBand, CSV association) { /** SQL query for ACGH region association manipulation. */ asc = INPUT(path="sql/ACGHassoc.sql") /** R script for ACGH region association manipulation. */ rev = INPUT(path="r/ACGHreval.r") association1 = TableQuery(query = asc, table1 = segments, table2 = segplot, table3 = regions) rBand = TableQuery(table1 = association1, query = "SELECT DISTINCT(\"Region\"), "+ "\"band\", "+ "\"chr\", "+ "\"end\", "+ "\"start\", "+ "SUM(\"rank\")/COUNT(\"Region\") AS \"rank\" "+ "FROM table1 "+ "GROUP BY \"Region\",\"band\",\"chr\",\"end\",\"start\" ") association = REvaluate(table1 = association1, script = rev) return record( regionBand = rBand, association = association.table ) } function AnnotateSegs(CSV segplot) -> (CSV annotations, CSV reportTable, CSV regions) { annot = KorvasieniAnnotator(segplot, connection = ensembl, inputDB = ".DNARegion", inputType = "Gene", keyColumn = "chromosome:start-end", targetDB = ".GeneId,.DNABand", unique = true, rename = "chromosome:start-end=chr", echoColumns = "ID,freq,chromosome") annotExpand = ExpandCollapse(annot, listCols=".GeneId") annotGenes = KorvasieniAnnotator(annotExpand, connection = ensembl, inputDB = ".GeneId", inputType = "Gene", keyColumn = ".GeneId", targetDB = ".GeneName,.GeneDesc,.DNABand,.DNARegion,GO", unique = true, echoColumns = "freq,chr,chromosome,ID", rename = ".GeneName=gene name,ID=Region") filtSegs = TableQuery(annotGenes, query = "SELECT * FROM table1 "+ "ORDER BY \"freq\" DESC ") return record( annotations = annotGenes, reportTable = filtSegs.table, regions = annot ) } function ACGHSegmentAnalysis(CSV segments, float threshold, int gainOrLoss, boolean annotateRegs) -> (Latex report, CSV regionSummary, CSV association, CSV regionBand) { // ---------- // Components // ---------- segplot = SegmentPlot(chrSegments = segments, sampleColumn = "ID", lineWidth = 1, sThold = threshold, gainsOrLosses = gainOrLoss) /** This function annotates interesting genomic regions with ensembl genes. */ gAndLoss = AnnotateSegs(segplot=segplot.SignRegion) /** Perform data transformation to enable regionwise association analysis. */ associationLysis = associations(segplot.SignRegion, gAndLoss.regions, segments=segments) if (annotateRegs){ refs = INPUT(path="csv/refsSegRank.csv") abb = TableQuery(table1 = gAndLoss.reportTable, table2 = associationLysis.regionBand, query = "SELECT T1.*, "+ "ROUND(T2.\"rank\", 5) AS \"rank\" "+ "FROM table1 T1, table2 T2 "+ "WHERE (T1.\"Region\" = T2.\"Region\") "+ "ORDER BY \"rank\" DESC ") abbers = CSV2Latex(abb.table, columns = "gene name,chr,.DNABand,.GeneDesc,rank", colFormat = "llp{2cm}p{10cm}r", listCols = ".DNABand", refs = refs, section = "Interesting copy number "+gainOrLoss+" segments", countRows = true) // -------- // Summary report outputs // -------- summaryReport = LatexCombiner(segplot.plot, abbers.report) //gAndLoss.reportTable) //gAndLoss.GOenrichment, //confrep.report) } else { summaryReport = LatexCombiner(segplot.plot) } // Summary table output // -------------------- ArrayCGHgenes = TableQuery(table1 = gAndLoss.annotations, query = "SELECT \".GeneId\", \"freq\" FROM table1") return record ( report = summaryReport.document, regionSummary = ArrayCGHgenes.table, association = associationLysis.association, regionBand = associationLysis.regionBand ) }