//---------------------------------------------
// Anduril configuration for anduril casestudy
// September 2009, Riku Louhimo,
// Revised and refactored on 17/2/2010
//
// Integration of Exon expression data
// with Array CGH results obtained by CBS
//--------------------------------------------
// NOTE(review): the header above names CBS, while the instance descriptions
// below refer to the GTS algorithm -- confirm which one is current.

//----------
// FUNCTIONS
//----------

// ACGHexonIntegration
//
// Integrates segmented array CGH copy number calls with gene level
// expression values and renders the best hits as a LaTeX table.
//
// Inputs:
//   GTSsegm    segmented array CGH data (passed to the R filter script as table1)
//   gtsOutput  table holding at least the columns "gts.chr" and "gts.pos"
//   tholds     thresholds table (passed to the R filter script as table4)
//   gain       true  -> use the gain filter script and label the report "gain"
//              false -> use the loss filter script and label the report "loss"
//
// Outputs:
//   exons        raw KorvasieniAnnotator output (integratedExonACGH), i.e. before
//                the CSVCleaner column selection/renaming
//   labels       transAnnot, the expression/annotation join
//                (NOTE(review): not labelMat -- confirm this is intended)
//   reportTable  LaTeX table of at most 100 rows produced by CSV2Latex
//
// The names genelevelExpressionFileName, transcriptAnnotationFileName and
// ensembl are not defined in this function; they must come from the
// enclosing configuration.
function ACGHexonIntegration(CSV GTSsegm,
                             CSV gtsOutput,
                             CSV tholds,
                             boolean gain)
                         -> (CSV exons,
                             CSV labels,
                             Latex reportTable)
{
    // Scripts
    /** R script that compares two matrices and orders them column- and rowwise in the same order. */
    script = INPUT(path="r/ACGHcheckInput.r")
    // NOTE(review): script2 carries the same description as script although it
    // loads a different file; script yields labelMat and script2 yields exprMat below.
    /** R script that compares two matrices and orders them column- and rowwise in the same order. */
    script2 = INPUT(path="r/ACGHcheckInput2.r")

    // Sample annotation
    /** Sample ID mappings between array CGH and exon data. */
    ExonAnnot = INPUT(path="csv/ACGHexonAnnot.csv")

    // INPUT data
    /** Transcript expression matrix. */
    expr = INPUT(path=genelevelExpressionFileName)
    // NOTE(review): exprAnnot is not referenced anywhere else in this function.
    /** Transcript expression annotations. */
    exprAnnot = INPUT(path=transcriptAnnotationFileName)

    // Build explanatory ("label") matrix
    // SUBSTR(..., 4) keeps "gts.chr" from its fourth character onward
    // (presumably dropping a "chr" prefix -- confirm against the GTS output).
    gainsList = TableQuery(table1 = gtsOutput,
                           query = "SELECT SUBSTR(\"gts.chr\", 4) AS \"gts.chr\", "+
                                   " \"gts.pos\" "+
                                   "FROM table1 ")

    // Both branches feed the same four tables to REvaluate; only the R filter
    // script differs between gain and loss.
    if (gain){
        /** Filter script for segmented array CGH data (gain regions) from GTS algorithm. */
        GTSoutputG = INPUT(path="r/ExonIntegrationG.r")
        ProbeIDs = REvaluate(GTSoutputG,
                             table1 = GTSsegm,
                             table2 = ExonAnnot,
                             table3 = gainsList,
                             table4 = tholds)
    } else {
        /** Filter script for segmented array CGH data (loss regions) from GTS algorithm.
         */
        GTSoutputL = INPUT(path="r/ExonIntegrationL.r")
        ProbeIDs = REvaluate(GTSoutputL,
                             table1 = GTSsegm,
                             table2 = ExonAnnot,
                             table3 = gainsList,
                             table4 = tholds)
    }

    // Look up genes for the probe positions (columns "ID", "chr", "loci")
    // through the ensembl connection.
    AnnotProbeGenes = NextGene(sourceKeys = ProbeIDs.table,
                               connection = ensembl,
                               bpBefore = 0,
                               bpAfter = 1,
                               idColumn = "ID",
                               chrColumn = "chr",
                               baseColumn = "loci",
                               nearestOnly = false)

    // Prefix every probe row with its ".GeneId"; the LEFT OUTER JOIN keeps
    // probes that have no match in AnnotProbeGenes.
    matchAnnot = TableQuery(table1 = ProbeIDs.table,
                            table2 = AnnotProbeGenes,
                            query = "SELECT \".GeneId\", table1.* FROM table1 "+
                                    "LEFT OUTER JOIN table2 ON (table2.\"ID\" = table1.\"ID\") ")

    // expand = false: presumably collapses rows, gathering ID, chr and loci
    // into list columns -- confirm against the ExpandCollapse documentation.
    assocExpand = ExpandCollapse(matchAnnot,
                                 expand = false,
                                 listCols="ID,chr,loci")

    // Driven from the expression matrix (table2): every "GeneID" in expr is
    // kept, with the matching probe annotation columns attached where present.
    transAnnot = TableQuery(table1 = assocExpand,
                            table2 = expr,
                            query = "SELECT \"GeneID\", table1.* FROM table2 "+
                                    "LEFT OUTER JOIN table1 ON (table2.\"GeneID\" = table1.\".GeneId\") ")

    // Filter out duplicate probes
    // NOTE(review): meaning of idColumn = 1 / startColumn = 6 is defined by
    // RowJoin; presumably keyed on the first column -- confirm.
    unDuplic = RowJoin(matrix = transAnnot,
                       idColumn = 1,
                       startColumn = 6)

    // Construct matrices such that the rows and columns correspond to one another
    labelMat = REvaluate(script,
                         table1 = unDuplic, // commented-out alternative: transAnnot
                         table2 = expr,
                         @enabled = true)
    exprMat = REvaluate(script2,
                        table1 = unDuplic, // commented-out alternative: transAnnot
                        table2 = expr,
                        @enabled = true)

    // Integrate explanatory data with expression data
    intGreat = ExpExpIntegration(exprMatrix = exprMat.table,
                                 labelMatrix = labelMat.table,
                                 nroOfperms = 1000,
                                 gainData = gain,
                                 idColumn = 1)

    /** Process output from copy number analysis and exon integration together.
     */
    outputList = TableQuery(table1 = intGreat.Values,
                            query = "SELECT \"GeneID\" AS \"Gene\", "+
                                    " \"weight\", \"alpha\" FROM table1 ")

    // Annotate each gene with ".GeneName", ".DNABand" and "GO" while echoing
    // the Gene, weight and alpha columns. This instance is also the "exons" output.
    integratedExonACGH = KorvasieniAnnotator(outputList,
                                             ensembl,
                                             echoColumns = "Gene,weight,alpha",
                                             keyColumn = "Gene",
                                             inputDB = ".GeneId",
                                             inputType = "Gene",
                                             targetDB = ".GeneName,.DNABand,GO")

    // Keep five columns (GO is not listed) and rename the dot-prefixed ones.
    integrated = CSVCleaner(integratedExonACGH,
                            columns = ".GeneName,Gene,.DNABand,weight,alpha",
                            rename = ".GeneName=GeneName,.DNABand=band")

    // Rows with a non-null alpha and a positive weight, ordered by ascending
    // alpha and then descending weight, capped at 100 rows.
    sortedIntegrated = TableQuery(integrated,
                                  query = "SELECT * FROM table1 "+
                                          "WHERE \"alpha\" IS NOT NULL AND "+
                                          " \"weight\" > 0 "+
                                          "ORDER BY \"alpha\",\"weight\" DESC "+
                                          "LIMIT 100 ")

    refs = INPUT(path="csv/ACGHexonRefs.csv")

    // Word inserted into the report section title.
    if (gain){
        aberrationType = "gain"
    } else {
        aberrationType = "loss"
    }

    // colFormat declares five LaTeX columns, one per entry in "columns".
    report = CSV2Latex(sortedIntegrated,
                       columns = "GeneName,Gene,band,weight,alpha",
                       colFormat = "lp{2.5cm}p{2cm}p{2cm}p{2cm}",
                       numberFormat = "weight=#0.00000,alpha=#0.00000",
                       refs = refs,
                       listCols = "band",
                       section = "Array CGH: integration results for CN "+aberrationType+" regions and expression",
                       sectionType = "section",
                       caption = "Top 100 integration results of gene level transcript expression values with "+
                                 "copy number aberration regions. The weight indicates the "+
                                 "correlation in gene level transcript expression between aberrated and non-aberrated samples. ")

    // "exons" is bound with the force keyword; "labels" is the unfiltered
    // transAnnot join rather than the reordered labelMat.
    return record (labels = transAnnot,
                   force exons = integratedExonACGH,
                   reportTable = report )
}