/*
 * Create a summary table that includes expression, array CGH, SNP survival
 * and other results and gene annotation.
 *
 * Author: Kristian Ovaska, kristian.ovaska@helsinki.fi
 */

/*
 * SummaryTable
 *
 * Combines the per-gene, per-transcript and per-miRNA result tables given as
 * inputs with static tables loaded from csv/ and sql/, and produces:
 *   table      - the excelFile output of the CSV2Excel instance "excel"
 *                (that instance is currently declared with @enabled = false)
 *   resultSite - the HTMLReport instance "site"
 *   report     - the report output of GeneSetInfo for the selected gene set
 *
 * The names dataDir and ensembl are not defined in this function; they are
 * expected to be provided by the enclosing script.
 */
function SummaryTable(CSV   exprProbes,
                      CSV   transcriptProbes,
                      CSV   exonProbes,
                      CSV   miRNAProbes,
                      CSV   miRNAgain,
                      CSV   miRNAloss,
                      CSV   acghExonCorrLoss,
                      CSV   acghExonCorrGain,
                      CSV   acghGain,
                      CSV   acghLoss,
                      Latex kaplanMeier,
                      CSV   kaplanMeierStatistics,
                      Latex exonKaplanMeier,
                      CSV   exonKaplanMeierStat,
                      Latex miRNAKaplanMeier,
                      CSV   miRNAKaplanMeierStat,
                      CSV   survBloodGenes,
                      Latex snpSurvImages,
                      CSV   snpSurvAnnotation,
                      CSV   methylation)
    -> (Excel table, HTML resultSite, Latex report)
{
    // ---- Static inputs: queries --------------------------------------------
    summaryQuery      = INPUT(path="sql/summary.sql")
    imageMappingQuery = INPUT(path="sql/imageMapping.sql")

    // ---- Static inputs: data tables ----------------------------------------
    correlation = INPUT(path=dataDir+"/results/correlation/summary/allcorrelations.csv")
    sequenced   = INPUT(path="csv/sequenced.csv")
    vttHits     = INPUT(path="csv/vtt-hits.csv")
    vttHits2    = INPUT(path="csv/vtt-hits2.csv")
    vttScreen   = INPUT(path="csv/vtt-screen.csv")
    geneAlias   = INPUT(path="csv/gene-alias.csv")

    // ---- Static inputs: presentation ---------------------------------------
    style       = INPUT(path="csv/summary-style.csv")
    siteMapping = INPUT(path="csv/summarysite-mapping.csv")
    siteRefs    = INPUT(path="csv/summarysite-refs.csv")
    siteLabels  = INPUT(path="csv/summarysite-labels.csv")

    // Excel references are disabled for now because loading 5000 URLs in
    // OpenOffice takes some time.
    refs = INPUT(path="csv/summary-refs.csv", @enabled=false)

    // ---- Array CGH frequencies ---------------------------------------------
    // Give the "freq" column a distinct name in the gain and loss tables
    // before joining them into one table.
    acghGainConv = CSVFilter(acghGain, rename="freq=GainFreq")
    acghLossConv = CSVFilter(acghLoss, rename="freq=LossFreq")
    acghSummary  = CSVJoin(acghGainConv, acghLossConv, intersection=false)

    // ---- Array CGH / exon correlation --------------------------------------
    // Keep only Gene, weight and alpha, renamed per direction, then join.
    acghExonCorrLossConv = CSVFilter(acghExonCorrLoss,
                                     rename="weight=LossWeight,alpha=LossAlpha",
                                     includeColumns="Gene,weight,alpha")
    acghExonCorrGainConv = CSVFilter(acghExonCorrGain,
                                     rename="weight=GainWeight,alpha=GainAlpha",
                                     includeColumns="Gene,weight,alpha")
    acghExonCorrSummary  = CSVJoin(acghExonCorrLossConv, acghExonCorrGainConv,
                                   intersection=false)

    // Disabled code, kept commented out as in the original script:
    // exonHits = IDConvert(detTable, transcriptMap, sourceColumn=".TranscriptId",
    //     keyColumn="Transcript", conversionColumn="Gene", targetColumn="Gene")
    // correlationHits = CSVFilter(correlation, highBound="correlation=-0.7")
    // hits = CSV2IDList(exonHits, survBloodGenes, survTumorGenes, exprDegs, correlationHits,
    //     columnOut = "Gene",
    //     columnIn  = ",,,EnsemblId,")

    // ---- Gene list and annotation ------------------------------------------
    // The gene ID list is collected from the GeneID column of the three
    // probe tables.
    genes = CSV2IDList(exprProbes, transcriptProbes, exonProbes,
                       columnIn="GeneID")

    annotation = KorvasieniAnnotator(
        sourceKeys = genes.ids,
        connection = ensembl,
        inputDB    = ".GeneId",
        targetDB   = ".GeneName,_Uniprot/SWISSPROT,.GeneDesc,.DNABand,_EntrezGene",
        inputType  = "Gene")

    // ---- KEGG pathways -----------------------------------------------------
    // Pathways are looked up by the Uniprot/SWISSPROT column; afterwards the
    // QueryProteins column is converted to GeneID values through the
    // relation output of ExpandCollapse and renamed to "Members".
    uniprot    = CSVFilter(annotation.bioAnnotation,
                           includeColumns="GeneID,Uniprot/SWISSPROT")
    uniprotExp = ExpandCollapse(uniprot.csv, listCols="Uniprot/SWISSPROT")

    pathways = KEGGPathway(
        uniprotExp,
        column       = "Uniprot/SWISSPROT",
        cacheURL     = "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/hsa_xrefall.list",
        proteinNames = false,
        pathwaySizes = false)

    pathwaysEnsemblPre = IDConvert(pathways.pathways, uniprotExp.relation,
                                   keyColumn        = "Uniprot/SWISSPROT",
                                   conversionColumn = "GeneID",
                                   sourceColumn     = "QueryProteins",
                                   unique           = true,
                                   split            = true)

    pathwaysEnsembl = CSVFilter(pathwaysEnsemblPre,
                                includeColumns="Pathway,Name,QueryProteins,Description",
                                rename="QueryProteins=Members")

    // ---- Gene-level summary table ------------------------------------------
    // Step 1: run the external summary query over all gene-level tables.
    summaryTablePre2 = TableQuery(
        table1  = annotation,
        table2  = exprProbes,
        table3  = transcriptProbes,
        table4  = survBloodGenes,
        table5  = correlation,
        table6  = sequenced,
        table7  = acghSummary,
        table8  = vttHits,
        table9  = vttHits2,
        table10 = kaplanMeierStatistics,
        table11 = acghExonCorrSummary,
        table12 = methylation,
        table13 = vttScreen,
        table14 = geneAlias,
        table15 = exonProbes,
        query   = summaryQuery)

    // Step 2: append the exon survival pValue as the column
    // "MedianExonExpression:Survival", matching GeneID against "group".
    summaryTablePre = TableQuery(
        table1 = summaryTablePre2,
        table2 = exonKaplanMeierStat,
        query  = "SELECT T1.*, "+
                 " T2.\"pValue\" AS \"MedianExonExpression:Survival\" "+
                 "FROM table1 T1 "+
                 "LEFT OUTER JOIN table2 T2 ON T1.\"GeneID\" = T2.\"group\" ")

    // Step 3: the table is converted against itself on GeneID with
    // unique=true and collapseNumeric="consensus".
    // NOTE(review): presumably this merges duplicate GeneID rows - confirm
    // against the IDConvert documentation.
    summaryTable = IDConvert(summaryTablePre, summaryTablePre,
                             keyColumn        = "GeneID",
                             conversionColumn = "GeneID",
                             sourceColumn     = "GeneID",
                             unique           = true,
                             collapseNumeric  = "consensus")

    // Excel export of the summary table; the instance is currently disabled.
    excel = CSV2Excel(summaryTable.csv,
                      style         = style,
                      refs          = refs,
                      frozenRows    = 1,
                      frozenColumns = 1,
                      @enabled      = false)

    // ---- Images for the result site ----------------------------------------
    // miRNA names in the survival statistics use the "hsa.miR." prefix;
    // replace it with "hsa-miR-" before the table is used in queries below.
    miRNAstat = SearchReplace(miRNAKaplanMeierStat,
                              key00   = "hsa.miR.",
                              value00 = "hsa-miR-")

    imageMapping = TableQuery(kaplanMeierStatistics,
                              snpSurvAnnotation,
                              force miRNAstat,
                              exonKaplanMeierStat,
                              query=imageMappingQuery)

    images = LatexCombiner(kaplanMeier, snpSurvImages, miRNAKaplanMeier,
                           exonKaplanMeier)

    // ---- Transcript-level table --------------------------------------------
    // Transcript probes joined with the survival statistics (key columns
    // TranscriptID and group); pValue is exposed as SurvivalPValue.
    exonAnnotation = CSVJoin(transcriptProbes, kaplanMeierStatistics,
                             keyColumnNames="TranscriptID,group",
                             intersection=false)

    exonAnnotationFilt = CSVFilter(exonAnnotation,
                                   includeColumns="TranscriptID,GeneID,FoldChange,PValue,pValue",
                                   rename="pValue=SurvivalPValue")

    // ---- miRNA-level table -------------------------------------------------
    // miRNA probes with CGH gain/loss frequencies and survival p-values
    // attached by left outer joins on the miRNA name.
    miRNAtable = TableQuery(
        table1 = miRNAProbes,
        table2 = miRNAgain,
        table3 = miRNAloss,
        force table4 = miRNAstat,
        query  = "SELECT table1.\"miRNAName\", "+
                 " table1.\"miRNAId\", "+
                 " table1.\"FoldChange\", "+
                 " table1.\"PValue\", "+
                 " table4.\"pValue\" AS \"SurvivalPValue\", "+
                 " table2.\"freq\" AS \"CGH:Gain\", "+
                 " table3.\"freq\" AS \"CGH:Loss\", "+
                 " table1.\"TargetGenes\", "+
                 " table1.\"stemLoopName\", "+
                 " COALESCE(table1.\"DNABand\", table2.\"DNABand\", table3.\"DNABand\") AS \"DNABand\" "+
                 "FROM table1 "+
                 "LEFT OUTER JOIN table2 ON (table1.\"miRNAName\" = table2.\"miRNAName\") "+
                 "LEFT OUTER JOIN table3 ON (table1.\"miRNAName\" = table3.\"miRNAName\") "+
                 "LEFT OUTER JOIN table4 ON (table1.\"miRNAName\" = table4.\"group\") ")

    // ---- HTML result site --------------------------------------------------
    // Four tables, labelled in order: Gene, Splice variant, miRNA and
    // KEGG pathway.
    site = HTMLReport(
        table1       = summaryTable.csv,
        table2       = exonAnnotationFilt,
        table3       = miRNAtable,
        table4       = pathwaysEnsembl.csv,
        mapping      = siteMapping,
        refs         = siteRefs,
        images       = images,
        imageMapping = imageMapping.table,
        labels       = siteLabels,
        missingValue = "-",
        omitMissing  = false,
        digits       = 3,
        tableLabels  = "Gene,Splice variant,miRNA,KEGG pathway",
        colorStart   = "#66ff66",
        colorMiddle  = "#ffffff",
        colorEnd     = "#ff6666")

    // ---- Gene set report ---------------------------------------------------
    // Select the GeneID column of rows where TranscriptExpression:Survival is
    // below 0.002 and TranscriptExpression:Max is below 1.
    geneSet = CSVTransformer(summaryTable.csv,
        transform1="subset(csv1, csv1$'TranscriptExpression:Survival' < 0.002 & csv1$'TranscriptExpression:Max' < 1)['GeneID']")

    geneInfo = GeneSetInfo(force geneSet)

    return record(table      = excel.excelFile,
                  resultSite = site,
                  report     = geneInfo.report)
}

/*
 * GeneSetInfo
 *
 * Builds a LaTeX report for a list of genes.
 *
 * Inputs:
 *   ensemblGenes - gene ID list passed to KorvasieniAnnotator with
 *                  inputDB=".GeneId"
 *   expr         - optional LogMatrix, passed on to GOClustering
 *
 * Output:
 *   report - the document output of a LatexCombiner titled
 *            "Gene set information", combining the GOClustering report,
 *            the three GO enrichment graphs (BP, CC, MF) and a KEGG
 *            pathway table
 *
 * The name ensembl is not defined in this function; it is expected to be
 * provided by the enclosing script.
 */
function GeneSetInfo(IDList ensemblGenes,
                     optional LogMatrix expr)
    -> (Latex report)
{
    // Annotation; the Uniprot/SWISSPROT column is renamed to "Uniprot".
    annotation = KorvasieniAnnotator(ensemblGenes,
                                     connection = ensembl,
                                     inputDB    = ".GeneId",
                                     targetDB   = "GO,_EntrezGene,_Uniprot/SWISSPROT",
                                     rename     = "Uniprot/SWISSPROT=Uniprot")

    // GO clustering and enrichment, both based on the annotation.
    goHeatmap    = GOClustering(annotation, expr)
    goEnrichment = GOEnrichment(annotation, threshold=0.2)

    // One figure per enrichment graph output.
    graphBP = GraphVisualizer(goEnrichment.graphBP)
    graphCC = GraphVisualizer(goEnrichment.graphCC)
    graphMF = GraphVisualizer(goEnrichment.graphMF)

    // KEGG pathways looked up by the "Uniprot" column, rendered as a
    // LaTeX table.
    uniprotExpanded = ExpandCollapse(annotation.bioAnnotation, listCols="Uniprot")
    kegg = KEGGPathway(uniprotExpanded.relation,
                       column   = "Uniprot",
                       cacheURL = "ftp://ftp.genome.jp/pub/kegg/genes/organisms/hsa/hsa_xrefall.list")
    keggReport = CSV2Latex(kegg.pathways,
                           columns = "Pathway,Name,QueryProteins",
                           caption = "KEGG pathways")

    report = LatexCombiner(goHeatmap.report,
                           graphBP.figure,
                           graphCC.figure,
                           graphMF.figure,
                           keggReport.report,
                           sectionTitle="Gene set information")

    return report.document
}