function SNPSurvival() -> (CSV statistics, CSV annotResults, CSV annotFull, Latex kmReport, Latex report){ function SurvivalAnalysis(CSV markers, float pvalue) -> (CSV statistics, CSV geneRank, CSV fullData, Latex kmReport, Latex report){ // ---------- // SETTINGS // ---------- authors = "Marko Laakso, Riku Louhimo, Sampsa Hautaniemi" studyTitle = "Glioblastoma SNP survival analysis" cmdLaTeX = "pdflatex" cmdPS2PDF = "ps2pdf" readSNPs = "changed" //"once" pLimit = pvalue caseSQLWhere = "-- No additional restrictions" // -------- // INPUTS // -------- nullCalls = "--,ERR" useDels = false naLimit = 0.1 studyTitle = "Anduril Case Study: "+studyTitle /** Clinical parameters for the case samples */ clinicalData = INPUT(path="data/snp/patient-metadata-caucas.csv") /** Column types for the clinical data */ clinicalTypes = INPUT(path="csv/SNPclinicalTypes.csv") /** Select relevant parameters for the survival analysis */ survivalParamsTemplate = INPUT(path="sql/SNPsurvivalParams.sql") /** This SQL is used to generate marker annotation report. */ annotSQL = INPUT(path="sql/SNPgeneTable.sql") /** Generates a list of genes and the marker counts for them. */ geneRankQuery = INPUT(path="sql/SNPinterestingGenes.sql") /** Hyperlink rules for the marker annotations */ refsHits = INPUT(path="csv/SNPrefsHits.csv") /** Hyperlink rules for the gene rank table */ refsGeneRank = INPUT(path="csv/SNPrefsGeneRank.csv") /** Annotations for survival component. */ snpAnnotations = INPUT(path="csv/SNPAnnotation.csv") // ----------- // FUNCTIONS // ----------- function GetGenotypes(IDList markers, IDList sampleNames, float skipFrq) -> (SNPMatrix genotypes, CSV naDist) { genotypes = SNPHelistinReader(config = genotypeDB, markerNames = markers, sampleNames = sampleNames, skipUnknown = true) freqCheck = AlleleCounter(genotypes, dels = useDels, skip = skipFrq, naLimit = naLimit) ftMarkers = CSV2IDList(freqCheck.frequencies, columnIn="marker", columnOut="marker") gtypes = SNPHelistinReader(config = genotypeDB, markerNames = ftMarkers, sampleNames = sampleNames, skipUnknown = true) return record( genotypes = gtypes.genotypes, naDist = freqCheck.naDist ) } function AnnotateSNPs(CSV markerTable, string caption) -> (Latex report, AnnotationTable genes, CSV fullData) { snpLoci = RefSNPAnnotator(markerTable, keyColumn="marker") snpRel = ExpandCollapse(relation=snpLoci.annotations, listCols="ensembl_gene_stable_id", expand=true) genes = KorvasieniAnnotator(sourceKeys = snpLoci.annotations, connection = ensembl, inputDB = ".GeneId", inputType = "Gene", keyColumn = "ensembl_gene_stable_id", isListKey = true, targetDB = ".GeneName,.GeneDesc,.DNABand,GO,UniProt/SwissProt", unique = true, echoColumns = "refsnp_id", rename = "UniProt/SwissProt=protein") annotData = TableQuery(table1 = snpRel.relation, table2 = genes.bioAnnotation, table3 = markerTable, query = annotSQL) annotTable = CSV2Latex(tabledata = annotData, refs = refsHits, columns = "hit,marker,chr,locus,band,gene,gene description", colFormat = "l@{}lr@{:}lp{1cm}p{1.5cm}p{10.5cm}", listCols = "band", caption = caption) return record( report = annotTable.report, genes = genes.bioAnnotation, fullData = annotData.table ) } // ------------ // COMPONENTS // ------------ survivalParamsQuery = SearchReplace(survivalParamsTemplate, key00 = "REP.cond00", value00 = caseSQLWhere) survivalClinical = TableQuery(query = survivalParamsQuery, columnTypes = clinicalTypes, table1 = clinicalData) caseNames = CSV2IDList(survivalClinical, columnOut = "SampleId") /** Load and preprocess the genotype calls for the cancer patient samples. */ caseGenotypes = GetGenotypes(markers, caseNames, skipFrq=0.1, @execute = readSNPs) clinicalSelect = SourceCode2Latex(source = survivalParamsQuery, language = "SQL", caption = "This SQL query has been used to select "+ "clinical parameters for the survival calculations.") survival = SNPKaplanMeier(genotypes = caseGenotypes.genotypes, survival = survivalClinical, annotations = snpAnnotations, PLimit = pLimit, naLimit = naLimit, minFreq = 0.10, minCount = 15, timeOutLimit = 36, survRatePoint = 0.5, timeCol = "endTime", timeStartCol = "startTime", statusCol = "hasEvent", sampleCol = "sample", skipMissingGT = true, showCI = true, nullCalls = nullCalls) /** Biological annotations for the survival affecting genes */ survSNPAnnot = AnnotateSNPs(survival.statistics, caption = "Gene annotations and the loci of the survival "+ "affecting markers") survivalStatistics = TableQuery(survival.statistics, survSNPAnnot.fullData, query = "SELECT table1.*, table2.\"ensembl\" FROM table1 "+ "LEFT OUTER JOIN table2 ON (table1.\"marker\" = table2.\"marker\") ") geneRank = QueryReport(query = geneRankQuery, table1 = survSNPAnnot.fullData, table2 = survival.statistics, refs = refsGeneRank, section = "Gene rank", showQuery = false, showCols = "gene,markers,p-value,description", colFormat = "lcrp{11cm}", tableCaption = "A list of survival affecting genes and the marker counts. "+ "P-value is the lowest value for the corresponding markers.") survivalReport = LatexCombiner(survival.report, clinicalSelect.report, survSNPAnnot.report, geneRank.report, pagebreak = true) missingDistC = CSV2Latex(caseGenotypes.naDist, colFormat = "p{3cm}r", caption = "Distribution of the missing values in case samples") propertiesDoc = Properties2Latex(genotypeDB, section = "SNP database configuration") reportBody = LatexCombiner(survivalReport, propertiesDoc, missingDistC) reportTemplate = LatexTemplate(abstract = abstract, authors = authors, printTOC = true, title = studyTitle) summaryReport = LatexPDF(header = reportTemplate.header, footer = reportTemplate.footer, document = reportBody.document, latexExec = cmdLaTeX, useRefs = false) return record( statistics = survivalStatistics, geneRank = geneRank.data, fullData = survSNPAnnot.fullData, kmReport = survival.report, report = reportBody.document ) } markers1 = INPUT(path="/opt/sandbox/snpDb/markers/markers1.lst") markers2 = INPUT(path="/opt/sandbox/snpDb/markers/markers2.lst") markers3 = INPUT(path="/opt/sandbox/snpDb/markers/markers3.lst") markers4 = INPUT(path="/opt/sandbox/snpDb/markers/markers4.lst") markers5 = INPUT(path="/opt/sandbox/snpDb/markers/markers5.lst") markers6 = INPUT(path="/opt/sandbox/snpDb/markers/markers6.lst") markers7 = INPUT(path="/opt/sandbox/snpDb/markers/markers7.lst") markers8 = INPUT(path="/opt/sandbox/snpDb/markers/markers8.lst") markers9 = INPUT(path="/opt/sandbox/snpDb/markers/markers9.lst") markers10 = INPUT(path="/opt/sandbox/snpDb/markers/markers10.lst") survivalSet1 = SurvivalAnalysis(markers=markers1, pvalue=0.001) survivalSet2 = SurvivalAnalysis(markers=markers2, pvalue=0.001) survivalSet3 = SurvivalAnalysis(markers=markers3, pvalue=0.001) survivalSet4 = SurvivalAnalysis(markers=markers4, pvalue=0.001) survivalSet5 = SurvivalAnalysis(markers=markers5, pvalue=0.001) survivalSet6 = SurvivalAnalysis(markers=markers6, pvalue=0.001) survivalSet7 = SurvivalAnalysis(markers=markers7, pvalue=0.001) survivalSet8 = SurvivalAnalysis(markers=markers8, pvalue=0.001) survivalSet9 = SurvivalAnalysis(markers=markers9, pvalue=0.001) survivalSet10 = SurvivalAnalysis(markers=markers10, pvalue=0.001) combinedSurvivalpre = CSVListJoin(file1 = survivalSet1.statistics, file2 = survivalSet2.statistics, file3 = survivalSet3.statistics, file4 = survivalSet4.statistics, file5 = survivalSet5.statistics, file6 = survivalSet6.statistics, file7 = survivalSet7.statistics, fileCol = "") combinedSurvival = CSVListJoin(file1 = combinedSurvivalpre, file2 = survivalSet8.statistics, file3 = survivalSet9.statistics, file4 = survivalSet10.statistics, fileCol = "") combinedSurvivalpost = SearchReplace(combinedSurvival, key00 = "A\\.", value00 = "A-") /** Fetch rsnumbers based on AffySNP-ids */ finalMarkerSetPre = AffySNPFetch(combinedSurvivalpost, snpdb, columnIn="marker") finalMarkerSet = CSV2IDList(finalMarkerSetPre, columnIn="marker") /** Redo analysis with rsIDs and more strict p-value. */ survivalAll = SurvivalAnalysis(force markers=finalMarkerSet, pvalue=0.0001) return record( statistics = survivalAll.statistics, annotResults = survivalAll.geneRank, annotFull = survivalAll.fullData, kmReport = survivalAll.kmReport, report = survivalAll.report ) }