library(“airway”)
indir <- system.file(“extdata”, package=“airway”, mustWork=TRUE)
list.files(indir)
csvfile <- file.path(indir, “sample_table.csv”)
sampleTable <- read.csv(csvfile, row.names = 1)
filenames <- file.path(indir, paste0(sampleTable$Run, “_subset.bam”))
file.exists(filenames)
library(“Rsamtools”)
bamfiles <- BamFileList(filenames, yieldSize=2000000)
library(“GenomicFeatures”)
gtffile <- file.path(indir,“Homo_sapiens.GRCh37.75_subset.gtf”)
txdb <- makeTxDbFromGFF(gtffile, format = “gtf”, circ_seqs = character())
ebg <- exonsBy(txdb, by=“gene”)
library(“GenomicAlignments”)
library(“BiocParallel”)
se <- summarizeOverlaps(features=ebg, reads=bamfiles,
mode=“Union”,
singleEnd=FALSE,
ignore.strand=TRUE,
fragments=TRUE )
data(“airway”)
se <- airway
se$dex %<>% relevel(“untrt”)
library(“DESeq2”)
dds <- DESeqDataSet(se, design = ~ cell + dex)
countdata <- assay(se)
coldata <- colData(se)
ddsMat <- DESeqDataSetFromMatrix(countData = countdata,
colData = coldata,
design = ~ cell + dex)
dds <- dds[ rowSums(counts(dds)) > 1, ]
lambda <- 10^seq(from = -1, to = 2, length = 1000)
cts <- matrix(rpois(1000*100, lambda), ncol = 100)
library(“vsn”)
meanSdPlot(cts, ranks = FALSE)
log.cts.one <- log2(cts + 1)
meanSdPlot(log.cts.one, ranks = FALSE)
vsd <- vst(dds, blind = FALSE)
rld <- rlog(dds, blind = FALSE)
library(“dplyr”)
library(“ggplot2”)
dds <- estimateSizeFactors(dds)
df <- bind_rows(
as_data_frame(log2(counts(dds, normalized=TRUE)[, 1:2]+1)) %>%
mutate(transformation = “log2(x + 1)”),
as_data_frame(assay(vsd)[, 1:2]) %>% mutate(transformation = “vst”),
as_data_frame(assay(rld)[, 1:2]) %>% mutate(transformation = “rlog”))
colnames(df)[1:2] <- c(“x”, “y”)
ggplot(df, aes(x = x, y = y)) + geom_hex(bins = 80) +
coord_fixed() + facet_grid( . ~ transformation)
sampleDists <- dist(t(assay(vsd)))
library(“pheatmap”)
library(“RColorBrewer”)
sampleDistMatrix <- as.matrix( sampleDists )
rownames(sampleDistMatrix) <- paste( vsd
cell, sep = " - " )
colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, “Blues”)) )(255)
pheatmap(sampleDistMatrix,
clustering_distance_rows = sampleDists,
clustering_distance_cols = sampleDists,
col = colors)
library(“PoiClaClu”)
poisd <- PoissonDistance(t(counts(dds)))
samplePoisDistMatrix <- as.matrix( poisd
dex, dds
dd,
clustering_distance_cols = poisd$dd,
col = colors)
pcaData <- plotPCA(vsd, intgroup = c( “dex”, “cell”), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, “percentVar”))
ggplot(pcaData, aes(x = PC1, y = PC2, color = dex, shape = cell)) +
geom_point(size =3) +
xlab(paste0("PC1: ", percentVar[1], “% variance”)) +
ylab(paste0("PC2: ", percentVar[2], “% variance”)) +
coord_fixed()
mds <- as.data.frame(colData(vsd)) %>%
cbind(cmdscale(sampleDistMatrix))
ggplot(mds, aes(x = 1
, y = 2
, color = dex, shape = cell)) +
geom_point(size = 3) + coord_fixed()
mdsPois <- as.data.frame(colData(dds)) %>%
cbind(cmdscale(samplePoisDistMatrix))
ggplot(mdsPois, aes(x = 1
, y = 2
, color = dex, shape = cell)) +
geom_point(size = 3) + coord_fixed()
dds <- DESeq(dds)
res <- results(dds)
res <- results(dds, contrast=c(“dex”,“trt”,“untrt”))
mcols(res, use.names = TRUE)
res.05 <- results(dds, alpha = 0.05)
table(res.05$padj < 0.05)
library(“ggbeeswarm”)
geneCounts <- plotCounts(dds, gene = topGene, intgroup = c(“dex”,“cell”),
returnData = TRUE)
ggplot(geneCounts, aes(x = dex, y = count, color = cell)) +
scale_y_log10() + geom_beeswarm(cex = 3)
ggplot(geneCounts, aes(x = dex, y = count, color = cell, group = cell)) +
scale_y_log10() + geom_point(size = 3) + geom_line()
library(“apeglm”)
res <- lfcShrink(dds, coef=“dex_trt_vs_untrt”, type=“apeglm”)
plotMA(res, ylim = c(-5, 5))
res.noshr <- results(dds, name=“dex_trt_vs_untrt”)
plotMA(res.noshr, ylim = c(-5, 5))
plotMA(res, ylim = c(-5,5))
topGene <- rownames(res)[which.min(res$padj)]
with(res[topGene, ], {
points(baseMean, log2FoldChange, col=“dodgerblue”, cex=2, lwd=2)
text(baseMean, log2FoldChange, topGene, pos=2, col=“dodgerblue”)
})
hist(res
baseMean > 1], breaks = 0:20/20,
col = “grey50”, border = “white”)
library(“genefilter”)
topVarGenes <- head(order(rowVars(assay(vsd)), decreasing = TRUE), 20)
mat <- assay(vsd)[ topVarGenes, ]
mat <- mat - rowMeans(mat)
anno <- as.data.frame(colData(vsd)[, c(“cell”,“dex”)])
pheatmap(mat, annotation_col = anno)
qs <- c(0, quantile(resLFC1
baseMean > 0], 0:6/6))
bins <- cut(resLFC1
pvalue, bins, function§
mean(p < .05, na.rm = TRUE))
barplot(fractionSig, xlab = “mean normalized count”,
ylab = “fraction of small p values”)
library(“AnnotationDbi”)
library(“org.Hs.eg.db”)
res
entrez <- mapIds(org.Hs.eg.db,
keys=row.names(res),
column=“ENTREZID”,
keytype=“ENSEMBL”,
multiVals=“first”)
resOrdered <- res[order(res$pvalue),]
resOrderedDF <- as.data.frame(resOrdered)[1:100, ]
write.csv(resOrderedDF, file = “results.csv”)
library(“ReportingTools”)
htmlRep <- HTMLReport(shortName=“report”, title=“My report”,
reportDirectory="./report")
publish(resOrderedDF, htmlRep)
url <- finish(htmlRep)
browseURL(url)
resGR <- results(dds, name=“dex_trt_vs_untrt”, format=“GRanges”)
resGR
log2FoldChange
resGR
symbol) | duplicated(resGRsub
group <- ifelse(naOrDup, names(resGRsub), resGRsub
padj < 0.1 & !is.na(resGRsub$padj),
“sig”, “notsig”))
options(ucscChromosomeNames = FALSE)
g <- GenomeAxisTrack()
a <- AnnotationTrack(resGRsub, name = “gene ranges”, feature = status)
d <- DataTrack(resGRsub, data = “log2FoldChange”, baseline = 0,
type = “h”, name = “log2 fold change”, strand = “+”)
plotTracks(list(g, d, a), groupAnnotation = “group”,
notsig = “grey”, sig = “hotpink”)
library(“sva”)
dat <- counts(dds, normalized = TRUE)
idx <- rowMeans(dat) > 1
dat <- dat[idx, ]
mod <- model.matrix(~ dex, colData(dds))
mod0 <- model.matrix(~ 1, colData(dds))
svseq <- svaseq(dat, mod, mod0, n.sv = 2)
par(mfrow = c(2, 1), mar = c(3,5,3,1))
for (i in 1:2) {
stripchart(svseq
cell, vertical = TRUE, main = paste0(“SV”, i))
abline(h = 0)
}
ddssva <- dds
ddssva
sv[,1]
ddssva
sv[,2]
design(ddssva) <- ~ SV1 + SV2 + dex
library(“fission”)
data(“fission”)
ddsTC <- DESeqDataSet(fission, ~ strain + minute + strain:minute)
ddsTC <- DESeq(ddsTC, test=“LRT”, reduced = ~ strain + minute)
resTC <- results(ddsTC)
resTC
symbol
head(resTC[order(resTC$padj),], 4)
fiss <- plotCounts(ddsTC, which.min(resTC
minute <- as.numeric(as.character(fissKaTeX parse error: Double subscript at position 154: …e") + scale_y_̲log10() res30 <…padj),]
betas <- coef(ddsTC)
topGenes <- head(order(resTC$padj),20)
mat <- betas[topGenes, -c(1,2)]
thr <- 3
mat[mat < -thr] <- -thr
mat[mat > thr] <- thr
pheatmap(mat, breaks=seq(from=-thr, to=thr, length=101),
cluster_col=FALSE)