CD8B spatial expression


Andre Forjaz
I'm a third year PhD candidate in the Wirtz Lab. My research focuses on generating 3D reconstructions of anatomical tissues with single-cell resolution to study disease progression.

CD8B spatial expression

Plots description

Panel A

Panel A shows a t-SNE visualization of the PCA normalized data, with colored points of 5 clusters from k-means clustering.

Panel B

Panel B shows a spatial plot with cluster 2 highlighted, to see where cells in cluster 2 are located spatially.

Panel C

Panel C generates a volcano plot to identify differentially expressed genes of cluster 2. Points are colored by upregulated, downregulated, or not significant. The top differentially expressed genes are labeled.

Panel D

Panel D visualizes the expression of the upregulated CD8B gene in the t-SNE plot from panel A. Points are colored by LEP expression level to see where high and low expression occurs spatially.

Panel E

Panel E shows a spatial plot of CD8B gene expression, similar to panel B but coloring by gene expression level instead of cluster membership. This shows where the CD8B gene is expressed spatially.

References

https://biostatsquid.com/volcano-plots-r-tutorial/ https://cran.r-project.org/web/packages/gridExtra/vignettes/arrangeGrob.html

#### author #### 
# Andre Forjaz
# jhed: aperei13

#### load packages #### 
library(Rtsne)
library(ggplot2)
library(patchwork)
library(gridExtra)
library(umap)
library(ggrepel)
library(tidyverse)
library(RColorBrewer)
#### Input ####
outpth <-'~/genomic-data-visualization-2024/homework/hw4/'
pth <-'~/genomic-data-visualization-2024/data/pikachu.csv.gz'
data <- read.csv(pth, row.names=1)

# data preview
data[1:5,1:8]

# xy data 
pos <- data[, 4:5]
# gene data 
gexp <- data[6:ncol(data)]
# normalize the data with log10 mean
gexpnorm <- log10(gexp/rowSums(gexp) * mean(rowSums(gexp))+1)

rownames(pos) <- rownames(gexpnorm) <- data$cell_id
head(pos)
head(gexp)

# PCA
pcs <- prcomp(gexpnorm)
plot(pcs$sdev[1:50])

# tsne on pcs
emb <- Rtsne::Rtsne(pcs$x[,1:50])$Y
plot(emb)

# identify clusters kmeans
com <- as.factor(kmeans(emb, centers = 5)$cluster)

# pick color palette
colpal <- c("#365359", "#91D2D9", "#BFD9D6", "#A3D9CF","#D4F5DD", "#F2F2F2")

## Panel A
# tsne visualization 
df_a <- data.frame(V1 = emb[,1],
                   V2 = emb[,2],
                   cluster = com)

pa <- ggplot(df_a) + 
  geom_point(aes(V1, V2, color=com)) +
  scale_color_manual(values = colpal[2:6])+ 
  ggtitle("t-SNE")+
  theme_classic()

pa

## Panel B
# spatial resolved cluster
df_b <- data.frame(x = pos$aligned_x,
                   y = pos$aligned_y,
                   cluster = com)

pb <- ggplot(df_b) + 
  geom_point(aes(x, y,
                 color= ifelse(cluster == 2, "Cluster 2", "Other")),
             size =1,
             alpha = 0.6) +
  scale_color_manual(values = colpal, guide = guide_legend(title = NULL))+ 
  ggtitle("spatial location of cluster 2")+
  theme_classic()

pb

## Panel C
# differentially expressed genes for your cluster of interest
pv <- sapply(colnames(gexpnorm), function(i) {
  # print(i) ## print out gene name
  wilcox.test(gexpnorm[com == 2, i], gexpnorm[com != 2, i])$p.val
})

logfc <- sapply(colnames(gexpnorm), function(i) {
  # print(i) ## print out gene name
  log2(mean(gexpnorm[com == 2, i])/mean(gexpnorm[com != 2, i]))
})

# volcano plot
df_c <- data.frame(pv=pv, logfc,gene_symbol=colnames(gexpnorm))
# Credit: https://biostatsquid.com/volcano-plots-r-tutorial/
df_c$diffexpressed <- "NO"
# Set as "UP" if log2Foldchange > 0.6 and pvalue < 0.05
df_c$diffexpressed[df_c$logfc > 0.6 & df_c$pv < 0.05] <- "UP"
# Set as "DOWN" if log2Foldchange < -0.6 and pvalue < 0.05
df_c$diffexpressed[df_c$logfc < -0.6 & df_c$pv <0.05] <- "DOWN"
# df_c$delabel <- NA
df_c$delabel <- ifelse(df_c$gene_symbol %in% head(df_c[order(df_c$pv),"gene_symbol"],50),df_c$gene_symbol, NA)

# Explore the data
head(df_c[order(df_c$pv) & df_c$diffexpressed == 'UP', ])


pc <- ggplot(df_c, aes(x = logfc, y = -log10(pv), col = diffexpressed, label = delabel)) +
  geom_point() +
  geom_vline(xintercept = c(-0.6, 0.6), col = "gray", linetype = 'dashed') +
  geom_hline(yintercept = -log10(0.05), col = "gray", linetype = 'dashed') +
  scale_color_manual(values = c("#00AFBB", "grey", "#FFDB6D"),
                     labels = c("Downregulated", "Not significant", "Upregulated")) +
  labs(color = 'Gene expression',
       x = expression("log"[2]*"FC"), y = expression("-log"[10]*"p-value")) + 
  scale_x_continuous(breaks = seq(-10, 10, 2)) +
  theme_classic() +
  geom_text_repel(data = subset(df_c, diffexpressed != "NO"), 
                  aes(label = delabel),
                  box.padding = 0.5,
                  segment.color = "transparent",
                  point.padding = 0.5,
                  show.legend = FALSE)

pc

## Panel D
# one of these genes in reduced dimensional space (PCA, tSNE, etc)
df_d <- data.frame(V1 = emb[, 1],
                   V2 = emb[, 2],
                   CD8B = gexpnorm["CD8B"])

# Create the plot
pd <- ggplot(df_d) + 
  geom_point(aes(x = V1, y = V2, col = CD8B),
             size = 1) +
  scale_color_gradient(low = 'gray', high = '#00AFBB') +
  ggtitle("CD8B gene in t-SNE space") +
  theme_classic()

pd

## Panel E
# one of these genes in space
df_e <- data.frame(x = pos$aligned_x,
                   y = pos$aligned_y,
                   gexpnorm=gexpnorm["CD8B"])


pe <- ggplot(df_e) + 
  geom_point(aes(x, y, color= CD8B),
             size =2) +
  scale_colour_gradient2(low = 'gray', high = '#00AFBB')+ 
  ggtitle("spatial location of CD8B gene")+
  theme_classic()

pe

#### Plot results ####

# https://cran.r-project.org/web/packages/gridExtra/vignettes/arrangeGrob.html
lay <- rbind(c(1,2,3,3),
             c(4,5,3,3))

combined_plot <-grid.arrange( pa , pb , pc, pd , pe, layout_matrix = lay)

#### Save results ####
ggsave(paste0(outpth, "hw4_aperei13.png"), 
       plot = combined_plot,
       width = 12,
       height = 6,
       units = "in")