Differentially expressed genes across all cell types (Adu vs IgG)

Published

March 18, 2026

Code
library(qs2)
library(Seurat)
library(DESeq2)
library(patchwork)
library(gridExtra)
library(pheatmap)
library(tidyverse)
library(kableExtra)

source("20260217-helper_functions.R")

# Load full object and microglia object (Kai's canonical annotation)
full    <- qs_read("seurat_objects/20260217_Part2_fullobj.qs2")
mg_kai  <- UpdateSeuratObject(qs_read("seurat_objects/20260225-mg_kai.qs2"))

# Discover all cell types
cell_types <- sort(unique(full$annotatedclusters))

Overview

Pseudobulk DESeq2 differential expression analysis (Adu vs IgG) is performed for every cell type in annotatedclusters. For microglia, Kai’s canonical subsetted object is used instead of the full-object subset. Each cell type gets a volcano plot, a table of significant DEGs, and a per-sample expression heatmap. A cross-cell-type summary is shown at the end.

Code
###################################################################################
# Returns a pseudobulk Seurat object with Treatment.Group set, or NULL if
# replication is insufficient (DESeq2 requires ≥2 biological replicates per group)
###################################################################################
build_pseudobulk <- function(obj) {
  n_per_group <- table(
    distinct(obj@meta.data, sample_ID, Treatment.Group)$Treatment.Group
  )
  if (any(n_per_group < 2)) return(NULL)

  bulk <- AggregateExpression(obj, group.by = "sample_ID", return.seurat = TRUE)

  # AggregateExpression converts _ to - in colnames (e.g. KK4_465 → KK4-465)
  sample_meta <- distinct(obj@meta.data, sample_ID, Treatment.Group) |>
    mutate(bulk_id = gsub("_", "-", sample_ID))
  bulk$Treatment.Group <- sample_meta$Treatment.Group[
    match(colnames(bulk), sample_meta$bulk_id)
  ]
  Idents(bulk) <- "Treatment.Group"
  bulk
}
#####################################################################
# Runs pseudobulk DESeq2 (Adu vs IgG) on pre-aggregated Seurat object
#####################################################################
run_deseq2 <- function(bulk, ct_label) {
  anayet(bulk,
         ident1          = "Adu",
         ident2          = "IgG",
         test_method     = "DESeq2",
         min_pct         = 0.1,
         p_adj_threshold = 0.1,
         plot_title      = ct_label,
         print_plot      = FALSE)
}

###################################################################################
# Renders per-sample heatmap of significant DEGs (z-scored pseudobulk expression)
###################################################################################
plot_deg_heatmap <- function(bulk, sig_genes, ct_label) {
  # log-normalised pseudobulk expression; data layer created by AggregateExpression
  expr_mat    <- as.matrix(GetAssayData(bulk, layer = "data")[sig_genes, , drop = FALSE])
  expr_scaled <- t(scale(t(expr_mat)))  # z-score per gene

  pheatmap(expr_scaled,
    color             = colorRampPalette(c("#4575b4", "white", "#d73027"))(100),
    annotation_col    = data.frame(Treatment = bulk$Treatment.Group,
                                   row.names = colnames(bulk)),
    annotation_colors = list(Treatment = c(Adu = "#d73027", IgG = "#4575b4")),
    cluster_cols      = TRUE,
    cluster_rows      = length(sig_genes) > 1,
    main              = paste(ct_label, "— significant DEGs per sample (z-score)\n"),
    fontsize          = 14,
    fontsize_row      = 12,
    fontsize_col      = 12,
    angle_col         = 45,
    cellwidth         = 40,
    cellheight        = 20
  )
}

###############################################################################
# Renders volcano + sig-gene table and (if DEGs exist) the per-sample heatmap
##############################################################################
display_ct_results <- function(res, bulk, ct_label) {
  cat("\n\n<h3>Volcano plot</h3>\n\n")

  sig_tbl <- if (nrow(res$significant_results) > 0) {
    res$significant_results |>
      select(p_val, avg_log2FC, p_val_adj) |>
      mutate(across(where(is.numeric), \(x) signif(x, 3))) |>
      arrange(p_val_adj) |>
      tableGrob(theme = ttheme_minimal(base_size = 18))
  } else {
    tableGrob(data.frame(Result = "No significant DEGs"),
              theme = ttheme_minimal(base_size = 20))
  }
  print(res$volcano_plot + wrap_elements(sig_tbl) + plot_layout(widths = c(2, 1)))

  sig_genes <- rownames(res$significant_results)
  if (length(sig_genes) > 0) plot_deg_heatmap(bulk, sig_genes, ct_label)
}

# Per-cell-type DE pipeline: prints cell counts, runs pseudobulk DESeq2, renders output
run_ct_de <- function(obj, ct_label) {
  stopifnot(inherits(obj, "Seurat"), length(ct_label) == 1, !is.na(ct_label))
  DefaultAssay(obj) <- "Xenium"

  # Show replication balance before pseudobulk aggregation
  cat("\n\n<h3>Cell counts</h3>\n\n")
  tbl <- obj@meta.data |>
    summarise(
      n_cells = n(),
      n_Adu   = sum(Treatment.Group == "Adu"),
      n_IgG   = sum(Treatment.Group == "IgG")
    ) |>
    kbl(caption = paste(ct_label, "— cells per treatment")) |>
    kable_styling("striped")
  cat(tbl)

  bulk <- build_pseudobulk(obj)
  if (is.null(bulk)) {
    cat("\n\n_Skipped: fewer than 2 samples per treatment group._\n\n")
    return(NULL)
  }

  res <- run_deseq2(bulk, ct_label)
  display_ct_results(res, bulk, ct_label)
  res
}
Code
# Run DE for every cell type; use Kai's microglia object for Microglia
all_ct_results <- map(cell_types, \(ct) {
  cat("\n\n<h2>", ct, "</h2>\n\n")
  obj <- if (ct == "Microglia") mg_kai else subset(full, annotatedclusters == ct)
  run_ct_de(obj, ct)
}) |> set_names(cell_types) |> compact()

1

Cell counts

CP — cells per treatment
n_cells n_Adu n_IgG
4129 2404 1725

Volcano plot

2

Cell counts

Endothelial — cells per treatment
n_cells n_Adu n_IgG
25583 12658 12925

Volcano plot

3

Cell counts

Ependymal — cells per treatment
n_cells n_Adu n_IgG
3604 2041 1563

Volcano plot

4

Cell counts

Fibroblast — cells per treatment
n_cells n_Adu n_IgG
6179 2583 3596

Volcano plot

5

Cell counts

Pericyte — cells per treatment
n_cells n_Adu n_IgG
5927 3062 2865

Volcano plot

6

Cell counts

VLMC — cells per treatment
n_cells n_Adu n_IgG
658 241 417

Volcano plot

7

Cell counts

VSMC — cells per treatment
n_cells n_Adu n_IgG
2536 1179 1357

Volcano plot

8

Cell counts

BAM — cells per treatment
n_cells n_Adu n_IgG
1477 717 760

Volcano plot

9

Cell counts

Microglia — cells per treatment
n_cells n_Adu n_IgG
23003 12128 10875

Volcano plot

10

Cell counts

T Cell — cells per treatment
n_cells n_Adu n_IgG
591 354 237

Volcano plot

11

Cell counts

Astrocyte — cells per treatment
n_cells n_Adu n_IgG
48489 24115 24374

Volcano plot

12

Cell counts

Oligodendrocyte — cells per treatment
n_cells n_Adu n_IgG
70324 37189 33135

Volcano plot

13

Cell counts

OPC — cells per treatment
n_cells n_Adu n_IgG
9878 5246 4632

Volcano plot

14

Cell counts

GABAergic Neuron 1 — cells per treatment
n_cells n_Adu n_IgG
21175 9162 12013

Volcano plot

15

Cell counts

GABAergic Neuron 2 — cells per treatment
n_cells n_Adu n_IgG
15026 7531 7495

Volcano plot

16

Cell counts

GABAergic Neuron 3 — cells per treatment
n_cells n_Adu n_IgG
5074 2427 2647

Volcano plot

17

Cell counts

GABAergic Neuron 4 — cells per treatment
n_cells n_Adu n_IgG
4091 2117 1974

Volcano plot

18

Cell counts

Glutamatergic Neuron 1 — cells per treatment
n_cells n_Adu n_IgG
39421 20404 19017

Volcano plot

19

Cell counts

Glutamatergic Neuron 2 — cells per treatment
n_cells n_Adu n_IgG
34335 15773 18562

Volcano plot

20

Cell counts

Glutamatergic Neuron 3 — cells per treatment
n_cells n_Adu n_IgG
7483 3830 3653

Volcano plot

21

Cell counts

Glutamatergic Neuron 4 — cells per treatment
n_cells n_Adu n_IgG
4155 2086 2069

Volcano plot

22

Cell counts

Glutamatergic Neuron 5 — cells per treatment
n_cells n_Adu n_IgG
1994 1038 956

Volcano plot

Code
de_tables <- map(all_ct_results, \(res) res$all_results)

# RDS for downstream R analysis
saveRDS(de_tables, "results/20260304-DEG_allCellTypes.rds")

# JSON for interoperability (row names → gene column)
de_tables |>
  map(\(df) rownames_to_column(df, var = "gene")) |>
  jsonlite::toJSON(pretty = TRUE) |>
  writeLines("results/20260304-DEG_allCellTypes.json")

Summary

DEG counts per cell type

Code
deg_counts <- imap(all_ct_results, \(res, ct) {
  if (nrow(res$significant_results) == 0) return(NULL)
  res$significant_results |>
    rownames_to_column("gene") |>
    mutate(CellType  = ct,
           Direction = ifelse(avg_log2FC > 0, "Up in Adu", "Down in Adu"))
}) |>
  compact() |>
  list_rbind() |>
  count(CellType, Direction)

if (nrow(deg_counts) > 0) {
  ggplot(deg_counts, aes(x = reorder(CellType, n, sum), y = n, fill = Direction)) +
    geom_col(position = "dodge", width = 0.7, alpha = 0.9) +
    coord_flip() +
    scale_fill_manual(values = c("Up in Adu" = "#d73027", "Down in Adu" = "#4575b4")) +
    labs(
      title    = "Significant DEGs per cell type",
      subtitle = "padj < 0.1",
      x        = NULL,
      y        = "Number of DEGs",
      fill     = NULL
    ) +
    theme_minimal(base_size = 14) +
    theme(panel.grid.major.y = element_blank(),
          axis.text = element_text(size = 14))
} else {
  message("No significant DEGs found in any cell type.")
}

Cross-cell-type log2FC heatmap

A heatmap of log2 fold changes (Adu vs IgG) for all genes significant (padj < 0.1) in at least one cell type. Asterisks mark individually significant cell-type–gene combinations.

Code
# Collect all DE results into one long data frame
cross_long <- imap(all_ct_results, \(res, ct) {
  res$all_results |>
    rownames_to_column("gene") |>
    mutate(CellType = ct)
}) |> list_rbind()

# Genes significant in at least one cell type
sig_genes_cross <- cross_long |>
  filter(p_val_adj < 0.1) |>
  pull(gene) |>
  unique()

if (length(sig_genes_cross) > 0) {

  # If too many genes, keep those significant in >=2 cell types
  if (length(sig_genes_cross) > 100) {
    gene_sig_count <- cross_long |>
      filter(p_val_adj < 0.1) |>
      distinct(gene, CellType) |>
      count(gene)
    filtered <- gene_sig_count |> filter(n >= 2) |> pull(gene)
    if (length(filtered) > 0) sig_genes_cross <- filtered
  }

  # log2FC matrix: rows = genes, columns = cell types
  fc_mat <- cross_long |>
    filter(gene %in% sig_genes_cross) |>
    pivot_wider(id_cols = gene, names_from = CellType, values_from = avg_log2FC) |>
    column_to_rownames("gene") |>
    as.matrix()

  # Significance matrix
  sig_mat <- cross_long |>
    filter(gene %in% sig_genes_cross) |>
    pivot_wider(id_cols = gene, names_from = CellType, values_from = p_val_adj) |>
    column_to_rownames("gene") |>
    as.matrix()

  display_mat <- ifelse(!is.na(sig_mat) & sig_mat < 0.1, "*", "")

  clim <- ceiling(max(abs(fc_mat), na.rm = TRUE))

  pheatmap(fc_mat,
    color             = colorRampPalette(c("#4575b4", "white", "#d73027"))(100),
    breaks            = seq(-clim, clim, length.out = 101),
    na_col            = "grey88",
    display_numbers   = display_mat,
    fontsize_number   = 18,
    number_color      = "black",
    cluster_rows      = TRUE,
    cluster_cols      = TRUE,
    main              = "Cross-cell-type log2FC (Adu vs IgG)\n",
    fontsize          = 14,
    fontsize_row      = 12,
    fontsize_col      = 14,
    angle_col         = 45,
    cellwidth         = 30,
    cellheight        = 20
  )
} else {
  message("No significant DEGs found across any cell type — heatmap skipped.")
}

Effect size dot plot

Code
sig_all <- imap(all_ct_results, \(res, ct) {
  res$significant_results |> rownames_to_column("gene") |> mutate(CellType = ct)
}) |> list_rbind()

if (nrow(sig_all) > 0) {
  gene_order <- sig_all |>
    group_by(gene) |>
    summarise(mfc = mean(avg_log2FC)) |>
    arrange(mfc) |>
    pull(gene)

  sig_all |>
    mutate(gene = factor(gene, levels = gene_order)) |>
    ggplot(aes(x = avg_log2FC, y = gene,
               colour = CellType,
               size   = -log10(p_val_adj + 1e-300))) +
    geom_vline(xintercept = 0, linetype = "dashed", colour = "grey60") +
    geom_point(alpha = 0.85) +
    scale_size_continuous(
      name  = expression(-log[10](p[adj])),
      range = c(2, 8)
    ) +
    scale_colour_brewer(palette = "Set2") +
    labs(
      title = "All cell types — significant DEGs (Adu vs IgG)\n",
      x     = expression(log[2] * "FC  (Adu / IgG)"),
      y     = NULL
    ) +
    theme_minimal(base_size = 13) +
    theme(
      axis.text        = element_text(size = 14, face = "italic"),
      axis.title       = element_text(size = 16),
      plot.title       = element_text(size = 18),
      axis.ticks       = element_blank(),
      panel.grid.major.x = element_blank()
    )
} else {
  message("No significant DEGs found — dot plot skipped.")
}