Gene-to-distance-to-plaque correlation

All genes

Published

March 26, 2026

Replication of Fig 3G in Saito et al.,2025 with Akhil ThioS annotations

Setup and data loading

Load libraries, Seurat objects, and compute cell-to-plaque distances.

Code
source("R/setup.R")
library(ggrepel)
library(patchwork)

data <- load_slides_and_distances("full")
slide1 <- data$slide1; slide2 <- data$slide2
cell_distances_1 <- data$distances_1; cell_distances_2 <- data$distances_2
cell_distances_all <- data$distances_all

Gene-distance correlation functions

Does a cell’s expression of this gene change systematically as the cell sits closer to (or farther from) a plaque?

Gene expression ~ distance

compute_gene_distance_correlation()

For each gene we compute a Spearman rank correlation (rho) between two vectors that share one entry per cell:

  1. The cell’s raw transcript count for that gene.
  2. The cell’s Euclidean distance (in um) to its nearest plaque of the specified type.

Spearman correlation operates on ranks rather than raw values, so it captures any monotonic relationship (not just linear) and is robust to the extreme zero-inflation typical of spatial transcriptomics count data. We set exact = FALSE because the large number of tied zeros makes an exact permutation p-value computationally infeasible.

A gene that is highly expressed near plaques (short distance) and lowly expressed far away (long distance) will have a negative rho (high expression co-occurs with low distance). Conversely, a gene expressed preferentially far from plaques will have a positive rho.

To make the results more intuitive,so that “plaque-enriched” genes appear at the top of a ranked list rather than the bottom, we multiply rho by -1 to obtain rho_flip. After flipping, positive values mean “enriched near plaques” and negative values mean “enriched far from plaques”.

We apply the Benjamini-Hochberg (BH) procedure to control the false discovery rate, producing an adjusted p-value (padj). A gene is called significant at padj < 0.05.

Final classification. Each gene is labelled as:

  • enriched_nearpadj < 0.05 and rho_flip > 0 (upregulated close to plaques)
  • enriched_farpadj < 0.05 and rho_flip < 0 (upregulated far from plaques)
  • ns — not significant

plot_gene_distance_correlation()

Generates a rank plot. Each dot is one gene.

The x-axis is the gene’s rank (1 = strongest plaque enrichment) and the y-axis is rho_flip (the sign-flipped Spearman correlation). Genes sitting high on the plot are preferentially expressed near plaques; genes at the bottom are expressed far from plaques. The dashed line at zero marks no association.

Orange = significantly enriched near plaques; purple = significantly enriched far from plaques; grey = not significant (BH-adjusted p >= 0.05). The top and bottom genes are automatically labelled, plus any user-specified genes of interest.

Code
plot_gene_distance_correlation <- function(
    results,
    title         = "Gene–distance correlation",
    n_top         = 5,
    genes_to_label = NULL
) {
  # Auto-label top n positive and negative genes
  top_pos <- results |> filter(sig == "enriched_near") |> slice_min(rank, n = n_top)
  top_neg <- results |> filter(sig == "enriched_far")  |> slice_max(rank, n = n_top)
  auto_label <- bind_rows(top_pos, top_neg)

  # Add user-specified genes
  if (!is.null(genes_to_label)) {
    user_genes <- results |> filter(gene %in% genes_to_label)
    auto_label <- bind_rows(auto_label, user_genes) |> distinct(gene, .keep_all = TRUE)
  }

  # Colour mapping
  colour_map <- c(
    "enriched_near" = "#E07B39",
    "enriched_far"  = "#7B68AE",
    "ns"            = "grey70"
  )

  ggplot(results, aes(x = rank, y = rho_flip)) +
    geom_point(aes(colour = sig), size = 2, alpha = 0.5) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
    geom_text_repel(
      data          = auto_label,
      aes(label = gene, colour = sig),
      size          = 4,
      fontface      = "italic",
      label.size    = 0.2,
      box.padding   = 0.4,
      max.overlaps  = 20,
      show.legend   = FALSE
    ) +
    scale_colour_manual(
      values = colour_map,
      labels = c(
        "enriched_near" = "Enriched near plaque (padj < 0.05)",
        "enriched_far"  = "Enriched far from plaque (padj < 0.05)",
        "ns"            = "Not significant"
      )
    ) +
    labs(
      title  = title,
      x      = "Gene rank",
      y      = "Spearman rho (flipped)",
      colour = NULL
    ) +
    theme_minimal(base_size = 14) +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank())
}

Run analysis

Compute gene–distance correlations for three distance metrics:

  • dist_caa — distance to nearest CAA plaque
  • dist_paren — distance to nearest parenchymal plaque
  • dist_any — distance to nearest plaque of any type
Code
seurat_list <- list(slide1, slide2)

cor_caa <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_caa")

cor_paren <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_paren")

cor_any <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_any")

# qs2::qs_save(cor_caa, "spatial/cor_caa.qs2")
# qs2::qs_save(cor_paren, "spatial/cor_paren.qs2")
# qs2::qs_save(cor_any, "spatial/cor_any.qs2")
Code
seurat_list <- list(slide1, slide2)

dist_adu <- cell_distances_all |> filter(Treatment.Group == "Adu")
dist_igg <- cell_distances_all |> filter(Treatment.Group == "IgG")

for (trt in c("adu", "igg")) {
  dist_trt <- if (trt == "adu") dist_adu else dist_igg
  for (dcol in c("dist_caa", "dist_paren", "dist_any")) {
    res    <- compute_gene_distance_correlation(seurat_list, dist_trt, distance_col = dcol)
    suffix <- sub("dist_", "", dcol)
    qs2::qs_save(res, paste0("spatial/cor_", suffix, "_", trt, ".qs2"))
  }
}

Rank plots

Code
cor_caa   <- qs2::qs_read("spatial/cor_caa.qs2")
cor_paren <- qs2::qs_read("spatial/cor_paren.qs2")
cor_any   <- qs2::qs_read("spatial/cor_any.qs2")

Three analyses are shown, each using a different distance metric: (1) distance to nearest CAA plaque only, (2) distance to nearest parenchymal plaque only, and (3) distance to nearest plaque of any type. Comparing across the three reveals whether certain genes are specifically associated with one plaque subtype or with amyloid deposits in general.

Code
plot_gene_distance_correlation(cor_caa, title = "Distance to nearest CAA plaque")

For each gene, Spearman’s rank correlation coefficient (ρ) was calculated between per-cell gene expression levels (raw count data) and the minimum distance to the nearest amyloid plaque. To facilitate interpretation, correlation coefficients were sign-inverted such that positive values indicate higher gene expression in cells located closer to plaques, whereas negative values indicate higher expression at increasing distances from plaques. Genes were ranked by the inverted correlation coefficient. Statistical significance was assessed using two-sided Spearman correlation tests followed by Benjamini–Hochberg false discovery rate correction. Genes were classified as plaque-enriched, plaque-distal, or not significant based on adjusted P values. The five most positive and five most negative correlations are labeled
Code
plot_gene_distance_correlation(cor_paren, title = "Distance to nearest parenchymal plaque")

Code
plot_gene_distance_correlation(cor_any, title = "Distance to nearest plaque (any type)")

Top genes

Code
show_top <- function(results, label) {
  top <- bind_rows(
    results |> filter(sig == "enriched_near") |> slice_min(rank, n = 10),
    results |> filter(sig == "enriched_far")  |> slice_max(rank, n = 10)
  ) |>
    select(gene, rho, rho_flip, pval, padj, rank, sig) |>
    mutate(across(c(rho, rho_flip), \(x) round(x, 4)),
           across(c(pval, padj), \(x) ifelse(x < 1e-300, "< 1e-300", formatC(x, format = "e", digits = 2))))

  top |>
    kbl(caption = label) |>
    kable_styling("striped", full_width = FALSE)
}

show_top(cor_caa,   "Top genes — distance to CAA")
Top genes — distance to CAA
gene rho rho_flip pval padj rank sig
Lamp5 -0.1404 0.1404 < 1e-300 < 1e-300 1 enriched_near
Stard8 -0.1242 0.1242 < 1e-300 < 1e-300 2 enriched_near
Slc17a7 -0.1184 0.1184 < 1e-300 < 1e-300 3 enriched_near
Cux2 -0.1165 0.1165 < 1e-300 < 1e-300 4 enriched_near
Bhlhe40 -0.1131 0.1131 < 1e-300 < 1e-300 5 enriched_near
Rspo1 -0.1028 0.1028 < 1e-300 < 1e-300 6 enriched_near
Pparg -0.0981 0.0981 < 1e-300 < 1e-300 7 enriched_near
Serpinf1 -0.0850 0.0850 < 1e-300 < 1e-300 8 enriched_near
Egr1 -0.0817 0.0817 < 1e-300 < 1e-300 9 enriched_near
Lum -0.0800 0.0800 < 1e-300 < 1e-300 10 enriched_near
Mbp 0.1356 -0.1356 < 1e-300 < 1e-300 459 enriched_far
Ermn 0.0896 -0.0896 < 1e-300 < 1e-300 458 enriched_far
Nts 0.0888 -0.0888 < 1e-300 < 1e-300 457 enriched_far
Prdx1 0.0855 -0.0855 < 1e-300 < 1e-300 456 enriched_far
Syt6 0.0844 -0.0844 < 1e-300 < 1e-300 455 enriched_far
Slc32a1 0.0837 -0.0837 < 1e-300 < 1e-300 454 enriched_far
Tac1 0.0789 -0.0789 < 1e-300 < 1e-300 453 enriched_far
B3galt5 0.0785 -0.0785 < 1e-300 < 1e-300 452 enriched_far
Mog 0.0770 -0.0770 < 1e-300 < 1e-300 451 enriched_far
Mag 0.0751 -0.0751 < 1e-300 < 1e-300 450 enriched_far
Code
show_top(cor_paren, "Top genes — distance to parenchymal")
Top genes — distance to parenchymal
gene rho rho_flip pval padj rank sig
Cst7 -0.2954 0.2954 < 1e-300 < 1e-300 1 enriched_near
Trem2 -0.2146 0.2146 < 1e-300 < 1e-300 2 enriched_near
Bcl2a1b -0.2043 0.2043 < 1e-300 < 1e-300 3 enriched_near
Lyz2 -0.2000 0.2000 < 1e-300 < 1e-300 4 enriched_near
Cd68 -0.1913 0.1913 < 1e-300 < 1e-300 5 enriched_near
C1qa -0.1874 0.1874 < 1e-300 < 1e-300 6 enriched_near
C1qc -0.1839 0.1839 < 1e-300 < 1e-300 7 enriched_near
Ctsd -0.1829 0.1829 < 1e-300 < 1e-300 8 enriched_near
Ly86 -0.1803 0.1803 < 1e-300 < 1e-300 9 enriched_near
Ccl3 -0.1779 0.1779 < 1e-300 < 1e-300 10 enriched_near
Psap 0.1504 -0.1504 < 1e-300 < 1e-300 459 enriched_far
Ids 0.1468 -0.1468 < 1e-300 < 1e-300 458 enriched_far
Calb1 0.1282 -0.1282 < 1e-300 < 1e-300 457 enriched_far
Gaa 0.1248 -0.1248 < 1e-300 < 1e-300 456 enriched_far
Srebf2 0.1237 -0.1237 < 1e-300 < 1e-300 455 enriched_far
Pygb 0.1233 -0.1233 < 1e-300 < 1e-300 454 enriched_far
Fdft1 0.1198 -0.1198 < 1e-300 < 1e-300 453 enriched_far
Smpd1 0.1154 -0.1154 < 1e-300 < 1e-300 452 enriched_far
Dnah11 0.1146 -0.1146 < 1e-300 < 1e-300 451 enriched_far
Ptges3 0.1128 -0.1128 < 1e-300 < 1e-300 450 enriched_far
Code
show_top(cor_any,   "Top genes — distance to any plaque")
Top genes — distance to any plaque
gene rho rho_flip pval padj rank sig
Cst7 -0.2949 0.2949 < 1e-300 < 1e-300 1 enriched_near
Lyz2 -0.2201 0.2201 < 1e-300 < 1e-300 2 enriched_near
Trem2 -0.2161 0.2161 < 1e-300 < 1e-300 3 enriched_near
Bcl2a1b -0.2084 0.2084 < 1e-300 < 1e-300 4 enriched_near
Cd68 -0.1981 0.1981 < 1e-300 < 1e-300 5 enriched_near
C1qa -0.1966 0.1966 < 1e-300 < 1e-300 6 enriched_near
C1qc -0.1939 0.1939 < 1e-300 < 1e-300 7 enriched_near
Ctsd -0.1870 0.1870 < 1e-300 < 1e-300 8 enriched_near
Ly86 -0.1831 0.1831 < 1e-300 < 1e-300 9 enriched_near
Ccl3 -0.1819 0.1819 < 1e-300 < 1e-300 10 enriched_near
Psap 0.1650 -0.1650 < 1e-300 < 1e-300 459 enriched_far
Ids 0.1580 -0.1580 < 1e-300 < 1e-300 458 enriched_far
Fdft1 0.1382 -0.1382 < 1e-300 < 1e-300 457 enriched_far
Srebf2 0.1334 -0.1334 < 1e-300 < 1e-300 456 enriched_far
Calb1 0.1297 -0.1297 < 1e-300 < 1e-300 455 enriched_far
Gaa 0.1291 -0.1291 < 1e-300 < 1e-300 454 enriched_far
Car12 0.1262 -0.1262 < 1e-300 < 1e-300 453 enriched_far
Chst1 0.1257 -0.1257 < 1e-300 < 1e-300 452 enriched_far
Smpd1 0.1252 -0.1252 < 1e-300 < 1e-300 451 enriched_far
Ptges3 0.1232 -0.1232 < 1e-300 < 1e-300 450 enriched_far

Rank plots by treatment

Code
cor_caa_adu   <- qs2::qs_read("spatial/cor_caa_adu.qs2")
cor_caa_igg   <- qs2::qs_read("spatial/cor_caa_igg.qs2")
cor_paren_adu <- qs2::qs_read("spatial/cor_paren_adu.qs2")
cor_paren_igg <- qs2::qs_read("spatial/cor_paren_igg.qs2")
cor_any_adu   <- qs2::qs_read("spatial/cor_any_adu.qs2")
cor_any_igg   <- qs2::qs_read("spatial/cor_any_igg.qs2")
Code
p1 <- plot_gene_distance_correlation(cor_caa_adu, title = "CAA — Aducanumab")
p2 <- plot_gene_distance_correlation(cor_caa_igg, title = "CAA — IgG")

p1 + p2

Code
pa <- plot_gene_distance_correlation(cor_paren_adu, title = "Parenchymal — Aducanumab")

pi <- plot_gene_distance_correlation(cor_paren_igg, title = "Parenchymal — IgG")

pa + pi

Code
a1 <- plot_gene_distance_correlation(cor_any_adu, title = "Any plaque — Aducanumab")

a2 <- plot_gene_distance_correlation(cor_any_igg, title = "Any plaque — IgG")

a1 + a2

Adu vs IgG scatter

Each point is a gene. X-axis = rho_flip in IgG; Y-axis = rho_flip in Aducanumab. Points are coloured by concordance of significance: both enriched near, both enriched far, discordant, or non-significant in both.

Code
plot_treatment_scatter <- function(cor_adu, cor_igg, title) {
  df <- inner_join(
    cor_adu |> select(gene, rho_flip_adu = rho_flip, sig_adu = sig),
    cor_igg |> select(gene, rho_flip_igg = rho_flip, sig_igg = sig),
    by = "gene"
  ) |>
    mutate(
      concordance = case_when(
        sig_adu == "enriched_near" & sig_igg == "enriched_near" ~ "both near",
        sig_adu == "enriched_far"  & sig_igg == "enriched_far"  ~ "both far",
        sig_adu != "ns"            & sig_igg != "ns"            ~ "discordant",
        sig_adu != "ns"            | sig_igg != "ns"            ~ "one sig",
        TRUE                                                     ~ "ns"
      )
    )

  colour_map <- c(
    "both near"  = "#E07B39",
    "both far"   = "#7B68AE",
    "discordant" = "#C0392B",
    "one sig"    = "#5DADE2",
    "ns"         = "grey80"
  )

  label_df <- df |> filter(concordance %in% c("both near", "both far", "discordant"))

  ggplot(df, aes(x = rho_flip_igg, y = rho_flip_adu)) +
    geom_point(aes(colour = concordance), size = 1.8, alpha = 0.6) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", colour = "grey40") +
    geom_hline(yintercept = 0, colour = "grey70", linewidth = 0.3) +
    geom_vline(xintercept = 0, colour = "grey70", linewidth = 0.3) +
    geom_text_repel(
      data         = label_df,
      aes(label = gene, colour = concordance),
      size         = 3.5,
      fontface     = "italic",
      max.overlaps = 20,
      show.legend  = FALSE
    ) +
    scale_colour_manual(values = colour_map) +
    labs(
      title  = title,
      x      = "rho_flip — IgG",
      y      = "rho_flip — Aducanumab",
      colour = NULL
    ) +
    theme_minimal(base_size = 13) +
    theme(
      legend.position    = "bottom",
      panel.grid.minor   = element_blank(),
      panel.grid.major   = element_line(colour = "grey92")
    )
}

plot_treatment_scatter(cor_caa_adu,   cor_caa_igg,   "CAA — Adu vs IgG") |
plot_treatment_scatter(cor_paren_adu, cor_paren_igg, "Parenchymal — Adu vs IgG") |
plot_treatment_scatter(cor_any_adu,   cor_any_igg,   "Any plaque — Adu vs IgG")