Gene-to-distance-to-plaque correlation

All genes

Published

March 26, 2026

Replication of Fig 3G in Saito et al.,2025 with Akhil ThioS annotations

Setup and data loading

Load libraries, Seurat objects, and compute cell-to-plaque distances.

Code

source("R/setup.R")
library(ggrepel)
library(patchwork)

data <- load_slides_and_distances("full")
slide1 <- data$slide1; slide2 <- data$slide2
cell_distances_1 <- data$distances_1; cell_distances_2 <- data$distances_2
cell_distances_all <- data$distances_all

Gene-distance correlation functions

Does a cell’s expression of this gene change systematically as the cell sits closer to (or farther from) a plaque?

Gene expression ~ distance

`compute_gene_distance_correlation()`

For each gene we compute a Spearman rank correlation (rho) between two vectors that share one entry per cell:

The cell’s raw transcript count for that gene.
The cell’s Euclidean distance (in um) to its nearest plaque of the specified type.

Spearman correlation operates on ranks rather than raw values, so it captures any monotonic relationship (not just linear) and is robust to the extreme zero-inflation typical of spatial transcriptomics count data. We set exact = FALSE because the large number of tied zeros makes an exact permutation p-value computationally infeasible.

A gene that is highly expressed near plaques (short distance) and lowly expressed far away (long distance) will have a negative rho (high expression co-occurs with low distance). Conversely, a gene expressed preferentially far from plaques will have a positive rho.

To make the results more intuitive,so that “plaque-enriched” genes appear at the top of a ranked list rather than the bottom, we multiply rho by -1 to obtain rho_flip. After flipping, positive values mean “enriched near plaques” and negative values mean “enriched far from plaques”.

We apply the Benjamini-Hochberg (BH) procedure to control the false discovery rate, producing an adjusted p-value (padj). A gene is called significant at padj < 0.05.

Final classification. Each gene is labelled as:

enriched_near — padj < 0.05 and rho_flip > 0 (upregulated close to plaques)
enriched_far — padj < 0.05 and rho_flip < 0 (upregulated far from plaques)
ns — not significant

`plot_gene_distance_correlation()`

Generates a rank plot. Each dot is one gene.

The x-axis is the gene’s rank (1 = strongest plaque enrichment) and the y-axis is rho_flip (the sign-flipped Spearman correlation). Genes sitting high on the plot are preferentially expressed near plaques; genes at the bottom are expressed far from plaques. The dashed line at zero marks no association.

Orange = significantly enriched near plaques; purple = significantly enriched far from plaques; grey = not significant (BH-adjusted p >= 0.05). The top and bottom genes are automatically labelled, plus any user-specified genes of interest.

Code

plot_gene_distance_correlation <- function(
    results,
    title         = "Gene–distance correlation",
    n_top         = 5,
    genes_to_label = NULL
) {
  # Auto-label top n positive and negative genes
  top_pos <- results |> filter(sig == "enriched_near") |> slice_min(rank, n = n_top)
  top_neg <- results |> filter(sig == "enriched_far")  |> slice_max(rank, n = n_top)
  auto_label <- bind_rows(top_pos, top_neg)

  # Add user-specified genes
  if (!is.null(genes_to_label)) {
    user_genes <- results |> filter(gene %in% genes_to_label)
    auto_label <- bind_rows(auto_label, user_genes) |> distinct(gene, .keep_all = TRUE)
  }

  # Colour mapping
  colour_map <- c(
    "enriched_near" = "#E07B39",
    "enriched_far"  = "#7B68AE",
    "ns"            = "grey70"
  )

  ggplot(results, aes(x = rank, y = rho_flip)) +
    geom_point(aes(colour = sig), size = 2, alpha = 0.5) +
    geom_hline(yintercept = 0, linetype = "dashed", colour = "grey40") +
    geom_text_repel(
      data          = auto_label,
      aes(label = gene, colour = sig),
      size          = 4,
      fontface      = "italic",
      label.size    = 0.2,
      box.padding   = 0.4,
      max.overlaps  = 20,
      show.legend   = FALSE
    ) +
    scale_colour_manual(
      values = colour_map,
      labels = c(
        "enriched_near" = "Enriched near plaque (padj < 0.05)",
        "enriched_far"  = "Enriched far from plaque (padj < 0.05)",
        "ns"            = "Not significant"
      )
    ) +
    labs(
      title  = title,
      x      = "Gene rank",
      y      = "Spearman rho (flipped)",
      colour = NULL
    ) +
    theme_minimal(base_size = 14) +
    theme(
      legend.position = "bottom",
      panel.grid.minor = element_blank(),
      panel.grid.major.x = element_blank())
}

Run analysis

Compute gene–distance correlations for three distance metrics:

dist_caa — distance to nearest CAA plaque
dist_paren — distance to nearest parenchymal plaque
dist_any — distance to nearest plaque of any type

Code

seurat_list <- list(slide1, slide2)

cor_caa <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_caa")

cor_paren <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_paren")

cor_any <- compute_gene_distance_correlation(
  seurat_list, cell_distances_all, distance_col = "dist_any")

# qs2::qs_save(cor_caa, "spatial/cor_caa.qs2")
# qs2::qs_save(cor_paren, "spatial/cor_paren.qs2")
# qs2::qs_save(cor_any, "spatial/cor_any.qs2")

Code

seurat_list <- list(slide1, slide2)

dist_adu <- cell_distances_all |> filter(Treatment.Group == "Adu")
dist_igg <- cell_distances_all |> filter(Treatment.Group == "IgG")

for (trt in c("adu", "igg")) {
  dist_trt <- if (trt == "adu") dist_adu else dist_igg
  for (dcol in c("dist_caa", "dist_paren", "dist_any")) {
    res    <- compute_gene_distance_correlation(seurat_list, dist_trt, distance_col = dcol)
    suffix <- sub("dist_", "", dcol)
    qs2::qs_save(res, paste0("spatial/cor_", suffix, "_", trt, ".qs2"))
  }
}

Rank plots

Code

cor_caa   <- qs2::qs_read("spatial/cor_caa.qs2")
cor_paren <- qs2::qs_read("spatial/cor_paren.qs2")
cor_any   <- qs2::qs_read("spatial/cor_any.qs2")

Three analyses are shown, each using a different distance metric: (1) distance to nearest CAA plaque only, (2) distance to nearest parenchymal plaque only, and (3) distance to nearest plaque of any type. Comparing across the three reveals whether certain genes are specifically associated with one plaque subtype or with amyloid deposits in general.

Code

plot_gene_distance_correlation(cor_caa, title = "Distance to nearest CAA plaque")

For each gene, Spearman’s rank correlation coefficient (ρ) was calculated between per-cell gene expression levels (raw count data) and the minimum distance to the nearest amyloid plaque. To facilitate interpretation, correlation coefficients were sign-inverted such that positive values indicate higher gene expression in cells located closer to plaques, whereas negative values indicate higher expression at increasing distances from plaques. Genes were ranked by the inverted correlation coefficient. Statistical significance was assessed using two-sided Spearman correlation tests followed by Benjamini–Hochberg false discovery rate correction. Genes were classified as plaque-enriched, plaque-distal, or not significant based on adjusted P values. The five most positive and five most negative correlations are labeled

Code

plot_gene_distance_correlation(cor_paren, title = "Distance to nearest parenchymal plaque")

Code

plot_gene_distance_correlation(cor_any, title = "Distance to nearest plaque (any type)")

Top genes

Code

show_top <- function(results, label) {
  top <- bind_rows(
    results |> filter(sig == "enriched_near") |> slice_min(rank, n = 10),
    results |> filter(sig == "enriched_far")  |> slice_max(rank, n = 10)
  ) |>
    select(gene, rho, rho_flip, pval, padj, rank, sig) |>
    mutate(across(c(rho, rho_flip), \(x) round(x, 4)),
           across(c(pval, padj), \(x) ifelse(x < 1e-300, "< 1e-300", formatC(x, format = "e", digits = 2))))

  top |>
    kbl(caption = label) |>
    kable_styling("striped", full_width = FALSE)
}

show_top(cor_caa,   "Top genes — distance to CAA")

Top genes — distance to CAA
gene	rho	rho_flip	pval	padj	rank	sig
Lamp5	-0.1404	0.1404	< 1e-300	< 1e-300	1	enriched_near
Stard8	-0.1242	0.1242	< 1e-300	< 1e-300	2	enriched_near
Slc17a7	-0.1184	0.1184	< 1e-300	< 1e-300	3	enriched_near
Cux2	-0.1165	0.1165	< 1e-300	< 1e-300	4	enriched_near
Bhlhe40	-0.1131	0.1131	< 1e-300	< 1e-300	5	enriched_near
Rspo1	-0.1028	0.1028	< 1e-300	< 1e-300	6	enriched_near
Pparg	-0.0981	0.0981	< 1e-300	< 1e-300	7	enriched_near
Serpinf1	-0.0850	0.0850	< 1e-300	< 1e-300	8	enriched_near
Egr1	-0.0817	0.0817	< 1e-300	< 1e-300	9	enriched_near
Lum	-0.0800	0.0800	< 1e-300	< 1e-300	10	enriched_near
Mbp	0.1356	-0.1356	< 1e-300	< 1e-300	459	enriched_far
Ermn	0.0896	-0.0896	< 1e-300	< 1e-300	458	enriched_far
Nts	0.0888	-0.0888	< 1e-300	< 1e-300	457	enriched_far
Prdx1	0.0855	-0.0855	< 1e-300	< 1e-300	456	enriched_far
Syt6	0.0844	-0.0844	< 1e-300	< 1e-300	455	enriched_far
Slc32a1	0.0837	-0.0837	< 1e-300	< 1e-300	454	enriched_far
Tac1	0.0789	-0.0789	< 1e-300	< 1e-300	453	enriched_far
B3galt5	0.0785	-0.0785	< 1e-300	< 1e-300	452	enriched_far
Mog	0.0770	-0.0770	< 1e-300	< 1e-300	451	enriched_far
Mag	0.0751	-0.0751	< 1e-300	< 1e-300	450	enriched_far

Code

show_top(cor_paren, "Top genes — distance to parenchymal")

Top genes — distance to parenchymal
gene	rho	rho_flip	pval	padj	rank	sig
Cst7	-0.2954	0.2954	< 1e-300	< 1e-300	1	enriched_near
Trem2	-0.2146	0.2146	< 1e-300	< 1e-300	2	enriched_near
Bcl2a1b	-0.2043	0.2043	< 1e-300	< 1e-300	3	enriched_near
Lyz2	-0.2000	0.2000	< 1e-300	< 1e-300	4	enriched_near
Cd68	-0.1913	0.1913	< 1e-300	< 1e-300	5	enriched_near
C1qa	-0.1874	0.1874	< 1e-300	< 1e-300	6	enriched_near
C1qc	-0.1839	0.1839	< 1e-300	< 1e-300	7	enriched_near
Ctsd	-0.1829	0.1829	< 1e-300	< 1e-300	8	enriched_near
Ly86	-0.1803	0.1803	< 1e-300	< 1e-300	9	enriched_near
Ccl3	-0.1779	0.1779	< 1e-300	< 1e-300	10	enriched_near
Psap	0.1504	-0.1504	< 1e-300	< 1e-300	459	enriched_far
Ids	0.1468	-0.1468	< 1e-300	< 1e-300	458	enriched_far
Calb1	0.1282	-0.1282	< 1e-300	< 1e-300	457	enriched_far
Gaa	0.1248	-0.1248	< 1e-300	< 1e-300	456	enriched_far
Srebf2	0.1237	-0.1237	< 1e-300	< 1e-300	455	enriched_far
Pygb	0.1233	-0.1233	< 1e-300	< 1e-300	454	enriched_far
Fdft1	0.1198	-0.1198	< 1e-300	< 1e-300	453	enriched_far
Smpd1	0.1154	-0.1154	< 1e-300	< 1e-300	452	enriched_far
Dnah11	0.1146	-0.1146	< 1e-300	< 1e-300	451	enriched_far
Ptges3	0.1128	-0.1128	< 1e-300	< 1e-300	450	enriched_far

Code

show_top(cor_any,   "Top genes — distance to any plaque")

Top genes — distance to any plaque
gene	rho	rho_flip	pval	padj	rank	sig
Cst7	-0.2949	0.2949	< 1e-300	< 1e-300	1	enriched_near
Lyz2	-0.2201	0.2201	< 1e-300	< 1e-300	2	enriched_near
Trem2	-0.2161	0.2161	< 1e-300	< 1e-300	3	enriched_near
Bcl2a1b	-0.2084	0.2084	< 1e-300	< 1e-300	4	enriched_near
Cd68	-0.1981	0.1981	< 1e-300	< 1e-300	5	enriched_near
C1qa	-0.1966	0.1966	< 1e-300	< 1e-300	6	enriched_near
C1qc	-0.1939	0.1939	< 1e-300	< 1e-300	7	enriched_near
Ctsd	-0.1870	0.1870	< 1e-300	< 1e-300	8	enriched_near
Ly86	-0.1831	0.1831	< 1e-300	< 1e-300	9	enriched_near
Ccl3	-0.1819	0.1819	< 1e-300	< 1e-300	10	enriched_near
Psap	0.1650	-0.1650	< 1e-300	< 1e-300	459	enriched_far
Ids	0.1580	-0.1580	< 1e-300	< 1e-300	458	enriched_far
Fdft1	0.1382	-0.1382	< 1e-300	< 1e-300	457	enriched_far
Srebf2	0.1334	-0.1334	< 1e-300	< 1e-300	456	enriched_far
Calb1	0.1297	-0.1297	< 1e-300	< 1e-300	455	enriched_far
Gaa	0.1291	-0.1291	< 1e-300	< 1e-300	454	enriched_far
Car12	0.1262	-0.1262	< 1e-300	< 1e-300	453	enriched_far
Chst1	0.1257	-0.1257	< 1e-300	< 1e-300	452	enriched_far
Smpd1	0.1252	-0.1252	< 1e-300	< 1e-300	451	enriched_far
Ptges3	0.1232	-0.1232	< 1e-300	< 1e-300	450	enriched_far

Rank plots by treatment

Code

cor_caa_adu   <- qs2::qs_read("spatial/cor_caa_adu.qs2")
cor_caa_igg   <- qs2::qs_read("spatial/cor_caa_igg.qs2")
cor_paren_adu <- qs2::qs_read("spatial/cor_paren_adu.qs2")
cor_paren_igg <- qs2::qs_read("spatial/cor_paren_igg.qs2")
cor_any_adu   <- qs2::qs_read("spatial/cor_any_adu.qs2")
cor_any_igg   <- qs2::qs_read("spatial/cor_any_igg.qs2")

Code

p1 <- plot_gene_distance_correlation(cor_caa_adu, title = "CAA — Aducanumab")
p2 <- plot_gene_distance_correlation(cor_caa_igg, title = "CAA — IgG")

p1 + p2

Code

pa <- plot_gene_distance_correlation(cor_paren_adu, title = "Parenchymal — Aducanumab")

pi <- plot_gene_distance_correlation(cor_paren_igg, title = "Parenchymal — IgG")

pa + pi

Code

a1 <- plot_gene_distance_correlation(cor_any_adu, title = "Any plaque — Aducanumab")

a2 <- plot_gene_distance_correlation(cor_any_igg, title = "Any plaque — IgG")

a1 + a2

Adu vs IgG scatter

Each point is a gene. X-axis = rho_flip in IgG; Y-axis = rho_flip in Aducanumab. Points are coloured by concordance of significance: both enriched near, both enriched far, discordant, or non-significant in both.

Code

plot_treatment_scatter <- function(cor_adu, cor_igg, title) {
  df <- inner_join(
    cor_adu |> select(gene, rho_flip_adu = rho_flip, sig_adu = sig),
    cor_igg |> select(gene, rho_flip_igg = rho_flip, sig_igg = sig),
    by = "gene"
  ) |>
    mutate(
      concordance = case_when(
        sig_adu == "enriched_near" & sig_igg == "enriched_near" ~ "both near",
        sig_adu == "enriched_far"  & sig_igg == "enriched_far"  ~ "both far",
        sig_adu != "ns"            & sig_igg != "ns"            ~ "discordant",
        sig_adu != "ns"            | sig_igg != "ns"            ~ "one sig",
        TRUE                                                     ~ "ns"
      )
    )

  colour_map <- c(
    "both near"  = "#E07B39",
    "both far"   = "#7B68AE",
    "discordant" = "#C0392B",
    "one sig"    = "#5DADE2",
    "ns"         = "grey80"
  )

  label_df <- df |> filter(concordance %in% c("both near", "both far", "discordant"))

  ggplot(df, aes(x = rho_flip_igg, y = rho_flip_adu)) +
    geom_point(aes(colour = concordance), size = 1.8, alpha = 0.6) +
    geom_abline(slope = 1, intercept = 0, linetype = "dashed", colour = "grey40") +
    geom_hline(yintercept = 0, colour = "grey70", linewidth = 0.3) +
    geom_vline(xintercept = 0, colour = "grey70", linewidth = 0.3) +
    geom_text_repel(
      data         = label_df,
      aes(label = gene, colour = concordance),
      size         = 3.5,
      fontface     = "italic",
      max.overlaps = 20,
      show.legend  = FALSE
    ) +
    scale_colour_manual(values = colour_map) +
    labs(
      title  = title,
      x      = "rho_flip — IgG",
      y      = "rho_flip — Aducanumab",
      colour = NULL
    ) +
    theme_minimal(base_size = 13) +
    theme(
      legend.position    = "bottom",
      panel.grid.minor   = element_blank(),
      panel.grid.major   = element_line(colour = "grey92")
    )
}

plot_treatment_scatter(cor_caa_adu,   cor_caa_igg,   "CAA — Adu vs IgG") |
plot_treatment_scatter(cor_paren_adu, cor_paren_igg, "Parenchymal — Adu vs IgG") |
plot_treatment_scatter(cor_any_adu,   cor_any_igg,   "Any plaque — Adu vs IgG")