Code
# BiocManager::install('Banksy')
# remotes::install_github('satijalab/seurat-wrappers')
source("R/setup.R")
library(Banksy)
library(SeuratWrappers)
library(patchwork)
library(Polychrome)BANSKY segmentation (lambda = 0.8, SCT normalized data)
April 3, 2026
BANKSY (Building Aggregates with a Neighborhood Kernel and Spatial Yardstick) augments each cell’s own transcriptome with the weighted mean expression of its spatial neighbours. At lambda = 0.8, neighbourhood context dominates, grouping cells into contiguous tissue domains rather than transcriptomic cell types.
Reference: Singhal et al., Nature Genetics, 2024
aria is the full merged Seurat object (6 brains, 358 399 cells total) after Chloe correction. Assay SCT_SPLIT contains SPLIT-denoised, SCTransformed counts (474 features) for 346 928 cells that passed the SPLIT quality filter. BANKSY is run on this filtered cell set. Spatial coordinates are in metadata columns x and y.
BANKSY constructs an augmented expression matrix by concatenating:
The mixing parameter lambda = 0.8 places 80 % weight on the neighbourhood context, activating tissue domain segmentation mode. Cells are embedded in this augmented space, dimensionality-reduced with PCA, and clustered with Leiden.
The 6 brains are spread across two slides. Within each slide, the 3 brains are placed at different physical positions so their x/y coordinates are already non-overlapping. Between slides, however, each slide has its own coordinate system starting from (0, 0), so slide 1 and slide 2 coordinates do overlap. We therefore set group = "Slide" to stagger coordinates between slides only, preserving the real within-slide spatial layout. split.scale = TRUE applies within-slide z-scaling before concatenation, accounting for minor technical differences between imaging runs.
Important: do not call
ScaleData()on the resulting BANKSY assay.RunBanksyalready populatesscale.datawith the lambda-weighted features. Re-scaling would erase the neighbourhood weighting.
# Load full object
aria <- qs2::qs_read(paste0(seurat_path, "20260320_fullobj_correctedChloe.qs2"))
# Load cleaned microglia object
microglia <- qs2::qs_read(paste0(seurat_path, "20260323_updated_mg_kai_correctedChloe.qs2"))
# Keep only the microglia subtype labels from the metadata of the cleaned microglia-only object.
mg_subcluster <- microglia@meta.data |>
select(sub.cluster)
DefaultAssay(aria) <- "SCT_SPLIT"
## Subset cells in the SCT_SPLIT assay of the full object
# SCT_SPLIT exists only for SPLIT-passing cells.
# Why: BANKSY uses assay matrices directly; restricting to Cells(SCT_SPLIT) prevents
# downstream failures from trying to run spatial modeling on cells without SCT_SPLIT data.
aria_sct_split <- subset(aria, cells = Cells(aria[["SCT_SPLIT"]]))
# Transfer subtype labels from cleaned microglia object into full object.
aria_sct_split$microglia_sub.cluster <- mg_subcluster[colnames(aria_sct_split), "sub.cluster"]
aria_sct_split$annotatedclusters_finer <- if_else(is.na(aria_sct_split$microglia_sub.cluster), aria_sct_split$annotatedclusters, aria_sct_split$microglia_sub.cluster)
# create new variable "annotatedclusters_finer" in the metadata including major cell type and microglia subtype
aria_sct_split$annotatedclusters_finer <- ifelse(
is.na(aria_sct_split$microglia_sub.cluster),
as.character(aria_sct_split$annotatedclusters),
as.character(aria_sct_split$microglia_sub.cluster))
# Cells not present in cleaned microglia return "Microglia" in "annotatedclusters_finer".
# This is expected because 11,796 microglia were removed during microglia QC cleaning.
# Drop cells still labeled exactly "Microglia" (i.e., no refined subtype assigned).
aria_sct_split <-subset(aria_sct_split, subset = annotatedclusters_finer != "Microglia")
# qs2::qs_save(aria_sct_split, paste0(seurat_path, "20260402_fullobj_correctedChloe_sct_SPLIT_microglia_cleaned.qs2"))
aria_sct_split <- RunBanksy(
aria_sct_split,
lambda = 0.8,
assay = "SCT_SPLIT",
slot = "data", # SCT-normalised expression
features = "all", # 474 variable features from SCT_SPLIT
k_geom = 15, # spatial neighbours per cell
dimx = "x", # x coordinate in metadata
dimy = "y", # y coordinate in metadata
use_agf = TRUE, # use AGF (false by default)
group = "Slide", # stagger coordinates between slides (overlap between slides, not within)
split.scale = TRUE) # within-slide z-scaling)
# Active assay is now BANKSY; do NOT call ScaleData
aria_sct_split <- RunPCA(aria_sct_split,
assay = "BANKSY",
features = rownames(aria_sct_split), # all BANKSY features
npcs = 50)
aria_sct_split <- RunUMAP(aria_sct_split, dims = 1:30, seed.use = 42,
reduction.name = "umap_banksy", reduction.key = "BKSYUMAP_")
aria_sct_split <- FindNeighbors(aria_sct_split, dims = 1:30)
aria_sct_split <- FindClusters(aria_sct_split, resolution = 0.5, random.seed = 42)
# Store BANKSY domains in an explicit column (avoids confusion with pre-existing banksy_domain)
aria_sct_split$banksy_domain <- aria_sct_split$BANKSY_snn_res.0.5
Idents(aria_sct_split) <- "banksy_domain"
qs2::qs_save(aria_sct_split, paste0(seurat_path, "20260402_fullobj_correctedChloe_sct_SPLIT_microglia_cleaned.qs2_banksy_agf=TRUE_sct.qs2"))Domains plotted in tissue space using the x/y metadata coordinates, one panel per brain. The staggered-coordinate overview (all brains side-by-side) uses staggered_x/staggered_y written by RunBanksy.
meta <- aria_sct_split@meta.data |> as_tibble(rownames = "cell")
# Colour palette: one colour per domain
n_domains <- length(unique(aria_sct_split$banksy_domain))
domain_cols <- kelly.colors(n_domains) |> setNames(sort(unique(aria_sct_split$banksy_domain)))
sample_ids <- sort(unique(meta$sample_ID))
plots <- map2(sample_ids, seq_along(sample_ids), \(sid, i) {
df <- meta |> filter(sample_ID == sid)
ggplot(df, aes(x = x, y = y, colour = banksy_domain)) +
geom_point(size = 0.2, alpha = 0.7, show.legend = (i == 1)) +
scale_colour_manual(
values = domain_cols,
limits = names(domain_cols),
drop = FALSE
) +
coord_equal() +
scale_y_reverse() +
labs(title = sid, colour = "Domain") +
theme_void(base_size = 10) +
theme(legend.position = if (i == 1) "left" else "none")
})
wrap_plots(plots, nrow = 2) +
plot_annotation(title = "BANKSY tissue domains per brain") &
guides(colour = guide_legend(override.aes = list(size = 4)))domains <- sort(unique(aria_sct_split$banksy_domain))
bg <- meta |> slice_sample(prop = 0.01) # 1 % of cells for grey background
domain_plots <- map(domains, \(d) {
fg <- meta |> filter(banksy_domain == d)|> slice_sample(prop = 0.02)
ggplot(mapping = aes(x = staggered_sdimx, y = staggered_sdimy)) +
geom_point(data = bg, colour = "grey90", size = 0.05, alpha = 0.5) +
geom_point(data = fg, colour = domain_cols[[as.character(d)]], size = 1e-300, alpha = 0.6) +
coord_equal() +
labs(title = paste("Domain", d)) +
theme_void(base_size = 10) +
theme(legend.position = "none",
panel.border = element_rect(colour = "grey70", fill = NA, linewidth = 0.5))
})
wrap_plots(domain_plots, ncol = 3)Each dot represents one cell type (Y) in one BANKSY domain (X).
Dot size encodes the percentage of cells in that domain belonging to that cell type. Dot colour encodes the treatment balance for that specific cell type within that domain: the fraction of cells originating from Aducanumab-treated brains (out of the 3 Adu + 3 IgG brains). A white dot means the cell type is equally contributed by both treatment groups (~50 % Adu); red means it is disproportionately from Adu brains; blue means it is disproportionately from IgG brains.
Dots are grey when the domain × cell-type combination contains fewer than 50 cells in total, as the Adu fraction would be unreliable at that sample size.
Cell type × domain combinations representing less than 1 % of cells in that domain are not shown.
ct_frac <- meta |>
count(banksy_domain, annotatedclusters_finer) |>
group_by(banksy_domain) |>
mutate(pct = n / sum(n) * 100) |>
ungroup()
adu_frac <- meta |>
count(banksy_domain, annotatedclusters_finer, Treatment.Group) |>
group_by(banksy_domain, annotatedclusters_finer) |>
mutate(total = sum(n), trt_frac = n / total) |>
ungroup() |>
filter(Treatment.Group == "Adu", total >= 50) |>
select(banksy_domain, annotatedclusters_finer, adu_frac = trt_frac)
dot_df <- ct_frac |>
left_join(adu_frac, by = c("banksy_domain", "annotatedclusters_finer"))
ggplot(dot_df |> filter(pct >= 1), aes(x = banksy_domain, y = annotatedclusters_finer,
size = pct, colour = adu_frac)) +
geom_point(colour = "grey20") +
geom_point() +
scale_size_area(max_size = 10, limits = c(0, 100), name = "% cells") +
scale_colour_gradient2(low = "#2166AC", mid = "white", high = "#B2182B",
midpoint = 0.5, limits = c(0, 1), na.value = "grey80",
name = "Adu fraction") +
labs(title = "Cell-type composition per BANKSY domain",
subtitle = "Dot size = % of cells in domain\ncolour = fraction Aducanumab (red) vs IgG (blue)",
x = "Domain", y = NULL) +
theme_minimal(base_size = 13) +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
panel.grid.major = element_line(colour = "grey92"))Note that colour can vary between cell types within the same domain: each cell type has its own Adu/IgG balance depending on how evenly it is distributed across the six brains. A cell type that is rare or spatially patchy may happen to be better represented in one treatment group simply because of sampling, whereas an abundant and ubiquitous cell type will reliably track close to 0.5 in every domain. Consistent red or blue across an entire domain (column) would indicate a global treatment imbalance in that tissue region; isolated red or blue dots within an otherwise white column point to cell-type-specific enrichment.
DefaultAssay(aria_sct_split) <- "SCT_SPLIT"
Idents(aria_sct_split) <- "banksy_domain" # explicit BANKSY domains, not seurat_clusters
markers <- FindAllMarkers(
aria_sct_split,
assay = "SCT_SPLIT",
only.pos = TRUE,
min.pct = 0.1,
logfc.threshold = 0.25,
test.use = "wilcox")
write_csv(markers, "spatial/banksy_domain_markers.csv")library(kableExtra)
top_markers <- markers |>
group_by(cluster) |>
slice_min(p_val_adj, n = 5) |>
ungroup() |>
dplyr::select(cluster, gene, avg_log2FC, pct.1, pct.2, p_val_adj)
top_markers |>
group_by(cluster) |>
slice_max(avg_log2FC, n = 5) |>
kbl(caption = "Top 5 marker genes per BANKSY domain") |>
kable_styling("striped", full_width = FALSE)| cluster | gene | avg_log2FC | pct.1 | pct.2 | p_val_adj |
|---|---|---|---|---|---|
| 0 | Fezf2 | 3.8318251 | 0.459 | 0.060 | 0 |
| 0 | Syt6 | 2.4477337 | 0.193 | 0.039 | 0 |
| 0 | Lhx6 | 2.1133001 | 0.123 | 0.037 | 0 |
| 0 | Slc17a7 | 1.8987641 | 0.963 | 0.602 | 0 |
| 0 | Cnr1 | 1.7988899 | 0.650 | 0.263 | 0 |
| 1 | Lamp5 | 3.6664012 | 0.901 | 0.209 | 0 |
| 1 | Pparg | 3.6561838 | 0.307 | 0.026 | 0 |
| 1 | Rspo1 | 3.5497287 | 0.312 | 0.038 | 0 |
| 1 | Tcap | 3.4218902 | 0.152 | 0.016 | 0 |
| 1 | Cux2 | 2.9136563 | 0.716 | 0.153 | 0 |
| 2 | Calb2 | 4.1347960 | 0.495 | 0.108 | 0 |
| 2 | Slc17a6 | 4.0700749 | 0.691 | 0.185 | 0 |
| 2 | Nts | 3.3298266 | 0.142 | 0.029 | 0 |
| 2 | Htr2c | 2.7275478 | 0.576 | 0.144 | 0 |
| 2 | Tox2 | 2.4873620 | 0.532 | 0.110 | 0 |
| 3 | Mog | 2.7682941 | 0.933 | 0.191 | 0 |
| 3 | Cldn11 | 2.6956733 | 0.979 | 0.289 | 0 |
| 3 | Ermn | 2.6730914 | 0.946 | 0.240 | 0 |
| 3 | Mag | 2.5602917 | 0.765 | 0.160 | 0 |
| 3 | Aspa | 2.5038626 | 0.762 | 0.172 | 0 |
| 4 | Aspa | 2.8782954 | 0.917 | 0.161 | 0 |
| 4 | Mag | 2.8025234 | 0.911 | 0.150 | 0 |
| 4 | Mog | 2.7148775 | 0.979 | 0.189 | 0 |
| 4 | Ermn | 2.7010148 | 0.985 | 0.238 | 0 |
| 4 | Ptgds | 2.5761414 | 0.928 | 0.299 | 0 |
| 5 | Tagln | 6.1144764 | 0.253 | 0.016 | 0 |
| 5 | Lum | 5.2463410 | 0.251 | 0.013 | 0 |
| 5 | Mrc1 | 5.2199345 | 0.187 | 0.010 | 0 |
| 5 | Col1a2 | 5.1997191 | 0.436 | 0.042 | 0 |
| 5 | Serpinf1 | 4.5028792 | 0.341 | 0.034 | 0 |
| 6 | Agt | 4.8736976 | 0.773 | 0.091 | 0 |
| 6 | Slc7a10 | 3.2771281 | 0.613 | 0.094 | 0 |
| 6 | Apoc1 | 3.1848933 | 0.132 | 0.016 | 0 |
| 6 | Aqp4 | 3.0879737 | 0.931 | 0.245 | 0 |
| 6 | Gja1 | 2.8605856 | 0.888 | 0.243 | 0 |
| 7 | Siglech | 4.5392728 | 0.617 | 0.034 | 0 |
| 7 | P2ry12 | 4.4618150 | 0.946 | 0.120 | 0 |
| 7 | Selplg | 4.4556955 | 0.612 | 0.035 | 0 |
| 7 | Gpr34 | 4.4386345 | 0.901 | 0.074 | 0 |
| 7 | Nlrp3 | 4.3347817 | 0.234 | 0.012 | 0 |
| 8 | Plpp3 | 3.4519005 | 0.786 | 0.114 | 0 |
| 8 | Hsd11b1 | 3.2401633 | 0.370 | 0.050 | 0 |
| 8 | Slc7a10 | 3.2263785 | 0.730 | 0.092 | 0 |
| 8 | Htra1 | 3.1352654 | 0.925 | 0.388 | 0 |
| 8 | Gja1 | 3.0440150 | 0.906 | 0.248 | 0 |
| 9 | Cxcl12 | 4.4010489 | 0.658 | 0.056 | 0 |
| 9 | Emcn | 4.3855551 | 0.689 | 0.043 | 0 |
| 9 | Cldn5 | 4.2499565 | 0.896 | 0.098 | 0 |
| 9 | Flt1 | 4.1730322 | 0.973 | 0.142 | 0 |
| 9 | Clec2d | 4.0956329 | 0.714 | 0.064 | 0 |
| 10 | Cldn11 | 2.5617916 | 0.992 | 0.315 | 0 |
| 10 | Mag | 2.5557706 | 0.929 | 0.177 | 0 |
| 10 | Mog | 2.3823084 | 0.975 | 0.218 | 0 |
| 10 | Aspa | 2.3659284 | 0.909 | 0.189 | 0 |
| 10 | Ermn | 2.3492393 | 0.975 | 0.266 | 0 |
| 11 | Lypd1 | 3.7987208 | 0.601 | 0.164 | 0 |
| 11 | Otof | 2.7064330 | 0.436 | 0.074 | 0 |
| 11 | Car12 | 2.2521226 | 0.397 | 0.082 | 0 |
| 11 | Slc17a7 | 1.8828550 | 0.977 | 0.635 | 0 |
| 11 | Grin2b | 1.6646557 | 0.942 | 0.427 | 0 |
| 12 | Cd14 | 3.8850346 | 0.733 | 0.074 | 0 |
| 12 | Bcl2a1b | 3.8290491 | 0.903 | 0.121 | 0 |
| 12 | Itgax | 3.6991997 | 0.392 | 0.044 | 0 |
| 12 | Trem2 | 3.6396776 | 0.983 | 0.190 | 0 |
| 12 | Cx3cr1 | 3.6077357 | 0.980 | 0.176 | 0 |
| 13 | Kcnj8 | 4.2652891 | 0.209 | 0.016 | 0 |
| 13 | Cldn5 | 4.0356056 | 0.888 | 0.106 | 0 |
| 13 | Emcn | 4.0003783 | 0.625 | 0.051 | 0 |
| 13 | Atp13a5 | 3.9099255 | 0.294 | 0.062 | 0 |
| 13 | Flt1 | 3.8696781 | 0.963 | 0.151 | 0 |
| 14 | Ido1 | 5.3846594 | 0.177 | 0.005 | 0 |
| 14 | Otof | 3.7392012 | 0.442 | 0.075 | 0 |
| 14 | Tac1 | 3.6558283 | 0.397 | 0.069 | 0 |
| 14 | Adcy5 | 2.9797052 | 0.895 | 0.405 | 0 |
| 14 | Syt6 | 2.2794430 | 0.225 | 0.053 | 0 |
| 15 | Tdo2 | 4.9739448 | 0.304 | 0.017 | 0 |
| 15 | Dgat2 | 2.9570426 | 0.910 | 0.426 | 0 |
| 15 | Grin2a | 2.5603390 | 0.899 | 0.406 | 0 |
| 15 | Jun | 2.5155856 | 0.559 | 0.111 | 0 |
| 15 | Car12 | 2.3708464 | 0.688 | 0.077 | 0 |
| 16 | Pou3f1 | 3.0380656 | 0.373 | 0.051 | 0 |
| 16 | Grin2a | 2.7089430 | 0.955 | 0.406 | 0 |
| 16 | Grin2b | 2.2477023 | 0.945 | 0.431 | 0 |
| 16 | Slc17a7 | 2.0241629 | 0.973 | 0.638 | 0 |
| 16 | Chst1 | 1.9649632 | 0.963 | 0.627 | 0 |
| 17 | Gck | 4.8746752 | 0.194 | 0.007 | 0 |
| 17 | Sst | 3.8386219 | 0.215 | 0.032 | 0 |
| 17 | Nts | 3.7325257 | 0.226 | 0.035 | 0 |
| 17 | Tac1 | 3.2342157 | 0.473 | 0.070 | 0 |
| 17 | Lhx6 | 2.7354747 | 0.192 | 0.045 | 0 |
| 18 | Ccdc153 | 9.6766644 | 0.543 | 0.005 | 0 |
| 18 | Tmem212 | 9.1276853 | 0.508 | 0.005 | 0 |
| 18 | Dnah11 | 7.2236117 | 0.539 | 0.011 | 0 |
| 18 | Plin5 | 6.6961155 | 0.219 | 0.003 | 0 |
| 18 | C3 | 4.4251947 | 0.288 | 0.025 | 0 |
| 19 | Cxcl10 | 4.4873379 | 0.187 | 0.020 | 0 |
| 19 | Ifit1 | 1.7999332 | 0.176 | 0.065 | 0 |
| 19 | Ifit2 | 1.6803218 | 0.291 | 0.151 | 0 |
| 19 | Cst7 | 1.0784452 | 0.335 | 0.172 | 0 |
| 19 | Mbp | 0.3814115 | 0.960 | 0.897 | 0 |
| 20 | Car12 | 6.0923035 | 0.903 | 0.085 | 0 |
| 20 | Ttr | 4.5807876 | 0.894 | 0.131 | 0 |
| 20 | Ltc4s | 4.3792535 | 0.733 | 0.062 | 0 |
| 20 | Adh1 | 4.2638698 | 0.128 | 0.008 | 0 |
| 20 | Tgfbi | 3.8366659 | 0.410 | 0.087 | 0 |
| 21 | Syt6 | 4.8129215 | 0.576 | 0.057 | 0 |
| 21 | Sncg | 3.7196854 | 0.413 | 0.049 | 0 |
| 21 | Calb2 | 3.4191845 | 0.543 | 0.139 | 0 |
| 21 | Slc17a6 | 2.0037688 | 0.821 | 0.226 | 0 |
| 21 | Acly | 1.8469325 | 0.976 | 0.827 | 0 |