mustlinkMix Demonstration
Published:
A user-informed clustering tree algorithm for cell population identification in flow cytometry data.
Install mustlinkMix.
# Install the development version of mustlinkMix from its GitHub repository.
remotes::install_github(repo = "UltanPDoherty/mustlinkMix")
Load and plot data from the healthyFlowData package.
# Attach healthyFlowData and load its `hd` dataset, then pull the `exprs`
# slot out of the first element of hd.flowSet. The result, hfd1, is the data
# used throughout the rest of this walkthrough.
library("healthyFlowData")
data("hd")
hfd1 <- methods::slot(hd.flowSet[[1]], "exprs")

# Pairs plot of every column of hfd1, with density panels in the upper
# triangle and the progress bar switched off.
GGally::ggpairs(
  data = hfd1,
  upper = list(continuous = "density"),
  progress = FALSE
)
Prepare a plusminus table which describes three populations.
- CD4+ T Cells (CD4+CD8-CD3+CD19-)
- CD8+ T Cells (CD4-CD8+CD3+CD19-)
- B Cells (CD4-CD8-CD3-CD19+)
# Population-by-marker sign table: one row per population, one column per
# column of hfd1. +1 marks a marker the population is positive for, -1 a
# marker it is negative for.
plusminus <- as.data.frame(matrix(
  c(
     1, -1,  1, -1, # CD4+ T cells
    -1,  1,  1, -1, # CD8+ T cells
    -1, -1, -1,  1  # B cells
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(c("CD4+_T", "CD8+_T", "B"), NULL)
))
# Marker names are taken from the data so the table lines up with hfd1.
colnames(plusminus) <- colnames(hfd1)
plusminus
## CD4 CD8 CD3 CD19
## CD4+_T 1 -1 1 -1
## CD8+_T -1 1 1 -1
## B -1 -1 -1 1
Use the gatetree function from the gateTree package.
# Run gateTree on hfd1 using the plusminus table. The result is stored in
# hfd1_gatetree; its `labels` and `subsetter` elements are used further down.
# Both show_plot flags are FALSE.
hfd1_gatetree <- gateTree::gatetree(
  hfd1,
  plusminus,
  show_plot = rep(FALSE, 2),
  min_depth = 10,
  min_scaled_bic_diff = 50
)
Plot the data, coloured according to the gateTree labels.
# Pairs plot of hfd1 with points coloured by gateTree label. The labels are
# shifted up by one before conversion to a factor, and the Okabe-Ito palette
# is applied to both colour and fill using the same four-entry order.
GGally::ggpairs(
  data = hfd1,
  mapping = ggplot2::aes(colour = as.factor(1 + hfd1_gatetree$labels)),
  upper = list(continuous = "density"),
  progress = FALSE
) +
  ggokabeito::scale_colour_okabe_ito(order = c(9, 1, 2, 3)) +
  ggokabeito::scale_fill_okabe_ito(order = c(9, 1, 2, 3))
Apply mustlink.
# Fit mustlink to hfd1 with five clusters, passing the `subsetter` element of
# the gateTree result as the zone matrix.
# NOTE(review): init_seed presumably fixes the random initialisation so the
# run is reproducible; confirm against the mustlinkMix documentation.
hfd1_mustlink <- mustlinkMix::mustlink(
  hfd1,
  init_method = "mlkmpp",
  init_seed = 123,
  zone_matrix = hfd1_gatetree$subsetter,
  zone_percent = 100,
  clust_num = 5
)
## 11:14:12 E-Step Number: 1, Log-likelihood: -23719.61352
## 11:14:12 E-Step Number: 2, Log-likelihood: -22911.81572
## 11:14:12 E-Step Number: 3, Log-likelihood: -22502.68615
## 11:14:12 E-Step Number: 4, Log-likelihood: -22473.14504
## 11:14:12 E-Step Number: 5, Log-likelihood: -22470.79092
## 11:14:13 E-Step Number: 6, Log-likelihood: -22470.10189
## 11:14:13 E-Step Number: 7, Log-likelihood: -22469.97368
## 11:14:13 E-Step Number: 8, Log-likelihood: -22469.95122
## 11:14:13 E-Step Number: 9, Log-likelihood: -22469.94685
## 11:14:13 E-Step Number: 10, Log-likelihood: -22469.94594
## 11:14:13 E-Step Number: 11, Log-likelihood: -22469.94573
## 11:14:13 E-Step Number: 12, Log-likelihood: -22469.94569
## 11:14:13 E-Step Number: 13, Log-likelihood: -22469.94568
## 11:14:13 E-Step Number: 14, Log-likelihood: -22469.94567
## 11:14:13 E-Step Number: 15, Log-likelihood: -22469.94567
## ...EM converged at 2024-05-09 11:14:13.248376
Plot the data, coloured according to the mustlink labels.
# Pairs plot of hfd1 with points coloured by the mustlink cluster labels. The
# Okabe-Ito palette is applied to both colour and fill using the same
# five-entry order.
GGally::ggpairs(
  data = hfd1,
  mapping = ggplot2::aes(colour = as.factor(hfd1_mustlink$clust_labels)),
  upper = list(continuous = "density"),
  progress = FALSE
) +
  ggokabeito::scale_colour_okabe_ito(order = c(1, 2, 3, 5, 6)) +
  ggokabeito::scale_fill_okabe_ito(order = c(1, 2, 3, 5, 6))