Main review topics (MeSH terms)
Each record usually has multiple associated MeSH terms, which are separated by a semicolon (or a semicolon followed by a newline symbol).
The main topics of the article are denoted by the MeSH terms with asterisks *, but some records will not have main topics, so it's safer to ignore this information.
Some MeSH headings also have qualifiers (sometimes called subheadings) - these are shown after a forward-slash and are used to “describe the specific aspects of the MeSH heading that are pertinent to the article” (https://www.nlm.nih.gov/bsd/indexing/training/SUB_010.html). MeSH terms and qualifiers could be analysed separately.
# Change to long format (one MeSH term per row - one or multiple rows per
# study). Terms are delimited by a semicolon followed by whitespace (a space,
# a newline or a Windows "\r\n" line ending), so one regex covers every variant.
t_MESH <- mdata %>%
  select(MeSH_terms) %>%
  separate_rows(MeSH_terms, sep = ";\\s+")
# Bring to lower case and remove leading and trailing whitespace; the result
# is a single character vector with one MeSH term per element.
tmesh <- str_to_lower(trimws(t_MESH$MeSH_terms))
# Remove every major-topic asterisk. A term can carry more than one (e.g. on
# several qualifiers) and sub() would strip only the first, leaving starred
# and unstarred spellings of the same qualifier to be counted separately.
tmesh <- trimws(gsub("*", "", tmesh, fixed = TRUE))
# Separate the text before and after the first " / " into two columns
# (heading and qualifiers, respectively). With n = 2 everything after the
# first separator stays together in the second column.
tmesh2 <- str_split_fixed(tmesh, " / ", n = 2)
# Create a MeSH data frame with three columns: terms, headings, qualifiers.
# Naming the columns up front avoids the auto-generated ...1/...2/...3 names
# that binding unnamed vectors produces.
t_MESH2 <- tibble(
  MeSH_terms = tmesh,
  MeSH_headings = tmesh2[, 1],
  MeSH_qualifiers = tmesh2[, 2]
)
# Count how often each MeSH heading occurs, most frequent first.
# Missing headings are dropped explicitly. Terms were lower-cased before this
# point, so a literal "NA" marker would now read "na" and the former
# comparison against "NA" could never match a string; empty headings (which
# may arise from records without MeSH terms) were not removed at all.
MeSH_headings_counts <- t_MESH2 %>%
  count(MeSH_headings) %>%
  filter(
    !is.na(MeSH_headings),
    !MeSH_headings %in% c("", "na", "NA")
  ) %>%
  arrange(desc(n))
# str(MeSH_headings_counts)

# Turn the headings into a factor whose levels run from least to most
# frequent, so that the flipped bar chart shows the largest bar on top.
headings_by_count <- order(MeSH_headings_counts$n, decreasing = FALSE)
MeSH_headings_counts$MeSH_headings <- factor(
  MeSH_headings_counts$MeSH_headings,
  levels = MeSH_headings_counts$MeSH_headings[headings_by_count]
)
# Save the ordered headings for the next plot.
MESH_headings_order <- MeSH_headings_counts$MeSH_headings[headings_by_count]

# Simple barplot with the top 10 MeSH headings. head() returns fewer rows when
# fewer than 10 headings exist, whereas [1:10, ] would pad the data with NA rows.
ggplot(head(MeSH_headings_counts, 10), aes(x = MeSH_headings, y = n)) +
  geom_col(fill = "#919191", width = 0.7) +
  theme_light() +
  labs(title = "Top 10 MeSH headings reviewed") +
  coord_flip() +
  scale_y_continuous(name = "Article count") +
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_blank()
  )
#ggsave(here("plots","figure_MeSH_headings.pdf"), width = 4, height = 6, units = "cm", scale = 2, device = cairo_pdf)
# Count how often each MeSH qualifier occurs, most frequent first.
# The qualifier column holds everything after the first " / " of a term, so a
# heading with several qualifiers arrives as "qualifier1 / qualifier2"; these
# are split into one row each so every qualifier is counted on its own.
# Any remaining major-topic asterisks are stripped so that starred and
# unstarred spellings of a qualifier are pooled.
# Terms without a qualifier (empty string) and missing values are dropped;
# "na" is listed because terms were lower-cased before this point.
MeSH_qualifiers_counts <- t_MESH2 %>%
  select(MeSH_qualifiers) %>%
  separate_rows(MeSH_qualifiers, sep = " / ") %>%
  mutate(
    MeSH_qualifiers = trimws(gsub("*", "", MeSH_qualifiers, fixed = TRUE))
  ) %>%
  count(MeSH_qualifiers) %>%
  filter(
    !is.na(MeSH_qualifiers),
    !MeSH_qualifiers %in% c("", "na", "NA")
  ) %>%
  arrange(desc(n))
# str(MeSH_qualifiers_counts)

# Turn the qualifiers into a factor whose levels run from least to most
# frequent, so that the flipped bar chart shows the largest bar on top.
qualifiers_by_count <- order(MeSH_qualifiers_counts$n, decreasing = FALSE)
MeSH_qualifiers_counts$MeSH_qualifiers <- factor(
  MeSH_qualifiers_counts$MeSH_qualifiers,
  levels = MeSH_qualifiers_counts$MeSH_qualifiers[qualifiers_by_count]
)
# Save the ordered qualifiers for the next plot.
MESH_qualifiers_order <-
  MeSH_qualifiers_counts$MeSH_qualifiers[qualifiers_by_count]

# Simple barplot with all MeSH qualifiers.
ggplot(MeSH_qualifiers_counts, aes(x = MeSH_qualifiers, y = n)) +
  geom_col(fill = "#919191", width = 0.7) +
  theme_light() +
  labs(title = "MeSH qualifiers reviewed") +
  coord_flip() +
  scale_y_continuous(name = "Article count") +
  theme(
    axis.title.x = element_text(size = 10),
    axis.title.y = element_blank()
  )
#ggsave(here("plots","figure_MeSH_qualifiers.pdf"), width = 4, height = 6, units = "cm", scale = 2, device = cairo_pdf)