# Matthew K Defenderfer authored 4df26284
# This script reproducibly cleans the flora and pheno files.

# Attach the packages whose functions the script calls (%>%, select, filter,
# mutate, str_remove, fct_recode) so it also runs in a fresh R session.
library(dplyr)
library(stringr)
library(forcats)

# Read both raw tables and drop every column whose name starts with "X".
# stringsAsFactors = TRUE is spelled out because later steps call levels() and
# droplevels() on these columns; since R 4.0.0 read.csv() no longer converts
# strings to factors by default.
pheno <- read.csv(
  "~/Google Drive/Grad School/GR 770 Statistics/R Labs/Data/Pheno.csv",
  stringsAsFactors = TRUE
) %>%
  select(-starts_with("X"))
flora <- read.csv(
  "~/Google Drive/Grad School/GR 770 Statistics/R Labs/Data/Flora.csv",
  stringsAsFactors = TRUE
) %>%
  select(-starts_with("X"))
# Bind the two tables side by side (this relies on both files listing the
# subjects in the same row order), then keep only the phenotype columns of
# interest plus every taxon column whose name contains "k__".
mb <- cbind(pheno, flora) %>%
  select(AGE_YEARS, SEX, ALCOHOL_FREQUENCY, WEIGHT_KG, contains("k__"))
# Age and weight are not read in as clean numerics because the raw columns
# contain non-numeric responses. Going through as.character() converts the
# values correctly whether the column is a factor, a character vector or
# already numeric (as.numeric(levels(x))[x] is only valid for factors).
# Unparseable entries become NA (with a coercion warning) and are removed by
# the filter below together with implausible ages and weights.
mb <- mb %>%
  mutate(
    AGE_YEARS = as.numeric(as.character(AGE_YEARS)),
    WEIGHT_KG = as.numeric(as.character(WEIGHT_KG))
  ) %>%
  filter(AGE_YEARS >= 10, between(WEIGHT_KG, 20, 250))
# Set every sex designation other than "female" or "male" to NA.
# Rebuilding the factor with explicit levels tests each row against BOTH
# allowed values; comparing with `!= c("female", "male")` would recycle the
# two values across rows and blank out valid entries depending on row position.
# droplevels() then removes a level that no remaining row uses.
mb$SEX <- droplevels(factor(mb$SEX, levels = c("female", "male")))
# Strip the kingdom/phylum prefixes from the column names so that each taxon
# column is named by its phylum only (Bacteria prefix first, then Archaea).
colnames(mb) <- str_remove(
  str_remove(names(mb), "k__Bacteria.p__"),
  "k__Archaea.p__"
)
# Keep the phenotype columns and the five phyla used in the analysis.
mb <- select(
  mb,
  AGE_YEARS, SEX, ALCOHOL_FREQUENCY, WEIGHT_KG,
  Firmicutes, Bacteroidetes, Actinobacteria, Verrucomicrobia, Proteobacteria
)
# Tidy the alcohol-consumption responses. Work on a short helper variable
# first, then write the result back into mb.
acf <- as.character(mb$ALCOHOL_FREQUENCY)
# Keep a response only if it matches ANY of the recognised categories; a
# single alternation pattern is used because a vector of patterns would be
# recycled element-wise against the rows instead of being tried as a set.
# grepl() returns FALSE for missing values, so those stay NA.
acf[!grepl("Rare|Occa|Regu|Never|Daily", acf)] <- NA
# Rebuild the factor (only levels still in use remain) and shorten the
# verbose labels. `acf` is left as a one-column data frame, as before.
acf <- data.frame(
  acf = fct_recode(
    factor(acf),
    "Rarely" = "Rarely (a few times/month)",
    "Regularly" = "Regularly (3-5 times/week)",
    "Occasionally" = "Occasionally (1-2 times/week)"
  )
)
mb$ALCOHOL_FREQUENCY <- acf$acf
# Give the columns short names. The assignment is positional, so the order
# must match the column order produced by the preceding select().
names(mb) <- c("Age","Sex","ACF","Weight.kg","Fir","Bacter","Actin","Verru","Proteo")
# Drop every sample that has a 0 in any numeric column (the microbe
# abundances, plus age and weight).
mb <- filter_if(mb, is.numeric, all_vars(. != 0))