# Split each ingredient description into one row per word, drop filler words
# and tokens containing punctuation, and save the cleaned table as a CSV.

# Directory that holds the input file and receives the output file. Paths are
# built with file.path() rather than setwd(), so running this script does not
# change the working directory of the R session.
data_dir <- file.path(
  "C:/Users/Bobby/OneDrive/Documents/School",
  "Information Management",
  "Data",
  "separated_data"
)

my_data <- read.csv(file.path(data_dir, "mcdonalds_ingredients_2.csv"))

# Fail early with a clear message if the expected columns are absent.
stopifnot(all(c("mcd_unique_id", "ingredient") %in% names(my_data)))

# One character vector of space-separated tokens per input row.
words <- strsplit(as.character(my_data$ingredient), " ", fixed = TRUE)

# Long format: repeat each id once per token of its row. lengths() always
# returns an integer vector, so this also works when the input has no rows.
new_data <- data.frame(
  mcd_unique_id = rep(my_data$mcd_unique_id, lengths(words)),
  ingredient = as.character(unlist(words)),
  stringsAsFactors = FALSE
)

# Words that are not ingredients and should be removed from the output.
remove_words <- c(
  "soft,", "warm", "with", "the", "taste", "of", "maple.", "McDonald's",
  "McGriddles", "recipe", "features", "fluffy", "folded", "and", "melty",
  "have", "no", "artificial", "preservatives", "or", "colors", "from",
  "sources.", "The", "430", "calories.", "Pick", "up", "on", "your",
  "terms", "through", "drive", "thru", "curbside", "pickup",
  "when", "you", "Mobile", "Order", "App", "download", "registration",
  "required.", "Our", "cakes"
)

tokens <- new_data$ingredient

# Keep a token only if it:
#   * is not NA (an NA here would otherwise turn into an all-NA output row),
#   * is longer than one character,
#   * is not one of the filler words above,
#   * contains nothing but alphanumeric or whitespace characters.
# [:space:] is used because "\\s" is not treated as whitespace inside a
# bracket expression by R's default regex engine.
keep <- !is.na(tokens) &
  nchar(tokens) > 1 &
  !(tokens %in% remove_words) &
  !grepl("[^[:alnum:][:space:]]", tokens)

new_data <- new_data[keep, ]

write.csv(new_data, file.path(data_dir, "my_new_file.csv"), row.names = FALSE)