# Tokenise ingredient descriptions into one row per word.
#
# Reads "mcdonalds_ingredients_2.csv" (columns `mcd_unique_id` and
# `ingredient`), splits every description into words, drops filler words and
# any token that is not purely alphabetic, and writes the remaining
# (mcd_unique_id, ingredient) pairs to "my_new_file.csv" in the same folder.

# Folder holding the input and output files. Paths are built explicitly with
# file.path() instead of calling setwd(), so running this script no longer
# changes the session's working directory. Set the MCD_DATA_DIR environment
# variable to point the script at a different folder.
default_dir <- file.path(
  "C:/Users/Bobby/OneDrive/Documents/School",
  "Information Management/Data/separated_data"
)
data_dir <- Sys.getenv("MCD_DATA_DIR", unset = default_dir)
input_path <- file.path(data_dir, "mcdonalds_ingredients_2.csv")
output_path <- file.path(data_dir, "my_new_file.csv")

# Read in the CSV file
my_data <- read.csv(input_path)

# `$` is kept on purpose so that column lookup behaves exactly as before;
# fail early with a clear message if either column is absent.
ids <- my_data$mcd_unique_id
descriptions <- my_data$ingredient
if (is.null(ids) || is.null(descriptions)) {
  stop(
    "Input file must contain 'mcd_unique_id' and 'ingredient' columns: ",
    input_path,
    call. = FALSE
  )
}

# Split the ingredient column into individual words. Splitting on runs of any
# whitespace (not just a single space) keeps tabs/newlines out of the tokens
# and avoids a flood of empty tokens from repeated spaces.
words <- strsplit(as.character(descriptions), "[[:space:]]+")

# Combine the words with the unique IDs into a new data frame: each ID is
# repeated once per word found in its description.
new_data <- data.frame(
  mcd_unique_id = rep(ids, lengths(words)),
  ingredient = as.character(unlist(words)),
  stringsAsFactors = FALSE
)

# Words to discard (marketing copy and filler, not ingredients). Matching is
# exact and case-sensitive. Entries containing punctuation or digits are also
# caught by the alphabetic-only filter below; they are kept here to document
# what was seen in the data.
remove_words <- c(
  "soft,", "warm", "with", "the", "taste", "of", "maple.", "McDonald's",
  "McGriddles", "recipe", "features", "fluffy", "folded", "and", "melty",
  "have", "no", "artificial", "preservatives", "or", "colors", "from",
  "sources.", "The", "430", "calories.", "Pick", "up", "on", "your",
  "terms", "through", "drive", "thru", "curbside", "pickup",
  "when", "you", "Mobile", "Order", "App", "download", "registration",
  "required.", "Our", "cakes"
)

# Build a single keep-mask so that every filter is NA-safe. Subsetting with
# an NA in the mask would otherwise insert an all-NA row into the result.
token <- new_data$ingredient
keep <- !is.na(token) &
  # Remove any rows that only contain a single character (or are empty)
  nchar(token) > 1 &
  # Remove rows containing specified words
  !(token %in% remove_words) &
  # Remove rows with numbers or special characters: keep only tokens made up
  # entirely of letters. (The previous pattern "[^[:alnum:]\\s]" let digits
  # through, and "\\s" is not a whitespace class inside a POSIX bracket
  # expression.)
  grepl("^[[:alpha:]]+$", token)
new_data <- new_data[keep, ]

# Export the new data frame to a CSV file
write.csv(new_data, output_path, row.names = FALSE)