library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
# Combine the per-category allrecipes exports into one CSV.
#
# Each input file is read, tagged with the scrape date and its category, and
# given a per-file `unique_id`. The tagged frames are then stacked and written
# to "allrecipes_combined.csv".

# Input files. The category label is derived from the file name, which is
# expected to look like "allrecipes_<category>.csv".
csv_files <- c(
  "allrecipes_breakfastbrunch.csv",
  "allrecipes_dinner.csv",
  "allrecipes_lunch.csv"
)

# Date stamped on every row (kept as the original m/d/yyyy string).
scrape_date <- "4/14/2023"

# Preallocate one named slot per input file.
dfs <- vector("list", length(csv_files))
names(dfs) <- csv_files

for (csv_file in csv_files) {
  df <- read.csv(csv_file, stringsAsFactors = FALSE)
  n_rows <- nrow(df)

  # rep() sizes the value to the frame, so the assignment stays valid even
  # for a file that has a header but no rows.
  df$scrape_date <- rep(scrape_date, n_rows)

  # Strip directory and extension with path helpers rather than the regex
  # ".csv" (whose "." matches any character), and anchor the prefix so only a
  # leading "allrecipes_" is removed.
  category <- str_remove(
    tools::file_path_sans_ext(basename(csv_file)),
    "^allrecipes_"
  )
  df$category <- rep(category, n_rows)

  # ID = first two letters of the category + zero-padded row number, e.g.
  # "di001". Numbering comes from nrow(df), so it does not depend on a
  # `recipe_name` column being present. A single sprintf() call is used
  # because it returns a zero-length result for zero rows, whereas
  # paste0(prefix, character(0)) would return the bare prefix.
  df$unique_id <- sprintf("%s%03d", substr(category, 1, 2), seq_len(n_rows))

  # Move unique_id to the front, keeping the other columns in their order.
  # A logical mask avoids the names(df)[-which(...)] idiom, which selects
  # nothing when which() comes back empty.
  other_cols <- names(df)[names(df) != "unique_id"]
  df <- df[c("unique_id", other_cols)]

  dfs[[csv_file]] <- df
}

# Stack all categories; bind_rows() matches columns by name.
combined_df <- bind_rows(dfs)
write.csv(combined_df, file = "allrecipes_combined.csv", row.names = FALSE)