# allrecipes_format.R

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(stringr)

# Combine the per-category AllRecipes CSV exports into one file.
#
# Each input file is read, tagged with a scrape date and a category derived
# from its file name, given a per-category unique_id, and then all files are
# stacked and written to "allrecipes_combined.csv".

# Input CSV files; each is expected to be named "allrecipes_<category>.csv".
csv_files <- c(
  "allrecipes_breakfastbrunch.csv",
  "allrecipes_dinner.csv",
  "allrecipes_lunch.csv"
)

# Date stamped on every row (a fixed value, not the current date).
scrape_date <- "4/14/2023"

# Fail early and name every missing input, rather than erroring part-way
# through after some files have already been processed.
missing_files <- csv_files[!file.exists(csv_files)]
if (length(missing_files) > 0) {
  stop(
    "Input file(s) not found: ", paste(missing_files, collapse = ", "),
    call. = FALSE
  )
}

# Read one category file and add the scrape_date, category and unique_id
# columns, with unique_id moved to the first position.
#
# csv_file:    path to a file named "allrecipes_<category>.csv".
# scrape_date: value stored in the scrape_date column of every row.
# Returns a data frame with one row per row of the input file.
format_recipes <- function(csv_file, scrape_date) {
  df <- read.csv(csv_file, stringsAsFactors = FALSE)
  n_rows <- nrow(df)

  # Category is the file name without directory, ".csv" extension and
  # "allrecipes_" prefix. The patterns are escaped/anchored so that only the
  # real extension and prefix are removed (a bare ".csv" regex would also
  # match e.g. "xcsv" in the middle of a name).
  category <- basename(csv_file) %>%
    str_remove("\\.csv$") %>%
    str_remove("^allrecipes_")

  # rep() keeps these assignments valid for a header-only (0-row) file, where
  # assigning a length-1 value would be an error.
  df$scrape_date <- rep(scrape_date, n_rows)
  df$category <- rep(category, n_rows)

  # ID = first two letters of the category + zero-padded row number, e.g.
  # "di001". Numbering comes from the row count, so it does not depend on any
  # particular column being present in the file.
  # NOTE: categories sharing their first two letters would produce clashing
  # IDs; the three categories above do not.
  df$unique_id <- sprintf("%s%03d", substr(category, 1, 2), seq_len(n_rows))

  # Put unique_id first and keep every other column in its existing order.
  df[c("unique_id", setdiff(names(df), "unique_id"))]
}

# One formatted data frame per input file, named by file name.
dfs <- lapply(csv_files, format_recipes, scrape_date = scrape_date)
names(dfs) <- csv_files

# Combine all data frames into one; bind_rows() matches columns by name.
combined_df <- bind_rows(dfs)

# Write the combined data frame to a new CSV file
write.csv(combined_df, file = "allrecipes_combined.csv", row.names = FALSE)

# allrecipes_format.R
#
# Author: Bobby
#
# Date: 2023-05-03