Append data frames together in a for loop

R

R Problem Overview


I have a for loop which produces a data frame after each iteration. I want to append all data frames together but am finding it difficult. Following is what I am trying; please suggest how to fix it:

# Intended accumulator; note that it is never updated inside the loop below.
d = NULL
for (i in 1:7) {

  # vector output
  model <- #some processing

  # add vector to a dataframe
  # (df is reassigned on every iteration, so only the last one is left
  # after the loop finishes)
  df <- data.frame(model)

}

# Runs once, after the loop: binds NULL with the df from the final iteration.
df_total <- rbind(d,df)

R Solutions


Solution 1 - R

Don't do it inside the loop. Make a list, then combine them outside the loop.

# Pre-allocate one slot per iteration so the list is never re-grown in the loop.
datalist <- vector("list", 5)

for (i in seq_along(datalist)) {
  # ... make some data
  dat <- data.frame(x = rnorm(10), y = runif(10))
  dat$i <- i  # maybe you want to keep track of which iteration produced it?
  datalist[[i]] <- dat  # add it to your list
}

# Combine all pieces in a single call, outside the loop.
big_data <- do.call(rbind, datalist)
# or big_data <- dplyr::bind_rows(datalist)
# or big_data <- data.table::rbindlist(datalist)

This is a much more R-like way to do things. It can also be substantially faster, especially if you use dplyr::bind_rows or data.table::rbindlist for the final combining of data frames.

Solution 2 - R

You should try this:

# Start from an empty data frame and append one chunk per iteration.
df_total = data.frame()
for (i in 1:7){
    # vector output
    model <- #some processing

    # add vector to a dataframe
    df <- data.frame(model)
    # bind this iteration's rows onto the running total and keep the result
    df_total <- rbind(df_total,df)
}

Solution 3 - R

Again, maRtin is correct, but for this to work you have to start with a data frame that already has at least one column:

# Seed the result with the first iteration's output, stored as column "col1".
model <- #some processing
df <- data.frame(col1=model)

# The remaining iterations (2 through 17) each contribute one more column.
for (i in 2:17)
{
     model <- # some processing
     nextcol <-  data.frame(model)
     colnames(nextcol) <- c(paste("col", i, sep="")) # rename the column to "col<i>"
     # attach the new column to the right-hand side of the accumulated data frame
     df <- cbind(df, nextcol)
}
                           

Solution 4 - R

In the Coursera course, an Introduction to R Programming, this skill was tested. They gave all the students 332 separate csv files and asked them to programmatically combine several of the files to calculate the mean value of the pollutant.

This was my solution:

  # Typed, zero-row template so combined_df has the expected columns even
  # before any file has been read.
  combined_df <- data.frame(Date = as.Date(character()),
                            Sulfate = double(),
                            Nitrate = double(),
                            ID = integer())
  # Every file number in the range min(id)..max(id) is read.
  # NOTE(review): if `id` is not contiguous (e.g. c(2, 8)) this also reads the
  # files in between -- confirm that is intended, otherwise iterate over `id`.
  file_ids <- min(id):max(id)
  # The file names carry leading zeros ("001.csv", "002.csv", ...), hence "%03d".
  csv_paths <- file.path(getwd(), directory, sprintf("%03d.csv", file_ids))
  # Read all files first, then bind once: calling rbind() inside the loop
  # re-copies the accumulated rows on every iteration.
  monitor_data <- lapply(csv_paths, read.csv)
  combined_df <- do.call(rbind, c(list(combined_df), monitor_data))

Solution 5 - R

Try to use rbindlist approach over rbind as it's very, very fast.

Example:

library(data.table)

##### example 1: slow processing ######

# Seed table holding a single row of NAs; every iteration's rows are appended to it.
table.1 <- data.frame(x = NA, y = NA)
# Running total of the time spent inside the loop body.
time.taken <- 0
for( i in 1:100) {
  start.time = Sys.time()
  x <- rnorm(100)
  y <- x/2 +x/3
  z <- cbind.data.frame(x = x, y = y)
  
  # Append inside the loop: table.1 is rebuilt by rbind() on every iteration.
  table.1 <- rbind(table.1, z)
  end.time <- Sys.time()
  # Accumulate this iteration's elapsed time.
  time.taken  <- (end.time - start.time) + time.taken
 
}
print(time.taken)
> Time difference of 0.1637917 secs

####example 2: faster processing #####

# Collect each iteration's data frame in a list instead of binding immediately.
table.2 <- list()
# Running total of the time spent inside the loop body.
t0 <- 0
for( i in 1:100) {
  s0 = Sys.time()
  x <- rnorm(100)
  y <- x/2 + x/3
  
  z <- cbind.data.frame(x = x, y = y)
  
  # Store the piece; no binding happens inside the loop.
  table.2[[i]] <- z
  
  e0 <- Sys.time()
  t0  <- (e0 - s0) + t0
  
}
# Bind all collected pieces with one rbindlist() call, timed separately.
s1 = Sys.time()
table.3 <- rbindlist(table.2)
e1 = Sys.time()

# Total time = accumulated loop time + time of the single bind step.
t1  <- (e1-s1) + t0
t1
> Time difference of 0.03064394 secs

Solution 6 - R

For me, it worked very simply. At first, I made an empty data.frame, then in each iteration I added one column to it. Here is my code:

# Seed data frame built from modelForOneIteration, which is not defined in this
# snippet and has to exist beforehand.
df <- data.frame(modelForOneIteration)
for(i in 1:10){
  model <- # some processing
  # store this iteration's result as column i of df
  df[,i] = model
}

Solution 7 - R

Here are some tidyverse and custom function options that might work depending on your needs:

library(tidyverse)

# custom function to generate, filter, and mutate the data:
#
# Draws 5 normal values (x) and 5 uniform values (y), keeps the rows where
# x < y, and adds the sum x + y plus the iteration label.
#
# i: value stored in the `i` column of every returned row.
# Returns a tibble with columns x, y, x_plus_y and i (0 to 5 rows).
combine_dfs <- function(i) {
  # tibble() replaces data_frame(), which is deprecated.
  tibble(x = rnorm(5), y = runif(5)) %>%
    filter(x < y) %>%
    mutate(x_plus_y = x + y, i = i)
}

# Call combine_dfs() once for each of 1..5 and row-bind the returned data
# frames into a single one; the piped and the direct form are equivalent calls.
df <- 1:5 %>% map_df(~combine_dfs(.))
df <- map_df(1:5, ~combine_dfs(.)) # both give the same results
> df %>% head()
# A tibble: 6 x 4
       x      y x_plus_y     i
   <dbl>  <dbl>    <dbl> <int>
1 -0.973 0.673    -0.300     1
2 -0.553 0.0463   -0.507     1
3  0.250 0.716     0.967     2
4 -0.745 0.0640   -0.681     2
5 -0.736 0.228    -0.508     2
6 -0.365 0.496     0.131     3

You could do something similar if you had a directory of files that needed to be combined:

# Folder holding the input files. The trailing slash matters because the file
# paths below are built with paste0().
dir_path <- "/path/to/data/test_directory/"
list.files(dir_path)

# Read the CSV `file` located in `path`, keep the rows where a < b, and add
# the sum a + b together with the name of the file the rows came from.
combine_files <- function(path, file) {
  csv_path <- paste0(path, file)
  raw <- read_csv(csv_path)
  kept <- filter(raw, a < b)
  mutate(kept, a_plus_b = a + b, file_name = file)
}

# Apply combine_files() to every .csv file in the folder and row-bind the results.
df <- map_df(
  list.files(dir_path, pattern = "\\.csv$"),
  ~ combine_files(dir_path, .x)
)

# or if you have Excel files, using the readxl package:
# Read the workbook `file` located in `path`, keep the rows where a < b, and
# add the sum a + b together with the name of the file the rows came from.
combine_xl_files <- function(path, file) {
  xlsx_path <- paste0(path, file)
  raw <- readxl::read_xlsx(xlsx_path)
  kept <- filter(raw, a < b)
  mutate(kept, a_plus_b = a + b, file_name = file)
}

# Apply combine_xl_files() to every .xlsx file in the folder and row-bind the results.
df <- map_df(
  list.files(dir_path, pattern = "\\.xlsx$"),
  ~ combine_xl_files(dir_path, .x)
)

Solution 8 - R

"""Produce Multiple DataFrames from the unique TFs via a Groupby"""

        # Fragment of a function body (Python/pandas). df_CDL is defined
        # elsewhere; presumably an indexable collection of DataFrames that each
        # have a 'TFs' column -- TODO confirm against the caller.
        dfs_list = []
        for i in range(len(df_CDL)):
            df = df_CDL[i]
            print(df, 'Only 1 df_CDL')

            # One sub-frame per distinct value of the 'TFs' column.
            dfs = []
            for _, dataframe in df.groupby('TFs'):
                print('What is going on here?15', dataframe)
                dfs.append([dataframe])
            dfs_list.append([dfs])

        # Index any dataframe you want or loop through them all. Whatever..
        # NOTE: dfs holds only the groups of the LAST frame processed; the
        # indexes below require at least 2 groups and at least 6 input frames.

        print('Test?10', dfs[1], 'Test?20')
        print('What is going on here? 1', dfs_list[5],
              'What is going on here again? 2')

Solution 9 - R

x <- 1:10

# Build one single-row data frame per number ----
# Columns are set with `=` (argument naming). The earlier `x1 == ...` form was a
# comparison against an undefined object and stopped with "object 'x1' not found".
rows <- lapply(x, function(n) {
  data.frame(
    x1 = paste0("The number is ", n),
    y1 = paste0("This is another number ", n),
    stringsAsFactors = FALSE
  )
})

# Bind all rows in one call instead of growing the data frame inside a loop.
df <- do.call(rbind, rows)

names(df) <- c("st_column", "nd_column")

# The data viewer needs an interactive session; skip it when run as a script.
if (interactive()) {
  View(df)
}

that might be a good way to do so....

Attributions

All content for this solution is sourced from the original question on Stackoverflow.

The content on this page is licensed under the Attribution-ShareAlike 4.0 International (CC BY-SA 4.0) license.

Content Type | Original Author | Original Content on Stackoverflow
Question | Ibe | View Question on Stackoverflow
Solution 1 - R | Gregor Thomas | View Answer on Stackoverflow
Solution 2 - R | maRtin | View Answer on Stackoverflow
Solution 3 - R | jwilley44 | View Answer on Stackoverflow
Solution 4 - R | Simon | View Answer on Stackoverflow
Solution 5 - R | Ayushi Kachhara | View Answer on Stackoverflow
Solution 6 - R | saleh sereshki | View Answer on Stackoverflow
Solution 7 - R | sbha | View Answer on Stackoverflow
Solution 8 - R | Davis Rogers | View Answer on Stackoverflow
Solution 9 - R | Haripada Koley | View Answer on Stackoverflow