Импортируйте несколько файлов excel в python pandas и объедините их в один фрейм данных

Я хотел бы прочитать несколько файлов excel из каталога в pandas и объединить их в один большой фреймворк. Однако я не смог понять это. Мне нужна помощь в цикле for и построении конкатенированного фрейма данных: Вот что я до сих пор:

import sys
import csv
import glob
import pandas as pd

# get data file names
path =r'C:\DRO\DCL_rawdata_files\excelfiles'
filenames = glob.glob(path + "/*.xlsx")

dfs = []

for df in dfs: 
    xl_file = pd.ExcelFile(filenames)
    df=xl_file.parse('Sheet1')
    dfs.concat(df, ignore_index=True)

Ответ 1

Как упоминалось в комментариях, одна ошибка, которую вы делаете, состоит в том, что вы перебираете пустой список.

Вот как бы я это сделал, используя пример наличия пяти одинаковых файлов Excel, которые добавляются один за другим.

(1) Импорт:

import os
import pandas as pd

(2) Список файлов:

path = os.getcwd()
files = os.listdir(path)
files

Вывод:

['.DS_Store',
 '.ipynb_checkpoints',
 '.localized',
 'Screen Shot 2013-12-28 at 7.15.45 PM.png',
 'test1 2.xls',
 'test1 3.xls',
 'test1 4.xls',
 'test1 5.xls',
 'test1.xls',
 'Untitled0.ipynb',
 'Werewolf Modelling',
 '~$Random Numbers.xlsx']

(3) Выберите файлы "xls":

files_xls = [f for f in files if f[-3:] == 'xls']
files_xls

Вывод:

['test1 2.xls', 'test1 3.xls', 'test1 4.xls', 'test1 5.xls', 'test1.xls']

(4) Инициализировать пустой фреймворк:

df = pd.DataFrame()

(5) Перечислить список файлов для добавления в пустой фреймворк:

for f in files_xls:
    data = pd.read_excel(f, 'Sheet1')
    df = df.append(data)

(6) Наслаждайтесь новым фреймворком данных.: -)

df

Вывод:

  Result  Sample
0      a       1
1      b       2
2      c       3
3      d       4
4      e       5
5      f       6
6      g       7
7      h       8
8      i       9
9      j      10
0      a       1
1      b       2
2      c       3
3      d       4
4      e       5
5      f       6
6      g       7
7      h       8
8      i       9
9      j      10
0      a       1
1      b       2
2      c       3
3      d       4
4      e       5
5      f       6
6      g       7
7      h       8
8      i       9
9      j      10
0      a       1
1      b       2
2      c       3
3      d       4
4      e       5
5      f       6
6      g       7
7      h       8
8      i       9
9      j      10
0      a       1
1      b       2
2      c       3
3      d       4
4      e       5
5      f       6
6      g       7
7      h       8
8      i       9
9      j      10

Ответ 2

это работает с Python 2.x

находиться в каталоге, где находятся файлы Excel

см. http://pbpython.com/excel-file-combine.html

import numpy as np
import pandas as pd
import glob
all_data = pd.DataFrame()
for f in glob.glob("*.xlsx"):
    df = pd.read_excel(f)
    all_data = all_data.append(df,ignore_index=True)

# now save the data frame
writer = pd.ExcelWriter('output.xlsx')
all_data.to_excel(writer,'sheet1')
writer.save()

Ответ 3

import pandas as pd

import os

os.chdir('...')

#read first file for column names

fdf= pd.read_excel("first_file.xlsx", sheet_name="sheet_name")

#create counter to segregate the different file data

fdf["counter"]=1

nm= list(fdf)

c=2

#read first 1000 files

for i in os.listdir():

  print(c)

  if c<1001:

    if "xlsx" in i:

      df= pd.read_excel(i, sheet_name="sheet_name")

      df["counter"]=c

      if list(df)==nm:

        fdf=fdf.append(df)

        c+=1

      else:

        print("headers name not match")

    else:

      print("not xlsx")


fdf=fdf.reset_index(drop=True)

#relax