visit
Merge multiple Excel files effortlessly with this Python code! 🚀 Using pandas
and os
, the script navigates through files in a specified folder, combining them into a neat merged_excel.xlsx
file.
# import packages
import pandas as pd
import os
# Define a function 'append' to merge Excel files in a specified path
def append(path):
# Create an empty list to store individual DataFrames
frames = []
for root, dirs, files in os.walk(path):
for file in files:
file_with_path = os.path.join(root, file)
df = pd.read_excel(file_with_path)
frames.append(df)
df = pd.concat(frames, axis=0)
return df
# path:The folder path where storage all the excel files
df = append(path)
df.to_excel("merged_excel.xlsx")
In this code snippet, we're using two powerful tools: pandas
and os
(a module for working with the operating system).
The append
function is the star here. It digs through all the Excel files in a specified folder ('path') and collects them into a DataFrame
, which is like a neat table for our data.
Now, for the magic moment: the last two lines! They use our append
function to merge all the Excel data in the specified folder into one consolidated file called merged_excel.xlsx.
import pandas as pd
import os
def append(path):
frames = [] # Create an empty list to store individual DataFrames
for root, dirs, files in os.walk(path):
for file in files:
file_with_path = os.path.join(root, file)
# Extract filename and date information from the file name
file_info = os.path.splitext(file)[0].split('_')
# Read each Excel file using pandas
df = pd.read_excel(file_with_path)
# Add filename and date columns to the DataFrame
df["filename"] = file_info[0] # Assuming filename is the first part
df["date"] = pd.to_datetime(file_info[1]) # Assuming date is the second part
# Append the DataFrame to the list
frames.append(df)
# Concatenate all DataFrames in the list along the rows
df = pd.concat(frames, axis=0)
return df
import pandas as pd
import os
def combine(path):
with pd.ExcelWriter("merged_excel.xlsx") as writer:
for root, dirs, files in os.walk(path):
for file in files:
filename = os.path.join(root, file)
df = pd.read_excel(filename)
# Delete the file name suffix, sometimes it could be xlsv/.xlsx
df.to_excel(writer, sheet_name=file.strip(
'.csv'))
return df