Python處理Excel文件并與數據庫匹配做拼接
需求:Python處理Excel中數據并與數據庫交互匹配得到賬號信息等其他操作
Python實現
import os
import pandas as pd
import pymssql
import warnings
import time
def extract_broadband_speed(speed):
    """Return the part of *speed* up to and including the first 'M'.

    Args:
        speed: Raw cell value; may be a string, a number, or a missing
            value (``None`` / NaN).

    Returns:
        The text before the first 'M' with 'M' appended (``'100Mbps'``
        becomes ``'100M'``), or ``''`` when the value is missing or its
        string form contains no 'M'.
    """
    if not pd.notnull(speed):
        return ''
    head, marker, _ = str(speed).partition('M')
    # partition() leaves the separator empty when no 'M' was found.
    return head + marker if marker else ''
def concatenate_with_dash(row):
    """Build the dash-joined product / work-order label for one record.

    Args:
        row: Mapping-like record supporting ``.get`` (a DataFrame row or a
            plain dict) from which the product type, work-order type,
            access method and extracted rate are read.

    Returns:
        ``"<product>-<rate>-<access>-<workorder>"`` for the broadband
        product and ``"<product>-<workorder>"`` for every other product.
        Unless the work-order type is '改' or '其他', the label is suffixed
        with '機'.
    """
    # NOTE(review): some keys below contain parenthesised fragments such as
    # '(chǎn)' that look like text-extraction residue; confirm they match the
    # real column headers before relying on them.
    product_type = row.get('產(chǎn)品類型')
    workorder_type = row.get('工單類型')

    # "Change" / "other" work orders keep the bare type; all others get '機'.
    suffix = '' if workorder_type in ('改', '其他') else '機'

    if product_type == '寬帶':
        broadband_speed = row.get('速率提取')
        access_type = row.get('方式')
        return f"{product_type}-{broadband_speed}-{access_type}-{workorder_type}{suffix}"
    return f"{product_type}-{workorder_type}{suffix}"
def clear_data_in_excel_files(current_directory):
    """Rewrite every Excel file in a directory so only its header row remains.

    Args:
        current_directory: Directory whose ``.xls`` / ``.xlsx`` files are
            emptied in place.
    """
    files = [name for name in os.listdir(current_directory)
             if name.endswith(('.xls', '.xlsx'))]
    for file in files:
        file_path = os.path.join(current_directory, file)
        # Only the header is kept, so there is no need to load the data rows.
        header_only = pd.read_excel(file_path, nrows=0)
        # NOTE(review): writing back to a '.xls' path depends on an installed
        # writer engine for that format - confirm for the pandas version used.
        header_only.to_excel(file_path, index=False, header=True)
        print(f"成功清空文件: {file}")
    print("成功清空所有 Excel 文件的除第一行表頭外的數(shù)據(jù)")


# (source column, template column) pairs copied from each exported workbook
# into the template of the same name; a pair is skipped when either column is
# absent.
# NOTE(review): several names contain parenthesised fragments that look like
# text-extraction residue; confirm them against the real workbooks.
_FIELD_MAPPING = (
    ('施工單編碼', '編碼'),
    ('施工單編碼', 'boss號'),
    ('產(chǎn)品工單類型合并', '工單標題'),
    ('市', '市'),
    ('縣', '縣'),
    ('接入方式', '接入方式'),
    ('受理時間', '受理時間'),
    ('派單時間', '派單時間'),
    ('歸檔時間', '歸檔時間'),
    ('預(yù)約上門時間', '前臺預(yù)約時間'),
    ('處理人', '施工人員'),
    ('寬帶速率', '寬帶速率'),
    ('寬帶套餐資費', '套餐信息'),
    ('開始時間', '預(yù)約上門時間'),
    ('區(qū)域-修改', '區(qū)域'),
    ('是否沿街-修改', '沿街商鋪'),
    ('用戶品質(zhì)-NEW', '品質(zhì)'),
)


def _load_accounts():
    """Query the account lookup table and return it as a DataFrame."""
    # NOTE(review): connection settings, including the password, are
    # hard-coded; consider moving them to configuration.
    server = '127.0.0.1'
    database = 'YD'
    username = 'sa'
    password = 'xyz@1234560'
    sql_query = '''SELECT 地市, 人員名稱, [賬號]
FROM [ZHB]'''
    conn = pymssql.connect(server, username, password, database)
    try:
        data = pd.read_sql(sql_query, conn)
    finally:
        # Release the connection even when the query fails.
        conn.close()
    # Rename so the column lines up with the merge key used for the workbooks.
    data.rename(columns={'人員名稱': '處理人'}, inplace=True)
    return data


def _enrich_workorders(df0):
    """Add the derived columns to one work-order sheet (in place) and return it."""
    df0['速率提取'] = df0['速率'].apply(extract_broadband_speed)
    df0['用戶品質(zhì)-NEW'] = df0['速率提取'].apply(
        lambda x: '千兆' if x == '1000M' else '普通品質(zhì)')
    # regex=False: the patterns are literal text, and the default value of
    # `regex` differs between pandas versions.
    df0['產(chǎn)品工單類型合并'] = (
        df0.apply(concatenate_with_dash, axis=1)
        .str.replace('裝機', '新裝', regex=False)
    )
    df0['區(qū)域-修改'] = (
        df0['區(qū)域'].fillna('城鎮(zhèn)')
        .str.replace('城市', '城鎮(zhèn)', regex=False)
        .str.replace('鄉(xiāng)鎮(zhèn)', '城鎮(zhèn)', regex=False)
    )
    df0['是否沿街-修改'] = df0['沿街'].apply(lambda x: '是' if pd.notnull(x) else '否')
    # The appointment cell is split on ' ~ ' into its first and last parts;
    # non-string cells yield ''.
    df0['開始時間'] = df0['預(yù)約上門時間'].apply(
        lambda x: str(x).split(' ~ ')[0].strip() if isinstance(x, str) else '')
    df0['結(jié)束時間'] = df0['預(yù)約上門時間'].apply(
        lambda x: str(x).split(' ~ ')[-1].strip() if isinstance(x, str) else '')
    return df0


def _export_by_product_type(merged_df):
    """Split the merged data by product type and write one workbook per group."""
    # to_excel does not create missing directories.
    os.makedirs('源文件', exist_ok=True)

    filtered_data = merged_df[merged_df['產(chǎn)品類型'].isin(['ZW', 'TR'])]
    filtered_data.to_excel("源文件/ZW_TR數(shù)據(jù)合并.xlsx", index=False)
    print("成功將產(chǎn)品類型為 ZW_TR數(shù)據(jù)合并.xlsx")

    product_types = ['云', '門鈴', '喇叭', 'HM']
    hm_data = merged_df[merged_df['產(chǎn)品類型'].isin(product_types)]
    hm_data.to_excel("源文件/HM_數(shù)據(jù).xlsx", index=False)

    # Everything not covered by the two combined workbooks gets its own file.
    other_data = merged_df[
        ~merged_df['產(chǎn)品類型'].isin(['ZW', 'TR', '云', '門鈴', '喇叭', 'HM'])]
    for product_type, group_data in other_data.groupby('產(chǎn)品類型'):
        file_name = f"源文件/{product_type}_數(shù)據(jù).xlsx"
        group_data.to_excel(file_name, index=False)
        print(f"成功將產(chǎn)品類型為 {product_type} 的數(shù)據(jù)導(dǎo)出到文件 {file_name}")


def _copy_fields_to_templates(target_folder):
    """Fill each ``.xlsx`` template from the same-named exported workbook."""
    for file_name in os.listdir(target_folder):
        if not file_name.endswith('.xlsx'):
            continue
        file_path = os.path.join(target_folder, file_name)
        source_file_path = os.path.join('源文件/', file_name)
        if not os.path.isfile(source_file_path):
            continue

        df_source = pd.read_excel(source_file_path)
        df_target = pd.read_excel(file_path)
        for source_col, target_col in _FIELD_MAPPING:
            if source_col in df_source.columns and target_col in df_target.columns:
                df_target[target_col] = df_source[source_col]
        # The combined ZW/TR workbook carries one extra field.
        if 'ZW_TR數(shù)據(jù)合并.xlsx' in source_file_path:
            if 'ZW資費' in df_source.columns and '信息' in df_target.columns:
                df_target['信息'] = df_source['ZW資費']
        df_target.to_excel(file_path, index=False)
        print(f"成功將字段復(fù)制到文件 {file_path} 中")


def main():
    """Run the pipeline: load, enrich, match accounts, export, fill templates.

    Reads every ``.xls`` file in the current working directory, derives the
    extra columns, left-joins the account table from the database, writes one
    workbook per product group, and then copies selected columns into the
    cleared templates. Finally prints per-product-type row counts and the
    elapsed time, and waits for the user before exiting.
    """
    start_time = time.time()
    print("程序開始時間:", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(start_time)))
    warnings.filterwarnings('ignore')

    data = _load_accounts()

    current_directory = os.getcwd()
    files = [file for file in os.listdir(current_directory) if file.endswith('.xls')]
    workorder_count = {}
    merged_frames = []
    for file in files:
        file_path = os.path.join(current_directory, file)
        df0 = _enrich_workorders(pd.read_excel(file_path))
        print(f"成功讀取文件: {file}")
        for workorder_type in df0['產(chǎn)品類型']:
            workorder_count[workorder_type] = workorder_count.get(workorder_type, 0) + 1
        merged_frames.append(
            pd.merge(df0, data[['地市', '處理人', '賬號']], on=['地市', '處理人'], how='left'))

    # Export once over all inputs: the output file names are fixed, so a
    # per-file export would let each input overwrite the previous one.
    if merged_frames:
        merged_df = pd.concat(merged_frames, ignore_index=True)
        for idx, (product_type, _group) in enumerate(merged_df.groupby('產(chǎn)品類型')):
            print(f"產(chǎn)品類型 {idx + 1}: {product_type}")
        _export_by_product_type(merged_df)
        print("成功將數(shù)據(jù)庫查詢結(jié)果匹配并拆分業(yè)務(wù)導(dǎo)出為Excel文件")

    target_folder = '數(shù)據(jù)庫字段/'
    clear_data_in_excel_files(target_folder)
    _copy_fields_to_templates(target_folder)

    print("產(chǎn)品類型總數(shù):")
    for workorder_type, count in workorder_count.items():
        print(f"{workorder_type}: {count}")

    end_time = time.time()
    print("程序結(jié)束時間:", time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(end_time)))
    run_time = end_time - start_time
    print("程序運行耗時:%0.2f" % run_time, "s")
    input("按任意鍵退出程序")


if __name__ == "__main__":
    main()