【python】自动统计考勤数据

2023-12-31 04:38| 来源: 网络整理| 查看: 265

如果没有钉钉这类打卡软件帮你统计考勤数据，而考勤机导出的数据又只有员工的打卡时间，可以用 Python 编写一个自动统计脚本，提高办公效率。

实现的几个功能：

1、按月统计出勤，早退，迟到等常见指标

2、对源数据进行了清洗加工，尤其对一天内重复打卡的情况进行处理

3、抽取出异常记录（忘记打卡），便于排查异常原因

附带完整源码~

# -*- coding: utf-8 -*-
"""
Created on Wed Aug 25 16:10:46 2021

@author: Administrator

Monthly attendance statistics from a punch-clock Excel export.

The script reads one ``.xls`` file from the current working directory, keeps
the earliest morning punch and the latest afternoon punch per person per day,
flags late arrivals / early departures / overtime, and writes the monthly
summary plus the intermediate tables to a ``【result】`` workbook.
"""
import datetime
import os
import time

import pandas as pd

# Grouping keys: one row per person per day / per person per month.
DAY_KEYS = ['ID de Usuario', 'Nombre', 'Year', 'Month', 'Day']
MONTH_KEYS = ['ID de Usuario', 'Nombre', 'Year', 'Month']

# Column names of the raw punch-clock export that the script consumes.
SOURCE_COLS = ['ID de Usuario', 'Nombre Completo', 'Fecha/Hora']


def get_week_day(date):
    """Return the Chinese weekday character for *date* ('一' = Monday ... '日' = Sunday)."""
    week_day = {
        0: '一',
        1: '二',
        2: '三',
        3: '四',
        4: '五',
        5: '六',
        6: '日',
    }
    return week_day[date.weekday()]


def get_monringORafternoon(date):
    """Classify a timestamp as morning ('上午') or afternoon ('下午').

    NOTE(review): the cut-off hour was lost from the published listing;
    noon (``hour < 12``) is a reconstruction -- confirm against the original.
    """
    return '上午' if date.hour < 12 else '下午'


def _clock_strings(df):
    """Return the 'Time' column rendered as zero-padded ``HH:MM:SS`` strings.

    Zero-padded strings order the same way as the clock times they represent,
    which is what the threshold comparisons below rely on.
    """
    return df['Time'].apply(lambda x: datetime.time.strftime(x, "%H:%M:%S"))


def lateForWork(df):
    """Boolean mask: morning punches made after 09:00:00."""
    return (df['AMPM'] == '上午') & (_clock_strings(df) > '09:00:00')


def leaveEarly(df):
    """Boolean mask: afternoon punches made before 18:00:00."""
    return (df['AMPM'] == '下午') & (_clock_strings(df) < '18:00:00')


def severelyLate(df):
    """Boolean mask: morning punches made after 09:10:00."""
    return (df['AMPM'] == '上午') & (_clock_strings(df) > '09:10:00')


def workOnSunday(df):
    """Boolean mask: punches whose weekday is Sunday ('日')."""
    return df['week_day'] == '日'


def eveningWork(df):
    """Boolean mask: afternoon punches made at or after 20:00:00."""
    return (df['AMPM'] == '下午') & (_clock_strings(df) >= '20:00:00')


def weekends_between(d1, d2):
    """Count the Sundays between *d1* and *d2*.

    Every complete 7-day span contributes one Sunday.  The remaining
    ``leftover`` days form the window starting at ``d2 - leftover`` and
    contribute one more when that window reaches past ISO weekday 7.
    The elapsed whole days come from ``(d2 - d1).days``, so a partial final
    day is truncated and *d2*'s own date is not part of the window.
    """
    days_between = (d2 - d1).days
    weekends, leftover = divmod(days_between, 7)
    if leftover:
        start_day = (d2 - datetime.timedelta(leftover)).isoweekday()
        end_day = start_day + leftover
        # An earlier variant counted Saturday and Sunday as half a day each;
        # now only Sunday counts, as one whole day.
        if start_day <= 7 and end_day > 7:
            weekends += 1
    return weekends


def choose_excel_file(path):
    """Return the name of the ``xls`` file in *path* to process, or ``None``.

    With exactly one candidate it is returned directly; with several the user
    picks one by its printed index.  ``None`` is returned (after telling the
    user) when there is no candidate or the typed index is not usable.
    """
    excel_list = [i for i in os.listdir(path) if i.endswith('xls')]
    if not excel_list:
        input(f'没有找到xls文件，请将需要处理的文件放入{path}，检查后重新启动程序')
        return None
    if len(excel_list) == 1:
        return excel_list[0]
    for i in enumerate(excel_list):
        print(i)
    try:
        return excel_list[int(input('发现有多个xls文件，输入想要统计的文件对应的编号：'))]
    except (ValueError, IndexError):
        print('没有找到此编号')
        input()
        return None


def add_calendar_columns(data):
    """Return a copy of *data* with the derived date/time columns added.

    *data* must have a 'Tiempo' column; it is parsed with ``pd.to_datetime``
    and expanded into 'Year', 'Month', 'Day', 'Time', 'week_day' and 'AMPM'.
    """
    data = data.copy()
    data['Tiempo'] = pd.to_datetime(data['Tiempo'])
    data['Year'] = data['Tiempo'].dt.year
    data['Month'] = data['Tiempo'].dt.month
    data['Day'] = data['Tiempo'].dt.day
    data['Time'] = data['Tiempo'].dt.time
    data['week_day'] = data['Tiempo'].apply(get_week_day)
    data['AMPM'] = data['Tiempo'].apply(get_monringORafternoon)
    return data


def load_punch_records(excel_path):
    """Read the punch export at *excel_path* into the normalised source table.

    The result has the columns 'ID de Usuario', 'Nombre', 'Tiempo' plus the
    derived columns from ``add_calendar_columns``; rows without a name are
    dropped.
    """
    data = pd.read_excel(excel_path)
    if data.columns[0] != 'ID de Usuario':
        # Presumably the raw device export carries a preamble, so the real
        # header sits on the ninth row -- verify against a sample file.
        data = pd.read_excel(excel_path, header=8).loc[:, SOURCE_COLS]
    elif set(SOURCE_COLS).issubset(data.columns):
        data = data.loc[:, SOURCE_COLS]
    data = data.copy()
    data.columns = ['ID de Usuario', 'Nombre', 'Tiempo']  # unify the column names
    data = data[data['Nombre'].notnull()]  # drop empty rows
    return add_calendar_columns(data)


def summarize_attendance(data):
    """Build the monthly summary from the normalised punch table.

    Returns a tuple ``(summary, cleaned, exceptions)``:

    * ``summary`` -- one row per person per month with '出勤天数', '迟到',
      '早退', '严重迟到', '周日加班', '晚上加班' and '异常'.
    * ``cleaned`` -- the de-duplicated punches with their flag columns.
    * ``exceptions`` -- the person-days that have only a single punch left
      after de-duplication.
    """
    # Drop repeated punches: earliest one in the morning, latest in the afternoon.
    half_day_keys = DAY_KEYS + ['AMPM']
    df_temp_day = data.drop_duplicates(DAY_KEYS)
    df_temp_AM = (data.loc[data['AMPM'] == '上午']
                  .sort_values('Tiempo')
                  .drop_duplicates(subset=half_day_keys, keep='first'))
    df_temp_PM = (data.loc[data['AMPM'] == '下午']
                  .sort_values('Tiempo')
                  .drop_duplicates(subset=half_day_keys, keep='last'))
    df_dropdup = pd.concat([df_temp_AM, df_temp_PM], ignore_index=True)
    df_dropdup = df_dropdup.sort_values(['ID de Usuario', 'Tiempo'])

    # Late / early leave / severely late / Sunday overtime / evening overtime.
    df_dropdup['迟到'] = lateForWork(df_dropdup).astype(int)
    df_dropdup['早退'] = leaveEarly(df_dropdup).astype(int)
    df_dropdup['严重迟到'] = severelyLate(df_dropdup).astype(int)
    # Each Sunday punch is worth half a day, so a morning + afternoon pair is one day.
    df_dropdup['周日加班'] = workOnSunday(df_dropdup).astype(int) * 0.5
    df_dropdup['晚上加班'] = eveningWork(df_dropdup).astype(int)

    # Exceptions: days on which only one punch remains.
    gp_timecount = df_dropdup.groupby(DAY_KEYS)['Time'].count()
    exceptionRecord = gp_timecount[gp_timecount == 1].reset_index()
    exceptionRecord = exceptionRecord.rename({'Time': '异常'}, axis=1)
    gp_abnormal = exceptionRecord.groupby(MONTH_KEYS)['异常'].count().reset_index()

    # Number of Sundays between each person's first and last punch of the month.
    gp_tiempo = df_dropdup.groupby(MONTH_KEYS)['Tiempo'].agg(['min', 'max'])
    gp_tiempo['周日天数'] = [
        weekends_between(first, last)
        for first, last in zip(gp_tiempo['min'], gp_tiempo['max'])
    ]
    sundayCount = gp_tiempo[['周日天数']].reset_index()

    # Monthly totals.  The flag columns are selected with a list: selecting
    # them with a bare tuple is rejected by current pandas.
    gp_count = df_temp_day.groupby(MONTH_KEYS)['Day'].count()
    gp_sum = df_dropdup.groupby(MONTH_KEYS)[
        ['迟到', '早退', '严重迟到', '周日加班', '晚上加班']
    ].sum()
    gp_result = pd.concat([gp_count, gp_sum], axis=1).reset_index()
    gp_result = gp_result.rename({'Day': '出勤天数'}, axis=1)

    # Add the exception count, then the Sunday count.  The second merge must
    # start from the first merge's result, otherwise '异常' is lost.
    gp_result2 = pd.merge(gp_result, gp_abnormal, how='left', on=MONTH_KEYS).fillna(0)
    gp_result2 = pd.merge(gp_result2, sundayCount, how='left', on=MONTH_KEYS).fillna(0)

    # Corrected attendance = days present - Sunday overtime + Sundays in range.
    gp_result2['出勤天数'] = (
        gp_result2['出勤天数'] - gp_result2['周日加班'] + gp_result2['周日天数']
    )
    # Helper column; keep it (comment this line out) to audit the correction.
    gp_result2 = gp_result2.drop('周日天数', axis=1)
    return gp_result2, df_dropdup, exceptionRecord


def main():
    """Pick an xls file in the working directory, summarise it and save the result."""
    at = time.time()
    path = os.getcwd()
    print('-'*20)
    print(f'已打开文件夹{path} ,正在读取内容...')
    print('-'*20)

    excel_file = choose_excel_file(path)
    if excel_file is None:
        # Nothing to process; the user has already been told why.
        return

    print('-'*20)
    print('正在处理: ', excel_file)
    print('-'*20)

    data = load_punch_records(os.path.join(path, excel_file))
    gp_result2, df_dropdup, exceptionRecord = summarize_attendance(data)

    # NOTE(review): the output keeps the source file's extension; confirm the
    # installed pandas still ships a writer engine for that extension.
    file_name = os.path.join(path, '【result】' + excel_file)
    with pd.ExcelWriter(file_name) as writer:
        gp_result2.to_excel(writer, sheet_name='统计结果', index=False)
        data.to_excel(writer, sheet_name='源数据', index=False)
        df_dropdup.to_excel(writer, sheet_name='加工数据', index=False)
        exceptionRecord.to_excel(writer, sheet_name='异常记录', index=False)

    print('Time used: {} sec'.format(time.time()-at))
    input(f'数据已跑完，文件保存在{file_name}\n按enter键即可退出')


if __name__ == '__main__':
    # Top-level boundary: keep the console window open so the user can read the error.
    try:
        main()
    except Exception as e:
        print('发生未知错误:', e)
        input()

使用方法：将需要处理的文件和python脚本放在同一个文件夹下，双击启动python脚本就可以了，处理之后会自动生成【result】开头的文件。

【本文地址】

公司简介

联系我们