[Python] 解析Pcap三个Python库（Dpkt Scapy Pyshark）应用实例

#[Python] 解析Pcap三个Python库（Dpkt Scapy Pyshark）应用实例| 来源: 网络整理| 查看: 265

文章目录

- 说明
- Dpkt输出传输层协议信息
- Pyshark Or Wireshark
- [使用 PyShark 和 scapy 从 pcap 文件中读取字段并填充 CSV](https://github.com/vnetman/pcap2csv)

说明

如果要处理pcap文件，Python有三个比较有名的库：

- scapy：速度慢，资料最多
- pyshark：速度适中，需要系统安装了Wireshark
- dpkt：速度快，资料偏少、不够全

Dpkt输出传输层协议信息

（如果有传输层的话）包含以下信息。

- 时间戳 Timestamp
- 二层 MAC (source, destination)
- IP (source, destination)
- len
- ttl
- DF
- MF
- offset
- protocol

# coding:utf-8
"""Print the length, info and other details of each packet, one by one.

https://dpkt.readthedocs.io/en/latest/print_packets.html
"""
import dpkt
import datetime
import socket
from dpkt.compat import compat_ord
from dpkt.ip import get_ip_proto_name


def mac_addr(address):
    r"""Convert a MAC address to a readable/printable string.

    Args:
        address (str): a MAC address in hex form
            (e.g. '\x01\x02\x03\x04\x05\x06')

    Returns:
        str: Printable/readable MAC address
    """
    return ':'.join('%02x' % compat_ord(b) for b in address)


def inet_to_str(inet):
    """Convert inet object to a string.

    Args:
        inet (inet struct): inet network address

    Returns:
        str: Printable/readable IP address
    """
    # First try ipv4 and then ipv6
    try:
        return socket.inet_ntop(socket.AF_INET, inet)
    except ValueError:
        return socket.inet_ntop(socket.AF_INET6, inet)


def print_packets(pcap):
    """Print out information about each packet in a pcap.

    For every record this prints the UTC timestamp and the Ethernet
    source/destination/type. For frames whose payload is a dpkt IP object
    it also prints the IP source/destination, length, TTL, the DF/MF
    fragment flags, the fragment offset and the protocol name. Other
    frames are reported as unsupported and skipped.

    Args:
        pcap: dpkt pcap reader object (dpkt.pcap.Reader)
    """
    # For each packet in the pcap process the contents
    for timestamp, buf in pcap:

        # Print out the timestamp in UTC. utcfromtimestamp() is deprecated
        # since Python 3.12, so convert with an explicit UTC zone and then
        # drop the tzinfo to keep the printed text free of a "+00:00" suffix.
        utc_time = datetime.datetime.fromtimestamp(
            timestamp, datetime.timezone.utc).replace(tzinfo=None)
        print('Timestamp: ', str(utc_time))

        # Unpack the Ethernet frame (mac src/dst, ethertype)
        eth = dpkt.ethernet.Ethernet(buf)
        print('Ethernet Frame: ', mac_addr(eth.src), mac_addr(eth.dst), eth.type)

        # Make sure the Ethernet data contains an IP packet
        if not isinstance(eth.data, dpkt.ip.IP):
            print('Non IP Packet type not supported %s\n' % eth.data.__class__.__name__)
            continue

        # Now unpack the data within the Ethernet frame (the IP packet)
        # Pulling out src, dst, length, fragment info, TTL, and Protocol
        ip = eth.data

        # Pull out fragment information (flags and offset all packed into
        # the off field, so use bitmasks)
        do_not_fragment = bool(ip.off & dpkt.ip.IP_DF)
        more_fragments = bool(ip.off & dpkt.ip.IP_MF)
        fragment_offset = ip.off & dpkt.ip.IP_OFFMASK
        protocol = get_ip_proto_name(ip.p)

        # Print out the info
        print('IP: %s -> %s (len=%d ttl=%d DF=%d MF=%d offset=%d protocol=%s)\n' %
              (inet_to_str(ip.src), inet_to_str(ip.dst), ip.len, ip.ttl,
               do_not_fragment, more_fragments, fragment_offset, protocol))


if __name__ == '__main__':
    with open('pcap/2021_11_02_Idle.pcap', 'rb') as f:
        pcap = dpkt.pcap.Reader(f)
        print_packets(pcap)

# Pyshark Or Wireshark

如果只是想要所有packet的[src_IP, dst_IP, src_MAC, dst_MAC, Protocol, TimeStamp, Info]等信息，可以直接打开Wireshark导出

首先确定需要导出的Column名

然后直接导出为CSV文件即可

毕竟能不写代码肯定是好的！

这个操作也可以用Pyshark库完成。

import asyncio
import os
from tqdm import tqdm, trange
import pyshark
import pandas as pd

filePath = 'pcap/nestcamPOWER_3.pcap'


def pcap2csv(filePath):
    """Export per-packet timestamp, protocol and length from a pcap to CSV.

    The capture is read with PyShark in summary mode, each packet's summary
    text is split on single spaces, and tokens 1, 4 and 5 are stored as the
    "TimeStamp", "Protocol" and "Length" columns. The CSV path is derived
    from ``filePath`` by replacing "pcap/" with "result/" and swapping the
    file extension for ".csv".

    Args:
        filePath (str): path of the pcap file to read.

    Returns:
        None. The CSV file is written as a side effect.
    """
    print(filePath)

    # ProactorEventLoop only exists on Windows; fall back to the platform
    # default loop elsewhere so the function does not raise AttributeError.
    if hasattr(asyncio, "ProactorEventLoop"):
        loop = asyncio.ProactorEventLoop()
    else:
        loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

    # NOTE(review): only_summaries appears to have been removed in recent
    # pyshark releases - confirm against the installed version.
    cap = pyshark.FileCapture(filePath, only_summaries=True, eventloop=loop)
    try:
        # Preloading is slow; comparable to Wireshark opening the same file.
        cap.load_packets()
        packetAmount = len(cap)

        data3 = []
        # Everything is already loaded, so this loop is fast.
        processBar = tqdm(cap, desc="Pcap Progress Bar ", total=packetAmount)
        for packet in processBar:
            pItem = str(packet).split(" ")
            # 1 - timestamp (s)
            # 4 - highest protocol
            # 5 - length
            # append() takes exactly one argument, so pass the row as a list.
            data3.append([pItem[1], pItem[4], pItem[5]])
    finally:
        # Release the capture even when loading or parsing fails.
        cap.close()

    dataframe = pd.DataFrame(columns=["TimeStamp", "Protocol", "Length"], data=data3)

    # Name of the CSV file to save. Only the extension is swapped, so a
    # file name that itself contains "pcap" is left intact.
    csvName = os.path.splitext(filePath.replace("pcap/", "result/"))[0] + ".csv"
    print(csvName)

    # Make sure the target directory exists before writing.
    csvDir = os.path.dirname(csvName)
    if csvDir:
        os.makedirs(csvDir, exist_ok=True)
    dataframe.to_csv(csvName, index=False, sep=',')

# Using PyShark and scapy to read fields from a pcap file and populate a CSV

Usage: `pcap2csv --pcap <pcap file to parse> --csv <csv file to create>`

pcap 中的每个数据包都呈现到 csv 文件的一行中。要提取的特定项目以及它们在 csv 中的呈现顺序在脚本的“render_csv_row”函数中进行了硬编码。另请注意，csv 中的分隔符是“|”字符，而不是逗号。

此脚本同时使用 PyShark （https://kiminewt.github.io/pyshark/）和 Scapy 来完成其工作。PyShark是因为我们希望利用tshark强大的协议解码能力来生成CSV的“文本描述”字段（如“标准查询0xf3de A www.cisco.com”，“Client Hello”等），而Scapy则因为同时我们希望访问数据包的“有效载荷”部分（PyShark似乎无法提供这一点）。

#!/usr/bin/env python3
"""pcap2csv

Script to extract specific pieces of information from a pcap file and
render into a csv file.

Usage: pcap2csv --pcap <pcap file to parse> --csv <csv file to create>

Each packet in the pcap is rendered into one row of the csv file.
The specific items to extract, and the order in which they are rendered
in the csv are hard-coded in the script, in the 'render_csv_row' function.
Also note that the separators in the csv are '|' characters, not commas.

This script uses *both* PyShark (https://kiminewt.github.io/pyshark/) and
Scapy to do its work. PyShark because we want to leverage tshark's powerful
protocol decoding ability to generate the "textual description" field of
the CSV, and Scapy because at the same time we want to access the "payload"
portion of the packet (PyShark seems to be unable to provide this).
"""

import argparse
import os.path
import sys

import pyshark
from scapy.utils import RawPcapReader
from scapy.layers.l2 import Ether
from scapy.layers.inet import IP, UDP, TCP

#--------------------------------------------------

def render_csv_row(pkt_sh, pkt_sc, fh_csv):
    """Write one packet entry into the CSV file.

    pkt_sh is the PyShark representation of the packet
    pkt_sc is a 'bytes' representation of the packet as returned from
    scapy's RawPcapReader
    fh_csv is the csv file handle

    Returns True when a row was written, False when the packet was ignored
    (not IPv4 over Ethernet, or not UDP/TCP).
    """
    ether_pkt_sc = Ether(pkt_sc)
    if ether_pkt_sc.type != 0x800 or not ether_pkt_sc.haslayer(IP):
        print('Ignoring non-IP packet')
        return False

    ip_pkt_sc = ether_pkt_sc[IP]

    # NOTE(review): the L4 extraction and the row format string were missing
    # from the reviewed text (the names below were used without being
    # defined). They are reconstructed here from the example row and from
    # the imported UDP/TCP layers - confirm against the upstream script.
    l4_layers = {17: ('UDP', UDP), 6: ('TCP', TCP)}
    l4_entry = l4_layers.get(ip_pkt_sc.proto)
    # haslayer() also guards against packets whose IP protocol number says
    # UDP/TCP but whose L4 header scapy did not dissect.
    if l4_entry is None or not ip_pkt_sc.haslayer(l4_entry[1]):
        print('Ignoring non-UDP/TCP packet')
        return False

    l4_proto_name, l4_layer = l4_entry
    l4_pkt_sc = ip_pkt_sc[l4_layer]
    l4_sport = l4_pkt_sc.sport
    l4_dport = l4_pkt_sc.dport
    l4_payload_bytes = bytes(l4_pkt_sc.payload)

    # Each line of the CSV has this format:
    #   {0}  frame number
    #   {1}  time
    #   {2}  highest protocol
    #   {3}  L4 protocol
    #   {4}  text description
    #   {5}  src ip address
    #   {6}  src port
    #   {7}  dst ip address
    #   {8}  dst port
    #   {9}  packet length as reported by PyShark
    #   {10} hex dump of the L4 payload
    # Example:
    # 1|0.0|DNS(UDP)|Standard query 0xf3de A www.cisco.com|192.168.1.116:57922|1.1.1.1:53|73|f3de010000010000000000000377777705636973636f03636f6d0000010001
    fmt = '{0}|{1}|{2}({3})|{4}|{5}:{6}|{7}:{8}|{9}|{10}'

    print(fmt.format(pkt_sh.no,               # {0}
                     pkt_sh.time,             # {1}
                     pkt_sh.protocol,         # {2}
                     l4_proto_name,           # {3}
                     pkt_sh.info,             # {4}
                     pkt_sh.source,           # {5}
                     l4_sport,                # {6}
                     pkt_sh.destination,      # {7}
                     l4_dport,                # {8}
                     pkt_sh.length,           # {9}
                     l4_payload_bytes.hex()), # {10}
          file=fh_csv)

    return True

#--------------------------------------------------

def pcap2csv(in_pcap, out_csv):
    """Main entry function called from main to process the pcap and
    generate the csv file.

    in_pcap = name of the input pcap file (guaranteed to exist)
    out_csv = name of the output csv file (will be created)

    This function walks over each packet in the pcap file, and for
    each packet invokes the render_csv_row() function to write one row
    of the csv.
    """

    # Open the pcap file with PyShark in "summary-only" mode, since this
    # is the mode where the brief textual description of the packet (e.g.
    # "Standard query 0xf3de A www.cisco.com", "Client Hello" etc.) are
    # made available.
    # NOTE(review): only_summaries appears to have been removed in recent
    # pyshark releases - confirm against the installed version.
    pcap_pyshark = pyshark.FileCapture(in_pcap, only_summaries=True)
    frame_num = 0
    ignored_packets = 0
    try:
        pcap_pyshark.load_packets()
        pcap_pyshark.reset()

        # Explicit encoding so non-ASCII text in the description field does
        # not depend on the platform's default codec.
        with open(out_csv, 'w', encoding='utf-8') as fh_csv:
            # Open the pcap file with scapy's RawPcapReader, and iterate over
            # each packet. In each iteration get the PyShark packet as well,
            # and then call render_csv_row() with both representations to
            # generate the CSV row.
            for (pkt_scapy, _) in RawPcapReader(in_pcap):
                try:
                    pkt_pyshark = pcap_pyshark.next_packet()
                except StopIteration:
                    # Shouldn't happen because the RawPcapReader iterator
                    # should also exit before this happens.
                    break

                frame_num += 1
                if not render_csv_row(pkt_pyshark, pkt_scapy, fh_csv):
                    ignored_packets += 1
    finally:
        # Release the PyShark capture even if processing fails.
        pcap_pyshark.close()

    print('{} packets read, {} packets not written to CSV'.
          format(frame_num, ignored_packets))

#--------------------------------------------------

def command_line_args():
    """Helper called from main() to parse the command line arguments"""

    parser = argparse.ArgumentParser()
    # The metavar placeholders were empty strings in the reviewed text,
    # which leaves the generated help without argument names.
    parser.add_argument('--pcap', metavar='<pcap file>',
                        help='pcap file to parse', required=True)
    parser.add_argument('--csv', metavar='<csv file>',
                        help='csv file to create', required=True)
    args = parser.parse_args()
    return args

#--------------------------------------------------

def main():
    """Program main entry"""
    args = command_line_args()

    if not os.path.exists(args.pcap):
        print('Input pcap file "{}" does not exist'.format(args.pcap),
              file=sys.stderr)
        sys.exit(-1)

    # Refuse to clobber an existing output file.
    if os.path.exists(args.csv):
        print('Output csv file "{}" already exists, '
              'won\'t overwrite'.format(args.csv),
              file=sys.stderr)
        sys.exit(-1)

    pcap2csv(args.pcap, args.csv)

#--------------------------------------------------

if __name__ == '__main__':
    main()

【本文地址】

公司简介

联系我们