python自动下载网页中的文件,python 自动下载脚本 | 您所在的位置:网站首页 › python自动下载网页中的文件怎么设置 › python自动下载网页中的文件,python 自动下载脚本 |
"""Automatically download the files found under each directory of a server.

The script walks a two-level HTTP directory listing, records every file URL
in ``filename_url.txt`` and then downloads the files into local folders, one
folder tree per pool worker.
"""
import os
import posixpath
import re
import threading
from multiprocessing import Pool, Process

import requests


def request_dirname(url, pattern):
    """Fetch *url* and return the URLs of the entries matched by *pattern*.

    Args:
        url: Address of the page to fetch.
        pattern: Regular expression applied to the page text; every match is
            treated as an entry name relative to *url*.

    Returns:
        A list containing each match joined onto *url*, in page order.
    """
    response = requests.get(url)
    # Match against the decoded text. ``str(response.content)`` would search
    # the ``b'...'`` repr of the bytes, where non-ASCII names appear as
    # ``\x..`` escape sequences and produce unusable URLs.
    page = response.text
    # URLs always use forward slashes, so join with posixpath instead of
    # os.path, whose separator is a backslash on Windows.
    return [posixpath.join(url, name) for name in re.findall(pattern, page)]


def request_filename(url, pattern):
    """Collect the entry URLs of every sub-directory listed at *url*.

    Args:
        url: Address of the top-level listing.
        pattern: Regular expression passed on to :func:`request_dirname`.

    Returns:
        A list with one inner list of URLs per sub-directory.
    """
    # The first entry of each sub-listing is skipped -- presumably the link
    # back to the parent directory; TODO confirm against the server output.
    return [
        request_dirname(sub_url, pattern)[1:]
        for sub_url in request_dirname(url, pattern)
    ]


def filename_urls(url, pattern):
    """Return every file URL below *url* and save them to a text file.

    The URLs are written to ``filename_url.txt`` in the current working
    directory, one per line; any previous content of the file is replaced.

    Args:
        url: Address of the top-level listing.
        pattern: Regular expression passed on to :func:`request_filename`.

    Returns:
        A flat list of the collected URLs.
    """
    f_urls = [
        file_url
        for listing in request_filename(url, pattern)
        for file_url in listing
    ]
    # Write the file in one pass instead of reopening it for every URL.
    with open('filename_url.txt', 'w') as f_txt:
        f_txt.writelines(file_url + '\n' for file_url in f_urls)
    return f_urls


def build_dir(url, ICBC, current_path):
    """Make sure the target folder for *url* exists, then download the file.

    The folder is ``current_path/ICBC/<second-to-last URL segment>``.

    Args:
        url: Address of the file to download.
        ICBC: Name of the folder, below *current_path*, that receives the
            downloads.
        current_path: Base directory of the download tree.
    """
    path = url.split('/')[-2]  # sub-directory name taken from the URL
    dir_path = os.path.join(current_path, ICBC, path)
    if not os.path.exists(dir_path):
        # exist_ok guards against the folder appearing between the check
        # and the creation.
        os.makedirs(dir_path, exist_ok=True)
        print('%s已创建' % path)
    download_files(url, ICBC, current_path)


def download_files(url, ICBC, current_path):
    """Download *url* into ``current_path/ICBC/<sub-directory>``.

    The file keeps the last URL segment as its name. The target folder must
    already exist. After the download the folder content is printed.

    Args:
        url: Address of the file to download.
        ICBC: Name of the folder, below *current_path*, that receives the
            downloads.
        current_path: Base directory of the download tree.
    """
    segments = url.split('/')
    filename = segments[-1]
    path = segments[-2]
    work_path = os.path.join(current_path, ICBC, path)
    print(work_path)
    r = requests.get(url)
    # Write through the full path rather than os.chdir(): changing the
    # process-wide working directory would break later calls whenever
    # current_path is relative.
    with open(os.path.join(work_path, filename), 'wb') as f:
        f.write(r.content)
    print(os.listdir(work_path))


def run(ICBC, urls_results, current_path):
    """Download every URL in *urls_results* into the *ICBC* folder tree.

    Args:
        ICBC: Name of the folder, below *current_path*, that receives the
            downloads.
        urls_results: Iterable of file URLs.
        current_path: Base directory of the download tree.
    """
    for file_url in urls_results:
        build_dir(file_url, ICBC, current_path)


def _report_worker_error(error):
    """Print an exception raised inside a pool worker."""
    print('worker failed: %r' % (error,))


if __name__ == '__main__':
    current_path = os.getcwd()
    # NOTE(review): an empty pattern matches the empty string at every
    # position of the page, so no entry names are extracted. The real
    # link-extracting regex appears to be missing here -- restore it before
    # running the script.
    pattern1 = r''
    url1 = 'http://172.17.3.162:8081/'
    ABC = filename_urls(url1, pattern1)
    # The first three entries are skipped; the original note labels them
    # as folders.
    urls_results = ABC[3:]
    # A single worker can be started instead with
    # Process(target=run, args=('happy', urls_results, current_path)).start()
    pool = Pool(processes=20)
    for i in range(20):
        msg = 'ICBC%d' % i
        print(msg)
        # Without an error callback, an exception raised in a worker would
        # be discarded silently because the async result is never read.
        pool.apply_async(
            run,
            (msg, urls_results, current_path),
            error_callback=_report_worker_error,
        )
    pool.close()
    pool.join()
    print('done')
CopyRight 2018-2019 实验室设备网 版权所有 |