"""Download the image galleries linked from the m.tuiimg.com index page.

The script fetches the index page, collects the thumbnail URL of every
listed gallery, opens each gallery page to learn how many images it holds,
and saves the images as ``<gallery>/<n>.jpg`` below the target directory.
"""
import os
import time

# NOTE: ``re`` is the *requests* library here, not the stdlib regex module.
import requests as re
from bs4 import BeautifulSoup
from tqdm import tqdm

download_src = "https://m.tuiimg.com/"  # index page that lists the galleries
now_file = os.getcwd()  # current working directory; used as the download root


def create_file(path):
    """Ensure the directory *path* exists and return its absolute path.

    Prints a notice when the path already exists; otherwise the directory
    (including missing parents) is created.
    """
    if os.path.exists(path):
        print('文件已存在')
    else:
        # exist_ok guards against the directory appearing between the
        # check above and this call.
        os.makedirs(path, exist_ok=True)
    return os.path.abspath(path)


def _thumbnail_urls(soup):
    """Return the ``realsrc`` URL of every thumbnail in the main gallery list."""
    gallery_list = soup.find('ul', {'id': 'main', 'class': 'main'})
    if gallery_list is None:
        return []

    urls = []
    for item in gallery_list.find_all('li'):
        img_tag = item.find('img')
        if img_tag is None:
            continue
        # The thumbnail address is read from ``realsrc``; entries without
        # it are skipped instead of raising KeyError.
        real_src = img_tag.get('realsrc')
        if real_src:
            urls.append(real_src)
    return urls


def _image_count(tag):
    """Return the last run of ASCII digits in *tag*'s text, or 0 if none.

    NOTE(review): assumes the gallery page shows the image total as the
    final number inside the ``allbtn`` element -- confirm against the site.
    """
    if tag is None:
        return 0

    digits = ''
    for ch in reversed(tag.get_text()):
        if '0' <= ch <= '9':
            digits = ch + digits
        elif digits:
            break
    return int(digits) if digits else 0


def download(path, download_src, timeout=5):
    """Download every gallery linked from the index page into *path*.

    Args:
        path: Directory below which one sub-folder per gallery is created.
        download_src: URL of the index page that lists the galleries.
        timeout: Seconds to wait for each HTTP request.

    Raises:
        requests.RequestException: If the index page cannot be fetched.
            Failures on individual galleries or images are skipped.
    """
    index_page = re.get(download_src, timeout=timeout)
    index_page.raise_for_status()
    index_page.encoding = index_page.apparent_encoding
    index_soup = BeautifulSoup(index_page.text, 'html.parser')

    progress = tqdm(
        _thumbnail_urls(index_soup),
        ncols=100,
        desc='文件下载进度',
        colour='#96b97d',
    )
    for thumb_url in progress:
        # NOTE(review): both slices assume the thumbnail URL ends with a
        # 5-character gallery id followed by a 6-character file name --
        # confirm against the site's current URL scheme.
        gallery_id = thumb_url[-11:-6]
        image_prefix = thumb_url[:-6]

        try:
            gallery_page = re.get(
                'https://m.tuiimg.com/meinv/' + gallery_id, timeout=timeout
            )
        except re.RequestException:
            continue
        if not gallery_page.ok:
            continue
        gallery_page.encoding = gallery_page.apparent_encoding
        gallery_soup = BeautifulSoup(gallery_page.text, 'html.parser')

        count_holder = gallery_soup.find('span', {'class': 'all'})
        count_tag = None
        if count_holder is not None:
            count_tag = count_holder.find('i', {'id': 'allbtn'})
        image_total = _image_count(count_tag)
        if image_total == 0:
            continue

        # Strip slashes so a leading "/" in the id cannot make os.path.join
        # discard *path*.
        folder_path = os.path.join(path, gallery_id.strip('/'))
        os.makedirs(folder_path, exist_ok=True)

        # Upper bound is inclusive: images are requested as 1..image_total,
        # so the last image is no longer skipped. A number that does not
        # exist is rejected by the status check below.
        for index in range(1, image_total + 1):
            image_url = f'{image_prefix}{index}.jpg'
            try:
                image = re.get(image_url, timeout=timeout)
            except re.RequestException:
                continue
            if not image.ok:
                # Do not store an HTTP error page under a .jpg name.
                continue

            image_path = os.path.join(folder_path, f'{index}.jpg')
            with open(image_path, 'wb') as fp:
                fp.write(image.content)
            # Short pause between downloads, taken after the file is closed.
            time.sleep(0.1)


if __name__ == "__main__":
    file_path = create_file(now_file)
    print(f'当前文件路径{file_path}')
    download(file_path, download_src)