#encoding: utf-8
"""Save every article linked from an index page as a UTF-8 text file.

The script opens ``INDEX_URL`` in headless Edge, collects every ``<a>``
inside the element with id ``js_content``, opens each link in turn and
writes the text of the target page's ``js_content`` element(s) to
``OUTPUT_DIR/<publish date> <title>.txt``.

Run it as a script; it blocks on an Enter prompt once all links are done.
"""
import re
import time
import urllib.request as req  # noqa: F401  (kept from the original; unused here)
from pathlib import Path

import selenium  # noqa: F401  (kept from the original; unused here)
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# Index page whose links are downloaded.  Triple-quoted so a URL pasted with
# line breaks still works; the newlines are stripped before use.
INDEX_URL = '''https://mp.weixin.qq.com/s/8mWoT3_4g4qCI6zE4PBeMA'''

# Directory (relative to the working directory) that receives the .txt files.
OUTPUT_DIR = Path("记忆承载文章")

# Used in place of the date when the target page has no publish_time element.
DELETED_PLACEHOLDER = "--已删除--"

# Seconds to wait for the article tab to appear after confirming the dialog.
NEW_WINDOW_TIMEOUT = 10

# Characters that are not allowed in Windows file names: \ / : * ? " < > |
# and the ASCII control characters (which covers embedded newlines and tabs).
_ILLEGAL_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _sanitize_title(title):
    """Return *title* with every file-name-illegal character replaced by '-'."""
    return _ILLEGAL_FILENAME_CHARS.sub('-', title)


def _edge_options():
    """Build the Edge options: headless, with driver console logging off."""
    options = Options()
    options.add_argument("--headless")
    options.add_experimental_option('excludeSwitches', ['enable-logging'])
    return options


def _publish_date(driver):
    """Return the date part of the current page's publish time.

    Returns ``DELETED_PLACEHOLDER`` when the page has no ``publish_time``
    element (presumably because the article was removed -- the placeholder
    text says "deleted").
    """
    try:
        stamp = driver.find_element(By.XPATH, '//*[@id="publish_time"]')
    except NoSuchElementException:
        return DELETED_PLACEHOLDER
    # Keep only the text before the first space, i.e. drop the time of day.
    return stamp.text.split(" ")[0]


def _save_article(driver, index, title):
    """Write the text of the page in the current window to its own file.

    *index* is the 1-based position of the link on the index page and is only
    used for the progress message; *title* is the link text.
    """
    bodies = driver.find_elements(By.XPATH, '//*[@id="js_content"]')
    date = _publish_date(driver)
    text = "".join(body.text for body in bodies)

    title = _sanitize_title(title)
    target = OUTPUT_DIR / (date + ' ' + title + '.txt')
    with open(target, 'w', encoding='utf-8') as f:
        f.write(text)
    print(index, ":", title, 'finish.')


def main():
    """Download every linked article, report the elapsed time, then quit."""
    start_time = time.time()

    driver = webdriver.Edge(options=_edge_options())
    try:
        driver.minimize_window()
        # Element lookups poll for up to 3 seconds before raising.
        driver.implicitly_wait(3)
        driver.get(INDEX_URL.replace('\n', ""))
        time.sleep(1)

        main_window = driver.current_window_handle
        content = driver.find_element(By.XPATH, '//*[@id="js_content"]')
        links = content.find_elements(By.TAG_NAME, 'a')

        OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

        for index, link in enumerate(links, start=1):
            title = link.text
            handles_before = driver.window_handles

            link.click()
            time.sleep(0.2)
            # NOTE(review): presumably a "leave this page?" confirmation
            # dialog; the script has always clicked its OK button.
            driver.find_element(
                By.XPATH, '//*[@id="js_link_dialog_ok"]').click()

            # Wait for the article tab instead of assuming it is already open,
            # and pick it by diffing handles: the order of window_handles is
            # not guaranteed, and grabbing the wrong one would scrape -- and
            # then close -- the index page itself.
            WebDriverWait(driver, NEW_WINDOW_TIMEOUT).until(
                EC.new_window_is_opened(handles_before),
                message="no new window opened for link %d" % index,
            )
            article_window = next(
                handle for handle in driver.window_handles
                if handle not in handles_before
            )
            driver.switch_to.window(article_window)

            _save_article(driver, index, title)

            driver.close()
            driver.switch_to.window(main_window)

        end_time = time.time()
        print("总耗时: {:.2f}秒".format(end_time - start_time))
        input("all finished press Enter to quit:")
    finally:
        # Always release the browser, even when a step above raised.
        driver.quit()


if __name__ == "__main__":
    main()