作业①
要求:
熟练掌握 Selenium 查找 HTML 元素、爬取 Ajax 网页数据、等待 HTML 元素等内容。
使用 Selenium 框架+ MySQL 数据库存储技术路线爬取“沪深 A 股”、“上证 A 股”、
“深证 A 股”3 个板块的股票数据信息。
候选网站:东方财富网http://quote.eastmoney.com/center/gridlist.html#hs_a_board
输出信息:MySQL 数据库存储和输出格式如下,表头应是英文命名,
例如:序号 id,股票代码 bStockNo……,由同学们自行定义设计表头:
Gitee 文件夹链接为:
实验代码:
点击查看代码
import sqlite3
import options as options
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time
from selenium.webdriver.common.by import By
# Module-level setup: start Chrome, open the quote page and prepare the
# SQLite table that getdata() writes into.
# NOTE(review): this assignment rebinds the name `options` that was imported
# at the top of the file.
options = webdriver.ChromeOptions()
# Optional flags, currently disabled; re-enable to run without a visible
# browser window.
# options.add_argument('--headless')
# options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)
# Eastmoney quote grid page; spider() loads this same URL again.
url = 'http://quote.eastmoney.com/center/gridlist.html#hs_a_board'
driver.get(url)
# Results are stored in the local SQLite file work4.db; `conn` and `cursor`
# are shared with getdata().
conn = sqlite3.connect('work4.db')
cursor = conn.cursor()
# Drop any existing table so every run starts from an empty stock3.
cursor.execute("DROP TABLE IF EXISTS stock3")
# 13 columns, matching the 13 placeholders of the INSERT in getdata().
cursor.execute('''CREATE TABLE IF NOT EXISTS stock3
(serial_no INTEGER, code TEXT, name TEXT, latest_price REAL,
change_percent REAL, change_amount REAL, volume INTEGER, amount REAL,
amplitude REAL, highest REAL, lowest REAL, today_open REAL, yesterday_close REAL)''')
# page = driver.find_elements(By.XPATH, '//span[@class="paginate_page"]//a')
def _wait_for_table(driver, stale_row=None, timeout=10):
    """Wait until the Ajax-rendered quote table is ready to be scraped.

    Args:
        driver: Selenium WebDriver showing the quote page.
        stale_row: A row element captured before a tab click. When given,
            the wait first blocks until that element has left the DOM, so
            the rows scraped afterwards are not the previous tab's rows.
        timeout: Maximum number of seconds to wait for each condition.

    A timeout is reported with ``print`` and otherwise ignored, so the
    caller still gets a best-effort scrape of whatever is on the page.
    """
    # Local imports keep this block self-contained; selenium is already a
    # dependency of the file.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    wait = WebDriverWait(driver, timeout)
    try:
        if stale_row is not None:
            wait.until(EC.staleness_of(stale_row))
        wait.until(EC.presence_of_element_located((By.XPATH, '//tbody/tr')))
    except TimeoutException:
        print("table not ready after %s s; scraping anyway" % timeout)


def spider(driver):
    """Scrape the initially shown table, then the 2nd and 3rd tabs.

    Loads the module-level ``url``, stores the table that is shown first,
    then clicks ``//div[@id="tab"]/ul/li[2]`` and ``li[3]`` in turn and
    stores the table after each click (presumably the other two boards;
    verify against the live page).

    Args:
        driver: Selenium WebDriver used for navigation and scraping.
    """
    driver.get(url=url)
    # The table is filled in by Ajax after the page load, so wait for at
    # least one row before the first scrape.
    _wait_for_table(driver)
    getdata(driver)
    for i in range(2, 4):
        # Remember a row of the current table so the refresh triggered by
        # the click can be detected instead of guessed with a fixed sleep.
        old_rows = driver.find_elements(By.XPATH, '//tbody/tr')
        button = driver.find_element(By.XPATH, '//div[@id="tab"]/ul/li[' + str(i) + ']')
        print("翻页")
        button.click()
        _wait_for_table(driver, stale_row=old_rows[0] if old_rows else None)
        getdata(driver)
# name = driver.find_elements(By.XPATH, '//tbody/tr/td[3]')
# for i in range(len(name)):
# print(name[i].text)
def getdata(driver):
    """Store the rows of the table currently shown into ``stock3``.

    Every ``//tbody/tr`` row is read as a unit, so all values of one record
    come from the same table row. Rows with fewer than 14 cells are skipped.
    The records of one call are inserted and committed together; if anything
    fails, the pending inserts are rolled back and the error is printed
    instead of raised, so the caller can continue with the next tab.

    Args:
        driver: Selenium WebDriver whose current page shows the quote table.
    """
    # 0-based positions of the stored cells inside a row. The fourth cell
    # (XPath td[4]) is not stored, which leaves the 13 values expected by
    # the INSERT statement below.
    wanted_cells = (0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)
    try:
        records = []
        for row in driver.find_elements(By.XPATH, '//tbody/tr'):
            cells = row.find_elements(By.XPATH, './td')
            if len(cells) <= wanted_cells[-1]:
                # Too short to hold every wanted column; skipping avoids an
                # IndexError and misaligned values.
                continue
            records.append(tuple(cells[i].text for i in wanted_cells))
        cursor.executemany(
            "INSERT INTO stock3 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            records,
        )
        conn.commit()
        print("存储成功")
    except Exception as err:
        # Best-effort: report and continue, but do not leave a partial batch
        # pending for a later commit.
        conn.rollback()
        print(err)
# Run the scrape, then always release the browser and the database, even if
# spider() raises.
try:
    spider(driver)
finally:
    try:
        # quit() ends the whole WebDriver session; close() would only close
        # the current window and leave the driver process running.
        driver.quit()
    finally:
        conn.close()