作业4

发布时间:2023-11-16 06:50:38　作者:tiantianmimi

作业①

要求:

熟练掌握 Selenium 查找 HTML 元素、爬取 Ajax 网页数据、等待 HTML 元素等内容。

使用 Selenium 框架 + MySQL 数据库存储技术路线爬取“沪深 A 股”、“上证 A 股”、“深证 A 股”3 个板块的股票数据信息。

候选网站:东方财富网 http://quote.eastmoney.com/center/gridlist.html#hs_a_board

输出信息:MySQL 数据库存储和输出格式如下,表头应使用英文命名,例如:序号 id,股票代码 bStockNo……,由同学们自行定义设计表头:
Gitee 文件夹链接为:

实验代码:

点击查看代码
import sqlite3

import options as options
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
import time

from selenium.webdriver.common.by import By

# Browser session: a regular (non-headless) Chrome window. The two headless
# switches are kept here, disabled, for anyone who wants to turn them on.
options = webdriver.ChromeOptions()
# options.add_argument('--headless')
# options.add_argument('--disable-gpu')
driver = webdriver.Chrome(options=options)

# Open the quote grid page in that browser.
url = 'http://quote.eastmoney.com/center/gridlist.html#hs_a_board'
driver.get(url)

# Scraped rows are written to a local SQLite file through this cursor.
conn = sqlite3.connect('work4.db')
cursor = conn.cursor()

# Start every run from an empty table: drop any existing stock3 first, then
# create it again with one column per stored field.
for _ddl in (
    "DROP TABLE IF EXISTS stock3",
    '''CREATE TABLE IF NOT EXISTS stock3
                  (serial_no INTEGER, code TEXT, name TEXT, latest_price REAL,
                   change_percent REAL, change_amount REAL, volume INTEGER, amount REAL,
                   amplitude REAL, highest REAL, lowest REAL, today_open REAL, yesterday_close REAL)''',
):
    cursor.execute(_ddl)


# page = driver.find_elements(By.XPATH, '//span[@class="paginate_page"]//a')

def _wait_for_rows(driver, replaced=None, timeout=10):
    """Block until the quote table has rows to read, or until *timeout* expires.

    Args:
        driver: The Selenium WebDriver showing the page.
        replaced: Optional element from the table as it was before a tab
            click. When given, the wait first requires that element to be
            detached from the DOM, so rows from the previous tab are not
            read again.
        timeout: Maximum number of seconds for each wait step.

    A timeout is reported and swallowed, so the caller still scrapes whatever
    is on the page at that point.
    """
    # Imported here so the block is self-contained; selenium is already a
    # dependency of this file.
    from selenium.common.exceptions import TimeoutException
    from selenium.webdriver.support import expected_conditions as EC
    from selenium.webdriver.support.ui import WebDriverWait

    wait = WebDriverWait(driver, timeout)
    try:
        if replaced is not None:
            wait.until(EC.staleness_of(replaced))
        # Same cell that getdata relies on (the 3rd <td> of a body row).
        wait.until(EC.presence_of_element_located((By.XPATH, '//tbody/tr/td[3]')))
    except TimeoutException:
        print("等待表格数据超时")


def spider(driver):
    """Store the table shown on page load, then the tables behind tabs 2 and 3.

    Loads the module-level ``url``, saves the table that is displayed first
    via ``getdata``, then clicks the 2nd and 3rd ``<li>`` under ``div#tab``
    in turn and saves each resulting table the same way.

    Args:
        driver: The Selenium WebDriver used for navigation and lookups.
    """
    driver.get(url=url)
    # The assignment text describes this as an Ajax page, so rows may not
    # exist yet when navigation returns; wait for them before the first read.
    _wait_for_rows(driver)
    getdata(driver)
    for i in range(2, 4):
        # Keep a handle on a row of the table currently shown so the wait
        # below can tell when the click has actually replaced the table.
        shown_rows = driver.find_elements(By.XPATH, '//tbody/tr')
        button = driver.find_element(By.XPATH, '//div[@id="tab"]/ul/li[' + str(i) + ']')
        print("翻页")
        button.click()
        _wait_for_rows(driver, replaced=shown_rows[0] if shown_rows else None)
        getdata(driver)


# Zero-based positions, within one row's <td> cells, of the 13 stored fields,
# in stock3 column order (serial_no ... yesterday_close). The 4th cell
# (index 3) is not stored.
_STOCK_CELLS = (0, 1, 2, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13)


def _read_stock_rows(driver):
    """Return one 13-tuple of cell texts per table body row on the page.

    Cells are read row by row, so every tuple is built from a single <tr> and
    fields from different rows cannot be mixed. Rows with fewer cells than
    the last stored position are skipped.
    """
    needed = _STOCK_CELLS[-1] + 1
    records = []
    for row in driver.find_elements(By.XPATH, '//tbody/tr'):
        cells = row.find_elements(By.XPATH, './td')
        if len(cells) < needed:
            continue
        records.append(tuple(cells[i].text for i in _STOCK_CELLS))
    return records


def getdata(driver):
    """Read the table currently displayed and insert its rows into ``stock3``.

    Uses the module-level ``cursor`` and ``conn``. Cell values are inserted
    as the text Selenium returns for each cell. All rows of the page are
    inserted in one batch and committed once; if anything fails, the batch is
    rolled back and the error is printed instead of being raised, so the
    caller can carry on with the next table.

    Args:
        driver: The Selenium WebDriver showing the page to read.
    """
    try:
        records = _read_stock_rows(driver)
        cursor.executemany("INSERT INTO stock3 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", records)
        conn.commit()
        print("存储成功")
    except Exception as err:
        # Discard a partially inserted batch so a later commit cannot flush
        # half a page into the table.
        conn.rollback()
        print(err)


# Run the scrape, then always release the database connection and the
# browser, even when spider() raises.
try:
    spider(driver)
finally:
    try:
        conn.close()
    finally:
        # quit() ends the whole WebDriver session, not just the current window.
        driver.quit()