链家二手房

发布时间: 2023-10-18 18:40:21 | 作者: 牧羊人の冬天
import pandas as pd
import requests
from bs4 import BeautifulSoup

# Fetch the raw HTML of one listing page.
def get_data(page):
    """Download one second-hand housing listing page from sz.lianjia.com.

    Args:
        page: Page number interpolated into the ``/ershoufang/pg{page}/`` URL.

    Returns:
        The response body decoded as text.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
    """
    url = f"https://sz.lianjia.com/ershoufang/pg{page}/"
    # Without a timeout, requests waits indefinitely on a stalled connection.
    response = requests.get(url=url, timeout=10)
    # Fail loudly on an HTTP error instead of handing an error page to the
    # HTML parser as if it were a listing page.
    response.raise_for_status()
    return response.text

# Parse one listing page into rows.
def process_page(page):
    """Fetch listing page ``page`` and extract one row per listing.

    Args:
        page: Page number passed through to ``get_data``.

    Returns:
        A list of ``[title, location, house_info, price, unitPrice,
        followInfo]`` rows, one per ``<li>`` that contains all six fields.
        The list is empty when the page has no ``ul.sellListContent``
        container.
    """
    res = get_data(page)
    bs = BeautifulSoup(res, 'html.parser')
    soup = bs.find('ul', class_='sellListContent')
    if soup is None:
        # find() returns None when nothing matches (e.g. the site served a
        # different page); report it and skip rather than crash on None.
        print(f"第{page}页未找到房源列表，已跳过")
        return []

    # (CSS class of the <div>, whether to strip surrounding whitespace),
    # listed in output column order. Only the fields the original code
    # stripped are stripped here, so the extracted text is unchanged.
    fields = (
        ("title", False),
        ("positionInfo", True),
        ("houseInfo", True),
        ("totalPrice", False),
        ("unitPrice", False),
        ("followInfo", False),
    )

    page_data = []
    for sell in soup.find_all('li'):
        nodes = [sell.find("div", class_=css_class) for css_class, _ in fields]
        # An <li> lacking any of the expected <div>s is not a complete
        # listing; skipping it avoids AttributeError on None.get_text().
        if any(node is None for node in nodes):
            continue
        data = [
            node.get_text().strip() if strip else node.get_text()
            for node, (_, strip) in zip(nodes, fields)
        ]
        page_data.append(data)
        print(f"第{page}的数据{data}")
    return page_data

def main():
    """Scrape listing pages 1 through 30 and write them to ``lianjia.xlsx``."""
    data = []
    # Page numbers in the URL appear to be 1-based (/pg1/, /pg2/, ...).
    # range(30) requested pg0 (not a real page -- presumably served as
    # page 1, TODO confirm) and never reached page 30.
    for i in range(1, 31):
        data.extend(process_page(i))

    # Convert the collected rows into a pandas DataFrame.
    df = pd.DataFrame(data, columns=["标题", "位置", "房屋信息", "价格", "单价", "关注度"])

    # Write the data to an Excel file, without the index column.
    df.to_excel("lianjia.xlsx", index=False)


# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()