import pandas as pd
import requests
from bs4 import BeautifulSoup
# Fetch the raw HTML of one listing page.
def get_data(page, timeout=10):
    """Download the HTML of one second-hand housing listing page.

    The page is requested from ``https://sz.lianjia.com/ershoufang/pg{page}/``.

    Args:
        page: Page number interpolated into the ``pg{page}`` URL segment.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body decoded as text.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    url = f"https://sz.lianjia.com/ershoufang/pg{page}/"
    # Without a timeout a single stalled connection would hang the whole crawl.
    response = requests.get(url=url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    return response.text
# Parse one listing page into rows of listing fields.
def process_page(page):
    """Scrape one listing page and return its rows.

    Fetches the page via ``get_data``, locates the ``<ul class="sellListContent">``
    container and extracts six text fields from every ``<li>`` inside it.

    Args:
        page: Page number passed through to ``get_data``.

    Returns:
        A list of rows; each row is
        ``[title, location, house_info, price, unit_price, follow_info]``.
        The list is empty when the page has no listing container.
    """
    html = get_data(page)
    document = BeautifulSoup(html, 'html.parser')
    listing = document.find('ul', class_='sellListContent')
    if listing is None:
        # find() returns None when the container is absent; report the page
        # and carry on instead of crashing with AttributeError.
        print(f"第{page}页未找到房源列表，已跳过")
        return []

    # CSS classes of the <div> elements holding each field, in output order.
    field_classes = (
        "title",
        "positionInfo",
        "houseInfo",
        "totalPrice",
        "unitPrice",
        "followInfo",
    )

    page_data = []
    for sell in listing.find_all('li'):
        nodes = [sell.find("div", class_=name) for name in field_classes]
        if any(node is None for node in nodes):
            # An <li> missing any of these divs is not a complete listing
            # record; skip it rather than fail on None.get_text().
            continue
        title, location, house_info, price, unit_price, follow_info = (
            node.get_text() for node in nodes
        )
        # Only the location and house-info texts are stripped, as before.
        data = [
            title,
            location.strip(),
            house_info.strip(),
            price,
            unit_price,
            follow_info,
        ]
        page_data.append(data)
        print(f"第{page}的数据{data}")
    return page_data
def main(pages=30, output_path="lianjia.xlsx"):
    """Scrape the first ``pages`` listing pages and save them to an Excel file.

    Args:
        pages: Number of listing pages to fetch, starting from page 1.
        output_path: Path of the Excel workbook to write.
    """
    data = []
    # Listing pages are numbered from 1 (".../pg1/"), so iterate 1..pages;
    # range(pages) would request "pg0" and never reach the last page.
    for page in range(1, pages + 1):
        data.extend(process_page(page))
    # Convert the collected rows into a pandas DataFrame.
    df = pd.DataFrame(data, columns=["标题", "位置", "房屋信息", "价格", "单价", "关注度"])
    # Write the table to an Excel file, omitting the index column.
    df.to_excel(output_path, index=False)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# Lianjia second-hand housing listings
# Published 2023-10-18 18:40:21, author: 牧羊人の冬天