爬取 万年历 xml 操作

发布时间: 2023-05-24 17:03:53 作者: Lionever
import datetime
import requests
import xml.etree.ElementTree as ET

# --- Configuration ---------------------------------------------------------
# Search query parameters; no request in this script sends them.
kw = {'wd': 'python教程'}

# Page that is downloaded and parsed by the rest of the script.
url1 = 'https://rili.ximizi.com/jinrijishi.php'

# Hard-coded sample of an alternative per-day calendar page; it is never
# requested.  A URL for the current day would have the shape
#   https://www.xingzuo5.net/calendar/<year>/<year>-<month>-<day>.html
url2 = 'https://www.xingzuo5.net/calendar/2025/2025-12-22.html'

# Browser-like request headers.  They are kept available, but the fetch
# below does not send them: the text the script parses has always come from
# a plain GET without custom headers, and the fixed slice offsets used
# further down were written against that response.
headers = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36' }

# Today's date; written into the <today> element of 2.xml later on.
tdate = datetime.date.today()

# --- Fetch -----------------------------------------------------------------
# One request is enough: the page used to be downloaded four times although
# only a single response body was ever read.  The timeout keeps the script
# from hanging forever on a stalled connection, and raise_for_status() stops
# the run on an HTTP error instead of parsing an error page.
response = requests.get(url1, timeout=10)
response.raise_for_status()
response.encoding = 'utf-8'  # decode the body as UTF-8 regardless of the guessed charset

# Decoded page text consumed by the parsing steps below.
r4str = response.text

def _text_after(page, marker, start, stop):
    """Return the slice of *page* located relative to the first *marker*.

    Args:
        page: Text to search.
        marker: Substring whose first occurrence anchors the slice.
        start: Offset from the marker position where the slice begins.
        stop: Offset from the marker position where the slice ends
            (exclusive).

    Returns:
        ``page[pos + start:pos + stop]`` where ``pos`` is the index of the
        first occurrence of *marker*.

    Raises:
        ValueError: If *marker* does not occur in *page*.  ``str.find``
            reports that case as -1, which would otherwise silently produce
            a slice from an unrelated part of the text.
    """
    pos = page.find(marker)
    if pos < 0:
        raise ValueError('marker not found in page: ' + marker)
    return page[pos + start:pos + stop]


# Lunar date: the 9 characters starting 12 characters after the marker.
nlrq = _text_after(r4str, '农历日期', 12, 21)
# Stem-branch (ganzhi) date: the 11 characters starting 10 after the marker.
gzrq = _text_after(r4str, '干支', 10, 21)

# NOTE: these two are fixed labels only; nothing extracted from the page is
# appended to them before they are stored in the XML document.
y1str ='宜:'
j2str ='忌:'

print('农历日期:'+nlrq)
print('干支日期:'+gzrq)

# Load the existing document; `tree` and `root` are reused further down when
# the per-slot entries are filled in.
tree = ET.parse('2.xml')
root = tree.getroot()

# Text for each top-level summary element, applied in this order.
# root.find() returns None for a missing tag, in which case the assignment
# raises AttributeError.
for tag, value in (
    ('today', str(tdate)),
    ('nl', nlrq),
    ('gz', gzrq),
    ('y', y1str),
    ('j', j2str),
):
    root.find(tag).text = value

# Persist the updated summary fields back to the same file.
tree.write('2.xml')

print('-------------------------------------------')
print('-------------------------------------------')

# Split the page on the 'sc2title' marker.  The first piece is the text that
# precedes the first marker, so it is not a slot and is skipped; the
# remaining pieces are numbered from 0 and matched by position with the
# <sharry> children of each <sh> element.
slot_chunks = r4str.split('sc2title')[1:]

for slot_index, chunk in enumerate(slot_chunks):
    print('-------------------')

    # The short name always sits at the same offset; the full label is
    # sliced one character longer for slot 5 and two longer after that
    # (presumably because the hour labels get wider - confirm against the
    # live page).
    name = chunk[2:5]
    if slot_index < 5:        # labelled "0-8 o'clock" in the original notes
        allname = chunk[2:22]
        print(allname)
    elif slot_index == 5:     # labelled "9 to 10 o'clock"
        allname = chunk[2:23]
        print(name, allname)
    else:                     # labelled "11-24 o'clock"
        allname = chunk[2:24]
        print(allname)

    # Store both labels in the matching <sharry> entry of every <sh> element.
    # Raises IndexError when the page has more slots than <sharry> entries.
    for sh in root.findall('sh'):
        entry = sh.findall('sharry')[slot_index]
        entry.find('name').text = name
        entry.find('allname').text = allname

# Write the document once after all slots are filled instead of rewriting
# the file on every iteration; the final file content is the same.
tree.write('2.xml')



# print(r4str)
# print(response.headers)
# print('-------')
# print(r1.content.decode())
# print(response.cookies.values())
# print(response.headers)

# Printed only when the file is run as a script.  NOTE: this guard sits at
# the end of the file, after the module-level statements above have already
# run, so the "start" banner is the last thing printed.
if __name__ == '__main__':
    print('开始:')