# 爬取万年历 xml 操作 - JZTXT

import datetime
import requests
import xml.etree.ElementTree as ET

# Query parameters; only referenced by a commented-out Baidu search request.
kw = dict(wd='python教程')

# url1 is the page this script downloads and parses.
# url2 is a date-specific calendar page; it is defined but not requested
# anywhere in the visible code.
url1 = 'https://rili.ximizi.com/jinrijishi.php'
url2 = 'https://www.xingzuo5.net/calendar/2025/2025-12-22.html'

# Request headers carrying a desktop-browser User-Agent string.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 '
        '(KHTML, like Gecko) Chrome/86.0.4240.198 Safari/537.36'
    ),
}

# Today's date; later written into the XML file as text.
tdate = datetime.date.today()

# Disabled alternative: build url2 from today's date instead of hard-coding it.
# tyear = str(tdate.year)
# tmon = str(tdate.month)
# tday = str(tdate.day)
# sday = tyear + '-' + tmon + '-' + tday
# url = 'https://www.xingzuo5.net/calendar'
# url2 = url + "/" + tyear + "/" + sday + '.html'

# Disabled alternative: turn a raw "k=v; k=v" cookie string (`temp`) into a dict.
# Safe, explicit version:
# cooklist = temp.split('; ')
# cookies = {}
# for cookie in cooklist:
#     cookies[cookie.split('=')[0]] = cookie.split('=')[-1]
# print(cookies)
# Dict-comprehension version:
# cookies = {c.split('=')[0]: c.split('=')[-1] for c in temp.split('; ')}
# Download the almanac page once and decode it as UTF-8.
#
# This section used to send four separate GET requests to url1 even though
# only the last response was ever parsed, and none of them had a timeout, so
# a stalled connection could block the script forever.  A single request
# with a timeout replaces them.
#
# NOTE(review): the request that feeds the parser has never sent `headers`,
# so it still does not; pass headers=headers here if the site starts
# rejecting the default client User-Agent.
r4 = requests.get(url1, timeout=10)
r4.encoding = 'utf-8'
r4str = r4.text

# The former names are kept as aliases of the single response so that any
# debugging snippet that refers to them keeps working.
response = res = r2 = r4
restr = r4str

# Disabled example: Baidu search with headers, query parameters and cookies.
# r1 = requests.get('http://www.baidu.com/s?', headers=headers, params=kw, cookies=cookies, timeout=3)

# Debugging aids:
# print(response.text)
# print(response.content.decode())
# print(res.content.decode())
# print(r1.url.encode())
# print(restr[110:24542])

# Locate the text markers in the downloaded page.
# NOTE: str.find returns -1 when a marker is absent, in which case the
# slices below silently read from unrelated positions.
nlindex, gzrqindex, y1index, j1index, cindex = (
    r4str.find(marker)
    for marker in ('农历日期', '干支', 'sc2title', '时忌', '冲')
)

# Fixed-width window after the first 'sc2title' marker (first time slot).
yj2y = r4str[y1index + 10:y1index + 30]
yendindex = yj2y.find('/p')
ystr = yj2y

# Text between the '时忌' marker and the '冲' marker.
j1str = r4str[j1index + 14:cindex - 15]

# Lunar date and stem-branch date, cut out at fixed offsets from their markers.
nlrq = r4str[nlindex + 12:nlindex + 21]
gzrq = r4str[gzrqindex + 10:gzrqindex + 21]

# Label texts that are later stored in the <y> and <j> XML elements.
y1str = '宜：'
j2str = '忌:'

print(f'农历日期：{nlrq}')
print(f'干支日期：{gzrq}')

# Load 2.xml, fill in the date-related elements, and save it back.
tree = ET.parse('2.xml')
root = tree.getroot()

# (element tag, new text) pairs, applied in this order.  root.find returns
# None for a missing tag, so a missing element raises AttributeError here.
for _tag, _text in (
    ('today', str(tdate)),
    ('nl', nlrq),
    ('gz', gzrq),
    ('y', y1str),
    ('j', j2str),
):
    blstr = root.find(_tag)
    blstr.text = _text

tree.write('2.xml')

print('-------------------------------------------')
# print(r4str)
print('-------------------------------------------')

# Split the page on 'sc2title'; every piece after the first starts with one
# time-slot entry.
scstr = r4str.split('sc2title')

try:
    # i keeps the numbering the script has always used: 2 for the text
    # before the first marker (skipped), 3 for the first entry, and so on,
    # so i - 3 is the zero-based index of the matching <sharry> element.
    for i, str1 in enumerate(scstr, start=2):
        if i <= 2:
            continue

        # The full label is cut at a width that depends on the entry's
        # position.  Per the original comments these groups are hours 0-8,
        # 9-10 and 11-24; presumably the label grows as the hour numbers
        # gain digits.
        if i < 8:
            end = 22
        elif i == 8:
            end = 23
        else:
            end = 24

        name = str1[2:5]
        allname = str1[2:end]

        print('-------------------')
        if i == 8:
            # This one entry has always printed the short name as well.
            print(name, allname)
        else:
            print(allname)

        for sh in root.findall('sh'):
            shat = sh.findall('sharry')
            ename = shat[i - 3].find('name')
            aname = shat[i - 3].find('allname')

            ename.text = name
            aname.text = allname
finally:
    # Save once instead of rewriting the file on every inner iteration.
    # Doing it in `finally` still persists the slots processed so far when a
    # later slot fails (for example, more entries than <sharry> elements).
    tree.write('2.xml')



# print(r4str)
# print(response.headers)
# print('-------')
# print(r1.content.decode())
# print(response.cookies.values())
# print(response.headers)

# Entry-point guard: prints a start message only when run as a script.
# NOTE(review): every statement above this guard runs at import time, so
# the message appears after the scraping work has already finished.
if __name__ == "__main__":
    print("开始：")
# JZTXT
#
# 爬取 万年历 xml 操作
#
# 爬取万年历 xml 操作