2023-fameex

发布时间:2023-11-28 15:15:35 作者:神在看着你

2023-11-28

来到fameex快半年了,很久没有写过东西了。似乎无穷无尽的功能测试,已经让我忘记了,我原来是喜欢coding的。

这半年来,唯一能记得的跟 coding 相关的,就是 SEO 项目了:把文档(Excel)中的预期结果(title、description、keywords,即 TDK)与页面上的实际值进行对比,查看是否一致。

内容主要有两个:一个是基于页面(用 Selenium 打开浏览器读取),一个是基于爬虫模式(用 requests 直接抓取 HTML)。

首先基于页面

我们要拿到预期结果

import xlrd

# Row and column index of the cell to read. The later scripts in this post
# start at row index 2 and use column index 3 for the first language's title.
n = 2
language = 3

# Open the workbook that holds the expected values.
# NOTE(review): xlrd 2.0+ only reads .xls files; opening 'test.xlsx' needs
# xlrd < 2.0 -- confirm the installed version.
wb = xlrd.open_workbook('test.xlsx')
# Locate the worksheet by name.
sh = wb.sheet_by_name('02_TDK')
# Read the expected value from the chosen cell of that worksheet.
test_expect_result = sh.cell(n, language).value
 
以下是比较简单的 title 校验的完整代码:
import xlrd
import time
from selenium import webdriver

# Pre-release site that is actually opened in the browser.
BASE_URL = 'https://pre.fameex.com/'
# Number of leading characters dropped from the URL stored in the sheet.
# presumably the length of 'https://www.fameex.com/en-US' (28) -- TODO confirm
URL_PREFIX_LENGTH = 28
# Data rows start at row index 2.
FIRST_DATA_ROW = 2
# Column index of the expected title for the first locale; each following
# locale uses the next column.
TITLE_FIRST_COLUMN = 3
# Upper bound on how long to wait for a page to expose a non-empty title.
TITLE_WAIT_SECONDS = 10

# Invisible zero-width character copied verbatim from the original script
# (looks like U+200B -- confirm); it is stripped before comparing.
ZERO_WIDTH = "​"

language_value = ['zh-CN','zh-TW','en-US','en-AU','ko-KR','es-ES','ru-RU','tr-TR','vi-VN','pt-BR']


def _normalize(text):
    """Return *text* without spaces and without the zero-width character."""
    return text.replace(" ", "").replace(ZERO_WIDTH, "")


def _wait_for_title(browser, timeout=TITLE_WAIT_SECONDS, poll=0.5):
    """Poll ``browser.title`` until it is non-empty or *timeout* seconds pass.

    Returns the title, or '' when the page never exposed one in time, so a
    page without a title can no longer hang the whole run.
    """
    deadline = time.monotonic() + timeout
    title = browser.title
    while title == '' and time.monotonic() < deadline:
        time.sleep(poll)
        title = browser.title
    return title


# Open the workbook first so a missing file fails before a browser is started.
wb = xlrd.open_workbook('test.xlsx')
# Locate the worksheet by name.
sh = wb.sheet_by_name('02_TDK')

driver = webdriver.Chrome()
try:
    language = TITLE_FIRST_COLUMN
    # A separate loop variable keeps the `language_value` list intact.
    for locale in language_value:
        print(locale, ':')
        # Row titles of pages whose <title> stayed empty.
        L = []
        print('值错误')
        for n in range(FIRST_DATA_ROW, sh.nrows):
            test_title = sh.cell(n, 1).value
            test_url = BASE_URL + locale + sh.cell(n, 2).value[URL_PREFIX_LENGTH:]
            driver.get(test_url)
            test_Actual_result = _wait_for_title(driver)
            if test_Actual_result == '':
                L.append(test_title.replace("\n", ""))
                continue
            test_expect_result = sh.cell(n, language).value
            # Strings that are exactly equal are also equal after normalizing,
            # so one normalized comparison covers both original "ok" branches.
            if _normalize(test_Actual_result) != _normalize(test_expect_result):
                print(test_url, test_title, ':')
                print(test_Actual_result, '!=\n', test_expect_result)
        print('没有值\n', L)
        language += 1
finally:
    # Always close the browser, even when a page load or cell read fails.
    driver.quit()
 
 
#接下来是寻找 description 和keyword的值
import xlrd
import time
from selenium import webdriver
from selenium.webdriver.common.by import By

# Pre-release site that is actually opened in the browser.
BASE_URL = 'https://pre.fameex.com/'
# Number of leading characters dropped from the URL stored in the sheet.
# presumably the length of 'https://www.fameex.com/en-US' (28) -- TODO confirm
URL_PREFIX_LENGTH = 28
# Data rows start at row index 2.
FIRST_DATA_ROW = 2

# Locales in worksheet column order: each locale's expected value sits
# ALL_LOCALES.index(locale) columns after the first column of its group.
ALL_LOCALES = ['zh-CN','zh-TW','en-US','en-AU','ko-KR','es-ES','ru-RU','tr-TR','vi-VN','pt-BR']
# First column of each group: description starts at 13, keywords at 23.
FIRST_COLUMN = {'description': 13, 'keywords': 23}

# Which <meta name=...> tag to verify; switch to 'description' to check that.
META_NAME = 'keywords'
# Locales checked in this run (a subset of ALL_LOCALES, e.g. to resume a run).
language_value = ['ru-RU','tr-TR','vi-VN','pt-BR']

# Invisible zero-width character copied verbatim from the original script
# (looks like U+200B -- confirm); it is stripped before comparing.
ZERO_WIDTH = "​"


def _normalize_expected(text):
    """Return the expected text without spaces and the zero-width character."""
    return text.replace(" ", "").replace(ZERO_WIDTH, "")


def _normalize_actual(text):
    """Return the page text without spaces, with three entities decoded."""
    text = text.replace(" ", "")
    for entity, char in (("&#39;", "'"), ("&quot;", '"'), ("&amp;", "&")):
        text = text.replace(entity, char)
    return text.replace(ZERO_WIDTH, "")


def _meta_content(browser, name):
    """Return the content of the first ``<meta name=...>`` in ``<head>``.

    Returns '' when the tag or its content attribute is missing. The original
    probed meta[1]..meta[39] one at a time, which raised as soon as the index
    passed the last tag and otherwise reused the previous page's value.
    ``find_elements(By.XPATH, ...)`` is used because the ``find_element_by_*``
    helpers were removed in Selenium 4.3.
    """
    matches = browser.find_elements(By.XPATH, f'/html/head/meta[@name="{name}"]')
    if not matches:
        return ''
    return matches[0].get_attribute('content') or ''


# Open the workbook first so a missing file fails before a browser is started.
wb = xlrd.open_workbook('test.xlsx')
# Locate the worksheet by name.
sh = wb.sheet_by_name('02_TDK')

driver = webdriver.Chrome()
try:
    # A separate loop variable keeps the `language_value` list intact.
    for locale in language_value:
        print(locale, ':')
        # Derive the column from the locale's position instead of a counter
        # that must be kept in sync by hand (23 + 6 = 29 for 'ru-RU').
        language = FIRST_COLUMN[META_NAME] + ALL_LOCALES.index(locale)
        # Row titles of pages where the meta tag was missing or empty.
        L = []
        for n in range(FIRST_DATA_ROW, sh.nrows):
            test_title = sh.cell(n, 1).value
            test_url = BASE_URL + locale + sh.cell(n, 2).value[URL_PREFIX_LENGTH:]
            driver.get(test_url)
            # Same fixed pause after navigation as the original script.
            time.sleep(1)
            test_Actual_result = _meta_content(driver, META_NAME)
            if test_Actual_result == '':
                L.append(test_title.replace("\n", ""))
                continue
            test_expect_result = sh.cell(n, language).value
            if test_Actual_result == test_expect_result:
                continue
            if _normalize_actual(test_Actual_result) == _normalize_expected(test_expect_result):
                continue
            print(test_url, test_title, ':')
            print(test_Actual_result, '!=\n', test_expect_result)
        print('没有值\n', L)
finally:
    # Always close the browser, even when a page load or cell read fails.
    driver.quit()

 
#下面的就是爬虫模式的SEO完整代码
import html
import xlrd
import time
import requests

# Crawler-mode check of the TDK title: fetch the raw HTML with requests and
# compare its <title> with the expected value from the worksheet.

# Pre-release site that is actually requested.
BASE_URL = 'https://pre.fameex.com/'
# Number of leading characters dropped from the URL stored in the sheet.
# presumably the length of 'https://www.fameex.com/en-US' (28) -- TODO confirm
URL_PREFIX_LENGTH = 28
# Data rows start at row index 2.
FIRST_DATA_ROW = 2
# Column index of the expected title for the first locale; each following
# locale uses the next column.
TITLE_FIRST_COLUMN = 3
# Seconds to wait for the server; without a timeout requests can block forever.
REQUEST_TIMEOUT = 10

HELMET_TITLE_TAG = '<title data-react-helmet="true">'
PLAIN_TITLE_TAG = '<title>'

# Invisible zero-width character copied verbatim from the original script
# (looks like U+200B -- confirm); it is stripped before comparing.
ZERO_WIDTH = "​"

language_value = ['zh-CN','zh-TW','en-US','en-AU','ko-KR','es-ES','ru-RU','tr-TR','vi-VN','pt-BR']


def _extract_title(page_html):
    """Return the text of the page's title tag, or '' when none is found.

    The last react-helmet title wins; otherwise the first plain ``<title>``
    is used. The text runs up to the next '<' (or the end of the document).
    """
    start = page_html.rfind(HELMET_TITLE_TAG)
    if start != -1:
        start += len(HELMET_TITLE_TAG)
    else:
        start = page_html.find(PLAIN_TITLE_TAG)
        if start == -1:
            return ''
        start += len(PLAIN_TITLE_TAG)
    return page_html[start:].partition('<')[0]


def _normalize(text):
    """Return *text* without spaces and without the zero-width character."""
    return text.replace(" ", "").replace(ZERO_WIDTH, "")


wb = xlrd.open_workbook('test.xlsx')
sh = wb.sheet_by_name('02_TDK')

language = TITLE_FIRST_COLUMN
# A separate loop variable keeps the `language_value` list intact.
for locale in language_value:
    print(locale, ':')
    # Row titles of pages with no title (or whose request failed).
    L = []
    print('值错误')
    for n in range(FIRST_DATA_ROW, sh.nrows):
        test_title = sh.cell(n, 1).value
        test_url = BASE_URL + locale + sh.cell(n, 2).value[URL_PREFIX_LENGTH:]
        try:
            r = requests.get(test_url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as err:
            # Report the failure and keep checking the remaining pages.
            print(test_url, test_title, ':', err)
            L.append(test_title.replace("\n", ""))
            continue
        # The original wrote `test_Actual_result == ''` (a comparison) when no
        # title tag was found, so the previous page's title was reused. The
        # helper returns '' in that case instead.
        test_Actual_result = _extract_title(r.text)
        if test_Actual_result == '':
            L.append(test_title.replace("\n", ""))
            continue
        test_expect_result = sh.cell(n, language).value
        if test_Actual_result == test_expect_result:
            continue
        # Raw HTML may carry character references (&#39;, &quot;, &amp;, ...);
        # decode them all before the whitespace-insensitive comparison.
        if _normalize(html.unescape(test_Actual_result)) == _normalize(test_expect_result):
            continue
        print(test_url, test_title, ':')
        print(test_Actual_result, '!=\n', test_expect_result)
    print('没有值\n', L)
    language += 1