#!/bin/sh
# Kill every process whose command line matches $1.
# $1 - name to match (runtime input: the file/process name); defaults to "aa".
NAME=$1
if [ -z "$NAME" ]; then
echo "STRING is empty"
NAME="aa"
fi
echo "$NAME"
# Collect matching PIDs, excluding this script itself and the grep process.
# $(...) replaces the original backticks; NOTE(review): pgrep -f "$NAME"
# would be simpler where available.
ID=$(ps -ef | grep "$NAME" | grep -v "$0" | grep -v "grep" | awk '{print $2}')
echo "$ID"
echo "---------------"
# $ID is intentionally unquoted here: word-splitting yields one PID per word.
for id in $ID
do
kill -9 "$id"
echo "killed $id"
done
echo "---------------"
#!/bin/sh
# Restart helper: kill any running instance of the named Python script,
# then relaunch it with nohup, logging to ./logs/<name>.log.
# $1 - script file name (runtime input); extension is stripped; defaults to "aa".
NAME=$1
NAME=${NAME%%.*} # strip everything from the first dot: foo.py -> foo
if [ -z "$NAME" ]; then
echo "STRING is empty"
NAME="aa"
fi
echo "$NAME"
# PIDs of matching processes, excluding this script itself and grep.
ID=$(ps -ef | grep "$NAME" | grep -v "$0" | grep -v "grep" | awk '{print $2}')
echo "$ID"
echo "---------------"
# $ID unquoted on purpose: split into one PID per iteration.
for id in $ID
do
kill -9 "$id"
echo "killed $id"
done
echo "---------------"
sleep 1
# Absolute directory containing this script.
current_dir=$(cd "$(dirname "$0")" && pwd)
echo "$current_dir"
if [ ! -d "$current_dir/logs" ]; then
echo "$current_dir/logs does not exist"
# BUG FIX: was `mkdir ...` wrapped in backticks (a pointless command
# substitution); -p avoids a race if the dir appears meanwhile.
mkdir -p "$current_dir/logs"
fi
echo "---------------"
echo "nohup /usr/bin/python3.6 $current_dir/$NAME.py > $current_dir/logs/$NAME.log 2>&1 &"
echo "---------------"
echo "tail -f $current_dir/logs/$NAME.log"
# BUG FIX: the original wrapped this launch in backticks, running it inside a
# command substitution subshell for no reason; run it directly.
nohup /usr/bin/python3.6 "$current_dir/$NAME.py" > "$current_dir/logs/$NAME.log" 2>&1 &
echo "启动成功"
# -*- coding: UTF-8 -*-
import logging
import os
import platform
import subprocess
import time
# Configure the root logger: INFO and above are appended to a file named after
# this script (basename with extension stripped — NOTE(review): the log file
# has no ".log" suffix; confirm that is intended).
logging.basicConfig(
level=logging.INFO, # minimum level written to the log file
format='%(asctime)s %(filename)s %(levelname)s : %(message)s', # record layout
datefmt='%Y-%m-%d %H:%M:%S', # timestamp format
filename=os.path.splitext(os.path.basename(__file__))[0], # log file name
filemode='a', # append ("a") rather than truncate ("w")
)
# Mirror INFO+ records to the console via a second handler on the root logger.
console = logging.StreamHandler()
console.setLevel(logging.INFO)
formatter = logging.Formatter('%(asctime)s %(filename)s %(levelname)s : %(message)s')
console.setFormatter(formatter)
logging.getLogger('').addHandler(console)
def run_main():
    """Run the spider script once in a subprocess and log path and exit code.

    Chooses the interpreter by host OS: the ``'indo' in system`` test matches
    ``"Windows"`` (platform.system() output) regardless of case.
    Returns None; the subprocess exit code is only logged.
    """
    system = platform.system()
    path = os.path.abspath('.')
    # os.path.join picks the right separator per OS, replacing the original
    # hand-built "\\" / "/" paths. NOTE(review): "yational_policy.py" looks
    # like a typo for "national_policy.py" — confirm against the actual file.
    spider_path = os.path.join(path, 'yational_policy.py')
    if 'indo' in system:  # Windows host
        res = subprocess.call("python {}".format(spider_path), shell=True)
    else:  # Linux/macOS host
        res = subprocess.call("python3.6 {}".format(spider_path), shell=True)
    # BUG FIX: the original called the undefined name `logger` (NameError on
    # every invocation); use the module-level logging functions, which route
    # to the root logger configured at import time.
    logging.info(spider_path)
    logging.info(res)
if __name__ == '__main__':
    # BUG FIX: `for i in range(100000000000)` was a magic-number stand-in for
    # "run forever"; an explicit infinite loop states the intent directly.
    while True:
        run_main()
        time.sleep(60)  # re-run the spider once a minute
# nohup /usr/bin/python3.6 /home/pachong/yoyo/work/national_policy/national_policy/run_zc.py > /home/pachong/yoyo/work/national_policy/logs/run_zc.log 2>&1 &
import concurrent.futures
import threading
import requests
import pymysql
from yscredit_tools.MySQL import insert_update_data_mysql, select_data_mysql, insert_data_mysql
from yscredit_tools.utils import clear_dict
# Request headers expected by sqzc.gd.gov.cn (Host pin + generic client UA).
headers = {
'User-Agent': 'Apifox/1.0.0 (https://www.apifox.cn)',
'Accept': '*/*',
'Host': 'sqzc.gd.gov.cn',
'Connection': 'keep-alive'
}
# Module-level MySQL connection, opened at import time with autocommit on.
# NOTE(review): credentials are hardcoded and this connects to 10.1.3.29,
# while the inserts below pass dbhost="localhost" — confirm which DB is used;
# `db`/`cursor` appear otherwise unused in this chunk.
db = pymysql.connect(host="10.1.3.29", port=3306, database="crawler_data_prd", user="root", password="root", charset='utf8', autocommit=True)
cursor = db.cursor()
# Serializes the insert/print section across worker threads.
lock = threading.RLock()
def get_data(i):
    """Fetch one page of the policy-list API and insert each entry into MySQL.

    Args:
        i: 1-based page number sent as the ``pageNumber`` query parameter.

    Raises:
        requests.RequestException: on network failure, timeout, or HTTP error.
    """
    print(i)
    url = "https://sqzc.gd.gov.cn/sqzc/m/cms/policy/getPolicyListPage2?pageNumber={}&pageSize=10&keywords=&publisher=&city=".format(str(i))
    # BUG FIX: add a timeout so a stalled connection cannot hang a worker
    # thread forever; fail fast on non-2xx responses instead of choking on
    # an HTML error page in .json().
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    html = response.json()
    # BUG FIX: the original bare acquire()/release() pair never released the
    # lock if an insert raised, deadlocking every other worker; `with` always
    # releases it.
    with lock:
        for data in html["data"]:
            item = {
                "title": data["title"],
                "publishDate": data["publishDate"],
                "publisher": data["publisher"],
                "city": data["city"],
                "viewCount": data["viewCount"],
                "place": data["place"],
                "page": i,  # remember which page this row came from
            }
            # NOTE(review): inserts target dbhost="localhost" while the
            # module-level connection points at 10.1.3.29 — confirm intent.
            insert_data_mysql(dbhost="localhost", dbname="crawler_data_prd", tablename="policy", **clear_dict(item))
            print(item["title"])
            print("*" * 100)
# Fan pages 1..3077 out across a pool of 50 worker threads. Leaving the
# `with` block waits for every submitted page to finish before continuing.
with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
    future_to_url = executor.map(get_data, range(1, 3078))