简单Lora爬虫

发布时间 2023-07-22 03:21:32 作者: 热爱学の人

demo

import os
import re
import time
from random import random

import requests

# Model-list endpoint that collect() requests page by page.
BASE_URL = 'https://api.esheep.com/gateway/model/list'
# Characters that remove_invalid_chars() replaces with '_' before a model
# name is used as a file name.
INVALID_CHARS = ':*?"<>|\\/'

# Request headers sent with both the listing requests and the image downloads.
HEADERS = {
    'origin': 'https://www.esheep.com',
    'referer': 'https://www.esheep.com/',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
}

# Cookies sent with the listing requests only (image downloads do not use them).
# NOTE(review): the `buvid` value looks like a captured browser/device
# identifier — confirm it is still accepted before relying on it.
COOKIES = {
    'buvid': '415DC444-806E-82F5-4753-BA65DE586D5325503infoc',
}


def remove_invalid_chars(name):
    """Return ``name`` with every character listed in INVALID_CHARS swapped for ``_``."""
    return ''.join('_' if ch in INVALID_CHARS else ch for ch in name)


def download_image(img_url, name, timeout=30):
    """Download one image and save it under ``img_url_test/``.

    The file stem is ``name`` after remove_invalid_chars(); the extension is
    ``png`` when the substring ``png`` occurs anywhere in ``img_url`` and
    ``jpg`` otherwise.

    Args:
        img_url: URL of the image to fetch.
        name: Display name used to build the file name.
        timeout: Seconds to wait on the server before giving up.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    name = remove_invalid_chars(name)
    extension = 'png' if 'png' in img_url else 'jpg'
    filename = f"img_url_test/{name}.{extension}"
    # Without a timeout a stalled server would block the whole crawl forever.
    with requests.get(img_url, headers=HEADERS, timeout=timeout) as res:
        if not res.ok:
            # Never write an HTTP error page to disk as if it were an image.
            print(f'{name}下载失败(HTTP {res.status_code}),已跳过!')
            return
        # open() does not create missing directories, so make sure the
        # target folder exists before writing.
        os.makedirs(os.path.dirname(filename), exist_ok=True)
        with open(filename, 'wb') as f:
            f.write(res.content)
    print(f'{name}已保存!')


# Matches the first image URL in the repr() of a listing item; compiled once
# so it is not looked up again for every item on every page.
_IMG_URL_RE = re.compile(r"'url': '(.*?)', 'height'")


def _extract_image_url(item):
    """Return the first image URL found in ``item``, or None if there is none."""
    found = _IMG_URL_RE.search(repr(item))
    return found.group(1) if found else None


def collect():
    """Walk the model list page by page and download each item's image.

    Each response carries a cursor: ``Next`` is sent back as the ``next``
    query parameter to fetch the following page, and the loop stops once
    ``IsEnd`` is truthy. A random pause of under one second separates pages.

    Raises:
        requests.RequestException: On connection failures, timeouts, or a
            non-success HTTP status from the listing endpoint.
        KeyError: If the response JSON lacks the expected keys.
    """
    # 'next' is absent on the first request and filled in from the cursor
    # of each response afterwards.
    params = {
        'limit': '20',
        'mode': '3',
    }

    with requests.Session() as session:
        while True:
            response = session.get(
                url=BASE_URL,
                params=params,
                cookies=COOKIES,
                headers=HEADERS,
                timeout=30,
            )
            # Fail with a clear HTTP error instead of a confusing JSON/KeyError
            # when the endpoint rejects the request.
            response.raise_for_status()
            infos = response.json()['data']

            cursor = infos['cursor']
            next_cursor, is_end = cursor['Next'], cursor['IsEnd']
            print(f"打印=={next_cursor}-----{is_end}")

            for item in infos['items']:
                img_url = _extract_image_url(item)
                if img_url is None:
                    # One item without an image must not abort the whole crawl.
                    print('未找到图片地址,已跳过该条目')
                    continue
                print(img_url)
                download_image(img_url, item['model']['name'])

            if is_end:
                break

            params['next'] = next_cursor

            time.sleep(random())


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == '__main__':
    collect()