Source code for image_crawler_utils.image_downloader.downloaders.general_downloader

import os
import time

import requests
from typing import Optional, Union
from rich import markup

from image_crawler_utils import Cookies
from image_crawler_utils.configs import DownloadConfig
from image_crawler_utils.log import Log
from image_crawler_utils.progress_bar import ProgressGroup

from .core_downloader import download_image
from .pixiv_downloader import pixiv_download_image_from_url
from .twitter_downloader import twitter_download_image_from_status


def download_image_from_url(
    url: str,
    image_name: str,
    download_config: DownloadConfig=DownloadConfig(),
    log: Log=Log(),
    store_path: str="./",
    session: Optional[requests.Session]=None,
    progress_group: Optional[ProgressGroup]=None,
    thread_id: int=0,
    cookies: Optional[Union[Cookies, list, dict, str]]=Cookies(),
) -> tuple[int, int]:
    """
    Download image from url. Automatically separate Pixiv, Twitter, etc. image URLs from normal URLs.

    Pixiv URLs (containing "pximg.net" or "pixiv.net") and Twitter / X status URLs
    (containing "x.com" or "twitter.com" together with "/status/") are handed to
    their dedicated downloaders and that downloader's result is returned as-is.
    Every other URL is downloaded with the generic ``download_image``.

    Note:
        The default ``download_config``, ``log`` and ``cookies`` objects are created
        once, when this function is defined, and are shared by every call that
        relies on the defaults.

    Args:
        url (str): The URL of the image to download.
        image_name (str): Name of image to be stored. When it contains no ".", the
            extension found in the path part of the URL (if any) is appended.
        download_config (image_crawler_utils.configs.DownloadConfig): Comprehensive download config.
        log (image_crawler_utils.log.Log): The logger.
        store_path (str): Path of image to be stored.
        session (requests.Session): A session that may contain cookies.
        progress_group (image_crawler_utils.progress_bar.ProgressGroup): The Group of Progress bars to be displayed in.
        thread_id (int): Nth thread of image downloading.
        cookies (image_crawler_utils.Cookies, str, dict, list, None): If session parameter is empty, use cookies to create a session with cookies.

    Returns:
        (int, int): (the size of the downloaded image as reported by the downloader,
        thread_id). For the generic downloader the size is 0 when the download failed.
    """

    # Local import keeps this block self-contained; only the URL path is needed below.
    from urllib.parse import urlsplit

    # Build a session from the cookies only when the caller did not supply one.
    if session is None:
        if not isinstance(cookies, Cookies):
            cookies = Cookies(cookies)
        session = requests.Session()
        session.cookies.update(cookies.cookies_dict)

    # Check whether it is special websites
    if "pximg.net" in url or "pixiv.net" in url:
        return pixiv_download_image_from_url(
            url=url,
            image_name=image_name,
            download_config=download_config,
            log=log,
            store_path=store_path,
            session=session,
            progress_group=progress_group,
            thread_id=thread_id,
        )
    elif ("x.com" in url or "twitter.com" in url) and "/status/" in url:
        return twitter_download_image_from_status(
            url=url,
            image_name=image_name,
            download_config=download_config,
            log=log,
            store_path=store_path,
            session=session,
            progress_group=progress_group,
            thread_id=thread_id,
        )

    # Append the URL's extension when the given name has none. The extension is
    # taken from the URL *path* only, so a query string or fragment
    # (e.g. "a.jpg?size=large") or a dot in the host name never ends up in the
    # stored file name.
    if '.' not in image_name and '.' in url:
        try:
            url_path = urlsplit(url).path
        except ValueError:
            # Malformed URL: fall back to cutting off the query / fragment by hand.
            url_path = url.split('?', 1)[0].split('#', 1)[0]
        ext = os.path.splitext(url_path)[1]
        edited_image_name = image_name + ext
    else:
        edited_image_name = image_name

    # Delay configured for each download thread before the request is sent.
    time.sleep(download_config.result_thread_delay)

    # Start downloading
    is_success, image_size = download_image(
        url=url,
        image_name=edited_image_name,
        download_config=download_config,
        log=log,
        store_path=store_path,
        session=session,
        progress_group=progress_group,
        thread_id=thread_id,
    )

    if is_success:
        return image_size, thread_id
    else:
        log.error(f'FAILED to download [repr.filename]{markup.escape(image_name)}[reset] from [repr.url]{markup.escape(url)}[reset]', extra={"markup": True})
        return 0, thread_id