# Source code for image_crawler_utils.image_downloader.downloaders.general_downloader

import os
import time
from typing import Optional, Union
from urllib.parse import urlparse

import requests
from rich import markup

from image_crawler_utils import Cookies
from image_crawler_utils.configs import DownloadConfig
from image_crawler_utils.log import Log
from image_crawler_utils.progress_bar import ProgressGroup

from .core_downloader import download_image
from .pixiv_downloader import pixiv_download_image_from_url
from .twitter_downloader import twitter_download_image_from_status



# [docs]
def download_image_from_url(
    url: str,
    image_name: str,
    download_config: DownloadConfig=DownloadConfig(),
    log: Log=Log(),
    store_path: str="./",
    session: Optional[requests.Session]=None,
    progress_group: Optional[ProgressGroup]=None,
    thread_id: int=0,
    cookies: Optional[Union[Cookies, list, dict, str]]=Cookies(),
) -> tuple[int, int]:
    """
    Download image from url. Automatically separate Pixiv, Twitter, etc. image URLs from normal URLs.

    URLs containing "pximg.net" or "pixiv.net" are handed to pixiv_download_image_from_url();
    URLs containing "x.com" or "twitter.com" together with "/status/" are handed to
    twitter_download_image_from_status(). Every other URL is downloaded with download_image().

    Args:
        url (str): The URL of the image to download.
        image_name (str): Name of image to be stored. If it contains no ".", the file extension found in the path of the URL (if any) is appended for normal URLs.
        download_config (image_crawler_utils.configs.DownloadConfig): Comprehensive download config.
        log (image_crawler_utils.log.Log): The logger.
        store_path (str): Path of image to be stored.
        session (requests.Session): A session that may contain cookies. A session passed in by the caller is never closed here.
        progress_group (image_crawler_utils.progress_bar.ProgressGroup): The Group of Progress bars to be displayed in.
        thread_id (int): Nth thread of image downloading.
        cookies (image_crawler_utils.Cookies, str, dict, list, None): If session parameter is empty, use cookies to create a session with cookies. Ignored when a session is given.

    Returns:
        tuple[int, int]: For normal URLs, (the size of the downloaded image as reported by download_image(), thread_id), or (0, thread_id) if the download failed. For Pixiv and Twitter URLs, the result of the specialized downloader is returned unchanged.
    """

    # Remember whether the session is created here, so that only a session
    # owned by this call is closed once the download has finished.
    owns_session = session is None
    if owns_session:
        if not isinstance(cookies, Cookies):
            cookies = Cookies(cookies)
        session = requests.Session()
        session.cookies.update(cookies.cookies_dict)

    try:
        # Check whether it is special websites

        if "pximg.net" in url or "pixiv.net" in url:
            return pixiv_download_image_from_url(
                url=url,
                image_name=image_name,
                download_config=download_config,
                log=log,
                store_path=store_path,
                session=session,
                progress_group=progress_group,
                thread_id=thread_id,
            )
        if ("x.com" in url or "twitter.com" in url) and "/status/" in url:
            return twitter_download_image_from_status(
                url=url,
                image_name=image_name,
                download_config=download_config,
                log=log,
                store_path=store_path,
                session=session,
                progress_group=progress_group,
                thread_id=thread_id,
            )

        edited_image_name = _append_url_extension(image_name, url)

        time.sleep(download_config.result_thread_delay)

        # Start downloading

        is_success, image_size = download_image(
            url=url,
            image_name=edited_image_name,
            download_config=download_config,
            log=log,
            store_path=store_path,
            session=session,
            progress_group=progress_group,
            thread_id=thread_id,
        )
        if is_success:
            return image_size, thread_id

        log.error(f'FAILED to download [repr.filename]{markup.escape(image_name)}[reset] from [repr.url]{markup.escape(url)}[reset]', extra={"markup": True})
        return 0, thread_id
    finally:
        # Release the connection pool of a session that was created by this call.
        if owns_session:
            session.close()


def _append_url_extension(image_name: str, url: str) -> str:
    """Return image_name with the file extension of the URL path appended when the name has no "."."""

    if '.' in image_name:
        return image_name
    try:
        # Only the path component is inspected, so the query string, the fragment
        # and dots in the host name never end up in the file name.
        url_path = urlparse(url).path
    except ValueError:
        # Malformed URL: no extension can be derived, keep the name untouched.
        return image_name
    return image_name + os.path.splitext(url_path)[1]