Source code for image_crawler_utils.stations.twitter.parser_assets.search_settings
import dataclasses
from typing import Optional, Union
import time
from image_crawler_utils.log import print_logging_msg
[docs]
@dataclasses.dataclass
class TwitterSearchSettings:
"""
TwitterSearchSettings controls advanced searching settings. It will append an string to the keyword string according to the settings in this class.
"""
from_users: Optional[Union[list[str], str]] = None
"""Select tweets sent by a certain user / a certain list of users."""
to_users: Optional[Union[list[str], str]] = None
"""Select tweets replying to a certain user / a certain list of users."""
mentioned_users: Optional[Union[list[str], str]] = None
"""Select tweets that mention a certain user / a certain list of users."""
including_replies: bool = True
"""Including reply tweets."""
only_replies: bool = False
"""Only including reply tweets. Works only if ``including_replies`` is set to :py:data:`True` (default)."""
including_links: bool = True
"""Including tweets that contain at least one link."""
only_links: bool = False
"""Only including tweets that contain at least one link. Works only if ``including_replies`` is set to :py:data:`True` (default)."""
including_media: bool = True
"""Including tweets that contain at least one media."""
only_media: bool = False
"""Only including tweets that contain at least one media. Works only if ``including_replies`` is set to :py:data:`True` (default)."""
min_reply_num: Optional[int] = None
"""Including tweets with more than ``min_reply_num`` replies."""
min_favorite_num: Optional[int] = None
"""Including tweets with more than ``min_favorite_num`` favorites."""
min_retweet_num: Optional[int] = None
"""Including tweets with more than ``min_retweet_num`` retweets."""
starting_date: str = ''
"""Tweets after this date. Must be \"YYYY-MM-DD\", \"YYYY.MM.DD\" or \"YYYY/MM/DD\" format."""
ending_date: str = ''
"""Tweets before this date. Must be \"YYYY-MM-DD\", \"YYYY.MM.DD\" or \"YYYY/MM/DD\" format."""
def __post_init__(self):
if isinstance(self.from_users, str):
self.from_users = [self.from_users]
if isinstance(self.to_users, str):
self.to_users = [self.to_users]
if isinstance(self.mentioned_users, str):
self.mentioned_users = [self.mentioned_users]
def time_format(s):
if len(s) == 0: # No restrictions
return s
# Try parsing time
new_s = s.replace('/', '-').replace('.', '-')
try:
time.strptime(new_s, "%Y-%m-%d")
return new_s
except:
print_logging_msg(f'{s} is not a valid "year-month-date" format! It will be ignored.', "warning")
return ''
self.starting_date = time_format(self.starting_date)
self.ending_date = time_format(self.ending_date)
[docs]
def build_search_appending_str(self, keyword_string: str):
"""
Building a searching appending suffix.
Args:
keyword_string (str): the constructed keyword string for Twitter.
"""
append_str = keyword_string + ' '
append_str += f" ({' OR '.join(['from:' + user for user in self.from_users])})" if self.from_users is not None else ''
append_str += f" ({' OR '.join(['to:' + user for user in self.to_users])})" if self.to_users is not None else ''
append_str += f" ({' OR '.join(['@' + user for user in self.mentioned_users])})" if self.mentioned_users is not None else ''
if not self.including_replies:
append_str += " -filter:replies"
elif self.only_replies:
append_str += " filter:replies"
if not self.including_links:
append_str += " -filter:links"
elif self.only_links:
append_str += " filter:links"
if not self.including_media:
append_str += " -filter:media"
elif self.only_media:
append_str += " filter:media"
append_str += f" min_replies:{self.min_reply_num}" if self.min_reply_num is not None else ''
append_str += f" min_faves:{self.min_favorite_num}" if self.min_favorite_num is not None else ''
append_str += f" min_retweets:{self.min_retweet_num}" if self.min_retweet_num is not None else ''
append_str += f" since:{self.starting_date}" if len(self.starting_date) > 0 else ''
append_str += f" until:{self.ending_date}" if len(self.ending_date) > 0 else ''
append_str = append_str.strip().replace(' ', ' ').strip()
return append_str