diff --git a/test_twarc2.py b/test_twarc2.py index 46908391..bc9af51f 100644 --- a/test_twarc2.py +++ b/test_twarc2.py @@ -577,6 +577,54 @@ def test_liked_tweets(): break +def test_list_lookup(): + parks_list = T.list_lookup(715919216927322112) + assert "data" in parks_list + assert parks_list["data"]["name"] == "National-parks" + + +def test_list_members(): + response = list(T.list_members(715919216927322112)) + assert len(response) == 1 + members = twarc.expansions.flatten(response[0]) + assert len(members) == 8 + + +def test_list_followers(): + response = list(T.list_followers(715919216927322112)) + assert len(response) >= 2 + followers = twarc.expansions.flatten(response[0]) + assert len(followers) > 50 + + +def test_list_memberships(): + response = list(T.list_memberships("64flavors")) + assert len(response) == 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 9 + + +def test_followed_lists(): + response = list(T.followed_lists("nasa")) + assert len(response) == 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 1 + + +def test_owned_lists(): + response = list(T.owned_lists("nasa")) + assert len(response) >= 1 + lists = twarc.expansions.flatten(response[0]) + assert len(lists) >= 11 + + +def test_list_tweets(): + response = next(T.list_tweets(715919216927322112)) + assert "data" in response + tweets = twarc.expansions.flatten(response) + assert len(tweets) >= 90 + + def test_twarc_metadata(): # With metadata (default) diff --git a/twarc/client2.py b/twarc/client2.py index ed9ce21b..da025c81 100644 --- a/twarc/client2.py +++ b/twarc/client2.py @@ -21,6 +21,7 @@ MEDIA_FIELDS, POLL_FIELDS, PLACE_FIELDS, + LIST_FIELDS, ensure_flattened, ) from twarc.decorators2 import * @@ -156,6 +157,13 @@ def _prepare_params(self, **kwargs): else ",".join(PLACE_FIELDS) ) + if "list_fields" in kwargs: + params["list.fields"] = ( + kwargs.pop("list_fields") + if kwargs["list_fields"] + else ",".join(LIST_FIELDS) + ) + # Format start_time and end_time if "start_time" in kwargs: start_time = kwargs["start_time"] @@ -294,46 +302,323 @@ def _search( log.info(f"No more results for search {query}.") + def _lists( + self, + url, + expansions=None, + list_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Paginates and returns lists + """ + params = self._prepare_params( + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + if expansions: + params["expansions"] = "owner_id" + + for response in self.get_paginated(url, params=params): + # can return without 'data' if there are no results + if "data" in response: + yield response + else: + log.info(f"Retrieved an empty page of results of lists for {url}") + + def list_followers( + self, + list_id, + expansions=None, + tweet_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns a list of users who are followers of the specified List. + + Calls [GET /2/lists/:id/followers](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-lists-id-followers) + + Args: + list_id (int): ID of the list. + expansions enum (pinned_tweet_id): Expansions, include pinned tweets. + max_results (int): the maximum number of results to retrieve. Between 1 and 100. Default is 100. + + Returns: + generator[dict]: A generator, dict for each page of results. + + """ + params = self._prepare_params( + tweet_fields=tweet_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + if expansions: + params["expansions"] = "pinned_tweet_id" + + url = f"https://api.twitter.com/2/lists/{list_id}/followers" + return self.get_paginated(url, params=params) + + def list_members( + self, + list_id, + expansions=None, + tweet_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns a list of users who are members of the specified List. + + Calls [GET /2/lists/:id/members](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/api-reference/get-lists-id-members) + + Args: + list_id (int): ID of the list. + expansions enum (pinned_tweet_id): Expansions, include pinned tweets. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + + Returns: + generator[dict]: A generator, dict for each page of results. + + """ + + params = self._prepare_params( + tweet_fields=tweet_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + if expansions: + params["expansions"] = "pinned_tweet_id" + + url = f"https://api.twitter.com/2/lists/{list_id}/members" + return self.get_paginated(url, params=params) + def list_memberships( self, - id, + user, + expansions=None, + list_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns all Lists a specified user is a member of. + + Calls [GET /2/users/:id/list_memberships](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/api-reference/get-users-id-list_memberships) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/list_memberships" + + return self._lists( + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + def owned_lists( + self, + user, expansions=None, list_fields=None, + user_fields=None, max_results=None, pagination_token=None, - user_field=None, ): """ - Function allows to get all the membership list from an specific user ID + Returns all Lists owned by the specified user. - Calls [GET /2/users/:id/list_memberships](https://developer.twitter.com/en/docs/twitter-api/lists/list-members/introduction) + Calls [GET /2/users/:id/owned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-users-id-owned_lists) Args: + user (int): ID of the user. expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. - list.fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. - user.fields( enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/owned_lists" + + return self._lists( + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) + + def followed_lists( + self, + user, + expansions=None, + list_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns all Lists a specified user follows. + + Calls [GET /2/users/:id/followed_lists](https://developer.twitter.com/en/docs/twitter-api/lists/list-follows/api-reference/get-users-id-followed_lists) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + + Returns: + generator[dict]: A generator, dict for each page of results. """ - user_id = self._ensure_user_id(id) + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/followed_lists" - url = f"https://api.twitter.com/2/users/{user_id}/list_memberships" + return self._lists( + url=url, + expansions=expansions, + list_fields=list_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) - params = self._prepare_params( + def pinned_lists( + self, + user, + expansions=None, + list_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns the Lists pinned by the authenticating user. Does not work with a Bearer token. + + Calls [GET /2/users/:id/pinned_lists](https://developer.twitter.com/en/docs/twitter-api/lists/pinned-lists/api-reference/get-users-id-pinned_lists) + + Args: + user (int): ID of the user. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + max_results (int): The maximum number of results to be returned per page. This can be a number between 1 and 100. + pagination_token (string): Used to request the next page of results if all results weren't returned with the latest request, or to go back to the previous page of results. + + Returns: + generator[dict]: A generator, dict for each page of results. + """ + user_id = self._ensure_user_id(user) + url = f"https://api.twitter.com/2/users/{user_id}/pinned_lists" + + return self._lists( + url=url, + expansions=expansions, list_fields=list_fields, + user_fields=user_fields, max_results=max_results, pagination_token=pagination_token, - user_field=user_field, + ) + + def list_lookup(self, list_id, expansions=None, list_fields=None, user_fields=None): + """ + Returns the details of a specified List. + + Calls [GET /2/lists/:id](https://developer.twitter.com/en/docs/twitter-api/lists/list-lookup/api-reference/get-lists-id) + + Args: + list_id (int): ID of the list. + expansions enum (owner_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. + + Returns: + dict: Result dictionary. + """ + + params = self._prepare_params( + list_fields=list_fields, + user_fields=user_fields, ) if expansions: params["expansions"] = "owner_id" + url = f"https://api.twitter.com/2/lists/{list_id}" + return self.get(url, params=params).json() + + def list_tweets( + self, + list_id, + expansions=None, + tweet_fields=None, + user_fields=None, + max_results=None, + pagination_token=None, + ): + """ + Returns Tweets from the specified List. + + Calls [GET /2/lists/:id/tweets](https://developer.twitter.com/en/docs/twitter-api/lists/list-tweets/api-reference/get-lists-id-tweets) + + Args: + list_id (int): ID of the list. + expansions enum (author_id): enable you to request additional data objects that relate to the originally returned List. + list_fields enum (created_at, follower_count, member_count, private, description, owner_id): This fields parameter enables you to select which specific List fields will deliver with each returned List objects. + user_fields enum (created_at, description, entities, id, location, name, pinned_tweet_id, profile_image_url, protected, public_metrics, url, username, verified, withheld): + This fields parameter enables you to select which specific user fields will deliver with the users object. Specify the desired fields in a comma-separated list without spaces between commas and fields. - resp = self.get(url, params=params) - data = resp.json() + Returns: + generator[dict]: A generator, dict for each page of results. + """ + + params = self._prepare_params( + expansions=expansions, + tweet_fields=tweet_fields, + user_fields=user_fields, + max_results=max_results, + pagination_token=pagination_token, + ) - return data + url = f"https://api.twitter.com/2/lists/{list_id}/tweets" + return self.get_paginated(url, params=params) def search_recent( self, @@ -935,11 +1220,9 @@ def _timeline( if len(excludes) > 0: params["exclude"] = ",".join(excludes) - count = 0 for response in self.get_paginated(url, params=params): # can return without 'data' if there are no results if "data" in response: - count += len(response["data"]) yield response else: log.info(f"Retrieved an empty page of results for timeline {user_id}") @@ -1174,6 +1457,7 @@ def liked_tweets( Retrieve the tweets liked by the given user_id. """ + user_id = self._ensure_user_id(user_id) url = f"https://api.twitter.com/2/users/{user_id}/liked_tweets" params = self._prepare_params( @@ -1272,6 +1556,7 @@ def get_paginated(self, *args, **kwargs): yield page + # Todo: Maybe this should be backwards.. check for `next_token` endings = [ "mentions", "tweets", @@ -1280,6 +1565,11 @@ def get_paginated(self, *args, **kwargs): "liked_tweets", "liking_users", "retweeted_by", + "members", + "memberships", + "followed_lists", + "owned_lists", + "pinned_lists", ] # The search endpoints only take a next_token, but the timeline diff --git a/twarc/command2.py b/twarc/command2.py index e7534af5..88fcf4ca 100644 --- a/twarc/command2.py +++ b/twarc/command2.py @@ -35,6 +35,7 @@ MEDIA_FIELDS, POLL_FIELDS, PLACE_FIELDS, + LIST_FIELDS, ) from click import command, option, Option, UsageError from click_config_file import configuration_option @@ -810,26 +811,26 @@ def followers(T, user, outfile, limit, max_results, hide_progress): """ Get the followers for a given user. """ - count = 0 user_id = None - lookup_total = 0 + lookup_total = 1 - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: target_user = T._ensure_user(user) user_id = target_user["id"] lookup_total = target_user["public_metrics"]["followers_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.followers(user, user_id=user_id, max_results=max_results): - _write(result, outfile) - count += len(result["data"]) - progress.update(len(result["data"])) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.followers, + user=user, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("following") @@ -854,26 +855,26 @@ def following(T, user, outfile, limit, max_results, hide_progress): """ Get the users that a given user is following. """ - count = 0 user_id = None - lookup_total = 0 + lookup_total = 1 - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: target_user = T._ensure_user(user) user_id = target_user["id"] lookup_total = target_user["public_metrics"]["following_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.following(user, user_id=user_id, max_results=max_results): - _write(result, outfile) - count += len(result["data"]) - progress.update(len(result["data"])) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.following, + user=user, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("liking-users") @@ -901,14 +902,12 @@ def liking_users(T, tweet_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 - lookup_total = 0 + lookup_total = 1 if not re.match("^\d+$", str(tweet_id)): click.echo(click.style("Please enter a tweet ID", fg="red"), err=True) - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: # TODO: we could probably do this everytime, and avoid doing any lookups @@ -917,14 +916,15 @@ def liking_users(T, tweet_id, outfile, limit, max_results, hide_progress): if "data" in target_tweet: lookup_total = target_tweet["data"][0]["public_metrics"]["like_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.liking_users(tweet_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.liking_users, + tweet_id=tweet_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("retweeted-by") @@ -952,14 +952,12 @@ def retweeted_by(T, tweet_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 lookup_total = 0 if not re.match("^\d+$", str(tweet_id)): click.echo(click.style("Please enter a tweet ID", fg="red"), err=True) - if outfile is not None and (outfile.name == ""): - hide_progress = True + hide_progress = True if (outfile.name == "") else hide_progress if not hide_progress: # TODO: we could probably do this everytime, and avoid doing any lookups @@ -968,14 +966,15 @@ def retweeted_by(T, tweet_id, outfile, limit, max_results, hide_progress): if "data" in target_tweet: lookup_total = target_tweet["data"][0]["public_metrics"]["retweet_count"] - with tqdm(disable=hide_progress, total=lookup_total) as progress: - for result in T.retweeted_by(tweet_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.retweeed_by, + tweet_id=tweet_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + max_results=max_results, + ) @twarc2.command("liked-tweets") @@ -1003,26 +1002,18 @@ def liked_tweets(T, user_id, outfile, limit, max_results, hide_progress): Note that the progress bar is approximate. """ - count = 0 - lookup_total = 0 - - if not re.match("^\d+$", str(user_id)): - click.echo(click.style("Please enter a user ID", fg="red"), err=True) - - if outfile is not None and (outfile.name == ""): - hide_progress = True # NB: there doesn't appear to be anyway to get the total count of likes # a user has made, so the progress bar isn't very useful in this case... - - with tqdm(disable=hide_progress) as progress: - for result in T.liked_tweets(user_id, max_results=max_results): - _write(result, outfile) - count += len(result.get("data", [])) - progress.update(len(result.get("data", []))) - if limit != 0 and count >= limit: - progress.desc = f"Set --limit of {limit} reached" - break + _write_with_progress( + func=T.liked_tweets, + user_id=user_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + max_results=max_results, + ) @twarc2.command("sample") @@ -1227,7 +1218,7 @@ def mentions(T, user_id, outfile, hide_progress, **kwargs): with tqdm(disable=hide_progress, total=800) as progress: for result in T.mentions(user_id, **kwargs): _write(result, outfile) - progress.update(len(result["data"])) + progress.update(len(result.get("data", []))) else: if progress.n > 800: progress.desc = f"API limit reached with {progress.n} tweets" @@ -1950,6 +1941,356 @@ def stream(T, outfile, limit, **kwargs): log.info("archived %s", result["data"]["id"]) +@twarc2.group() +@click.pass_obj +def lists(T): + """ + Lists API support. + """ + pass + + +@lists.command("lookup") +@click.argument("list_id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option("--pretty", is_flag=True, default=False, help="Pretty print the JSON") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.pass_obj +@cli_api_error +def lists_lookup(T, list_id, outfile, pretty, **kwargs): + """ + Look up a single list using its list id or URL. + """ + + kwargs = _process_expansions_shortcuts(kwargs) + + if "https" in list_id: + list_id = list_id.split("/")[-1] + if not re.match("^\d+$", list_id): + click.echo(click.style("Please enter a List URL or ID", fg="red"), err=True) + result = T.list_lookup(list_id, **kwargs) + _write(result, outfile, pretty=pretty) + + +@lists.command("bulk-lookup") +@command_line_input_output_file_arguments +@command_line_progressbar_option +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of fields about a list to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.pass_obj +@cli_api_error +def lists_bulk_lookup(T, infile, outfile, hide_progress, **kwargs): + """ + Look up the details of many lists given a file of IDs or URLs. + """ + + kwargs = _process_expansions_shortcuts(kwargs) + + with FileLineProgressBar(infile, outfile, disable=hide_progress) as progress: + for list_id in infile: + progress.update() + + if "https" in list_id: + list_id = list_id.split("/")[-1] + if not re.match("^\d+$", list_id): + click.echo( + click.style("Skipping invalid List URL or ID: {line}", fg="red"), + err=True, + ) + continue + result = T.list_lookup(list_id.strip(), **kwargs) + _write(result, outfile) + + +@lists.command("all") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_all(T, user, outfile, limit, hide_progress, **kwargs): + """ + Get all Lists that a user created or is subscribed to. + + You can use the `owned` or `followed` command to get just the lists + created by the user, or just the lists followed by the user + respectively. + + """ + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.owned_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) + _write_with_progress( + func=T.followed_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) + + +@lists.command("owned") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_owned(T, user, outfile, limit, hide_progress, **kwargs): + """ + Get all Lists that a user created. + """ + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.owned_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) + + +@lists.command("followed") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_followed(T, user, outfile, limit, hide_progress, **kwargs): + """ + Get all Lists that a user is following. + """ + kwargs = _process_expansions_shortcuts(kwargs) + _write_with_progress( + func=T.followed_lists, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=1, + **kwargs, + ) + + +@lists.command("memberships") +@click.argument("user", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--list-fields", + default=",".join(LIST_FIELDS), + type=click.STRING, + is_eager=True, + help="Comma separated list of tweet fields to retrieve. Default is all available.", + callback=_validate_expansions, +) +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_memberships(T, user, outfile, limit, hide_progress, **kwargs): + """ + Get all Lists that a user is a member of. + """ + kwargs = _process_expansions_shortcuts(kwargs) + lookup_total = 1 + + hide_progress = True if (outfile.name == "") else hide_progress + + if not hide_progress: + target_user = T._ensure_user(user) + lookup_total = target_user["public_metrics"]["listed_count"] + + _write_with_progress( + func=T.list_memberships, + user=user, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + **kwargs, + ) + + +@lists.command("followers") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_followers(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get all Users that are following (subscribed) to a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _list = ensure_flattened(T.list_lookup(list_id))[-1] + list_id = _list["id"] + lookup_total = int(_list["follower_count"]) + + _write_with_progress( + func=T.list_followers, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + **kwargs, + ) + + +@lists.command("members") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of lists to save. Default is all.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_members(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get all Users that are members of a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _list = ensure_flattened(T.list_lookup(list_id))[-1] + list_id = _list["id"] + lookup_total = int(_list["member_count"]) + + _write_with_progress( + func=T.list_members, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=lookup_total, + **kwargs, + ) + + +@lists.command("tweets") +@click.argument("list-id", type=str) +@click.argument("outfile", type=click.File("w"), default="-") +@click.option( + "--limit", + default=0, + help="Maximum number of tweets to save. Default and max is last 800.", + type=int, +) +@command_line_expansions_options +@command_line_progressbar_option +@click.pass_obj +@cli_api_error +def lists_tweets(T, list_id, outfile, limit, hide_progress, **kwargs): + """ + Get up to the most recent 800 tweets posted by members of a list. + """ + kwargs = _process_expansions_shortcuts(kwargs) + # Also remove media poll and place from kwargs, these are not valid for this endpoint: + kwargs.pop("media_fields", None) + kwargs.pop("poll_fields", None) + kwargs.pop("place_fields", None) + + _write_with_progress( + func=T.list_tweets, + list_id=list_id, + outfile=outfile, + limit=limit, + hide_progress=hide_progress, + progress_total=800, + **kwargs, + ) + + @twarc2.group() @click.pass_obj def stream_rules(T): @@ -2548,3 +2889,26 @@ def _error_str(errors): def _write(results, outfile, pretty=False): indent = 2 if pretty else None click.echo(json.dumps(results, indent=indent), file=outfile) + + +def _write_with_progress( + func, outfile, limit, hide_progress, progress_total=1, **kwargs +): + """ + Get results page by page and write them out with a progress bar + """ + count = 0 + hide_progress = True if (outfile.name == "") else hide_progress + + with tqdm(disable=hide_progress, total=progress_total) as progress: + results = func(**kwargs) + for result in results: + _write(result, outfile) + count += len(result.get("data", [])) + progress.update(len(result.get("data", []))) + if limit != 0 and count >= limit: + # Display message when stopped early + progress.desc = f"Set --limit of {limit} reached" + break + # Finish the progress bar + progress.update(progress.total - progress.n) diff --git a/twarc/expansions.py b/twarc/expansions.py index 9b5d9715..8d8e9e57 100644 --- a/twarc/expansions.py +++ b/twarc/expansions.py @@ -92,6 +92,15 @@ "place_type", ] +LIST_FIELDS = [ + "owner_id", + "created_at", + "member_count", + "follower_count", + "private", + "description", +] + def extract_includes(response, expansion, _id="id"): if "includes" in response and expansion in response["includes"]: