Skip to content

Commit

Permalink
update proxy support (#85, #96, #118)
Browse files Browse the repository at this point in the history
  • Loading branch information
vladkens committed Feb 10, 2024
1 parent cd04ed9 commit fc5eaa8
Show file tree
Hide file tree
Showing 6 changed files with 76 additions and 25 deletions.
5 changes: 5 additions & 0 deletions _get_gql_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@
import httpx
from fake_useragent import UserAgent

"""
docker run --rm -p "3128:3128/tcp" -p "1080:1080/tcp" -e "PROXY_LOGIN=user" -e "PROXY_PASSWORD=pass" tarampampam/3proxy
docker run --rm -p "3129:3128/tcp" -p "1081:1080/tcp" tarampampam/3proxy
"""

client = httpx.Client(headers={"user-agent": UserAgent().chrome})

with open("./twscrape/api.py") as fp:
Expand Down
63 changes: 51 additions & 12 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,6 @@ async def main():
cookies = "abc=12; ct0=xyz" # or '{"abc": "12", "ct0": "xyz"}'
await api.pool.add_account("user3", "pass3", "[email protected]", "mail_pass3", cookies=cookies)

# add account with PROXY
proxy = "http://login:[email protected]:8080"
await api.pool.add_account("user4", "pass4", "[email protected]", "mail_pass4", proxy=proxy)

# API USAGE

# search (latest tab)
Expand All @@ -88,8 +84,10 @@ async def main():
# user info
user_id = 2244994945
await api.user_by_id(user_id) # User
await gather(api.followers(user_id, limit=20)) # list[User]
await gather(api.following(user_id, limit=20)) # list[User]
await gather(api.followers(user_id, limit=20)) # list[User]
await gather(api.verified_followers(user_id, limit=20)) # list[User]
await gather(api.subscriptions(user_id, limit=20)) # list[User]
await gather(api.user_tweets(user_id, limit=20)) # list[Tweet]
await gather(api.user_tweets_and_replies(user_id, limit=20)) # list[Tweet]
await gather(api.liked_tweets(user_id, limit=20)) # list[Tweet]
Expand Down Expand Up @@ -175,17 +173,19 @@ twscrape add_accounts ./order-12345.txt username:password:email:email_password:_

### Login accounts

_Note: If you added accounts with cookies, login not required._
_Note:_ If you added accounts with cookies, login is not required.

Run:

```sh
twscrape login_accounts
```

`twscrape` will start login flow for each new account. If X will ask to verify email and you provided `email_password` in `add_account`, then `twscrape` will try to receive verification map by IMAP protocol. After success login account cookies will be saved to db file for future use.
`twscrape` will start the login flow for each new account. If X asks to verify the email and you provided `email_password` in `add_account`, then `twscrape` will try to receive the verification code via the IMAP protocol. After a successful login, the account cookies will be saved to the db file for future use.

#### Manual email verefication
_Note:_ You can increase the timeout for the verification code with the `TWS_WAIT_EMAIL_CODE` environment variable (default: `30`, in seconds).

#### Manual email verification

In case your email provider does not support the IMAP protocol (ProtonMail, Tutanota, etc.) or IMAP is disabled in settings, you can enter the email verification code manually. To do this, run the login command with the `--manual` flag.

Expand All @@ -197,7 +197,6 @@ twscrape relogin user1 user2 --manual
twscrape relogin_failed --manual
```


### Get list of accounts and their statuses

```sh
Expand Down Expand Up @@ -242,8 +241,10 @@ twscrape retweeters TWEET_ID --limit=20
twscrape favoriters TWEET_ID --limit=20
twscrape user_by_id USER_ID
twscrape user_by_login USERNAME
twscrape followers USER_ID --limit=20
twscrape following USER_ID --limit=20
twscrape followers USER_ID --limit=20
twscrape verified_followers USER_ID --limit=20
twscrape subscriptions USER_ID --limit=20
twscrape user_tweets USER_ID --limit=20
twscrape user_tweets_and_replies USER_ID --limit=20
twscrape liked_tweets USER_ID --limit=20
Expand All @@ -261,9 +262,47 @@ By default, parsed data is returned. The original tweet responses can be retriev
twscrape search "elon mask lang:es" --limit=20 --raw
```

### Environment variables
## Proxy

There are a few options to use proxies.

1. You can add proxy per account

```py
proxy = "http://login:[email protected]:8080"
await api.pool.add_account("user4", "pass4", "[email protected]", "mail_pass4", proxy=proxy)
```

2. You can use global proxy for all accounts

```py
proxy = "http://login:[email protected]:8080"
api = API(proxy=proxy)
doc = await api.user_by_login("elonmusk")
```

3. You can set proxy with the environment variable `TWS_PROXY`:

```sh
TWS_PROXY=socks5://user:[email protected]:1080 twscrape user_by_login elonmusk
```

4. You can change proxy any time like:

```py
api.proxy = "socks5://user:[email protected]:1080"
doc = await api.user_by_login("elonmusk") # new proxy will be used
api.proxy = None
doc = await api.user_by_login("elonmusk") # no proxy used
```

5. Proxy priorities

- `api.proxy` has top priority
- `env.proxy` will be used if `api.proxy` is None
- `acc.proxy` has the lowest priority

`LOGIN_CODE_TIMEOUT` - how long to wait for email code confirmation in seconds (default `40`)
So if you want to use proxy PER ACCOUNT, do NOT override proxy with env variable or by passing proxy param to API.

## Limitations

Expand Down
9 changes: 7 additions & 2 deletions twscrape/account.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
import sqlite3
from dataclasses import asdict, dataclass, field
from datetime import datetime
Expand Down Expand Up @@ -48,9 +49,13 @@ def to_rs(self):
rs["last_used"] = rs["last_used"].isoformat() if rs["last_used"] else None
return rs

def make_client(self) -> AsyncClient:
def make_client(self, proxy: str | None) -> AsyncClient:
proxies = [proxy, os.getenv("TWS_PROXY"), self.proxy]
proxies = [x for x in proxies if x is not None]
proxy = proxies[0] if proxies else None

transport = AsyncHTTPTransport(retries=2)
client = AsyncClient(proxies=self.proxy, follow_redirects=True, transport=transport)
client = AsyncClient(proxy=proxy, follow_redirects=True, transport=transport)

# saved from previous usage
client.cookies.update(self.cookies)
Expand Down
9 changes: 6 additions & 3 deletions twscrape/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,14 +52,17 @@ class API:
# Note: kv is variables, ft is features from original GQL request
pool: AccountsPool

def __init__(self, pool: AccountsPool | str | None = None, debug=False):
def __init__(
self, pool: AccountsPool | str | None = None, debug=False, proxy: str | None = None
):
if isinstance(pool, AccountsPool):
self.pool = pool
elif isinstance(pool, str):
self.pool = AccountsPool(pool)
else:
self.pool = AccountsPool()

self.proxy = proxy
self.debug = debug
if self.debug:
set_log_level("DEBUG")
Expand Down Expand Up @@ -89,7 +92,7 @@ async def _gql_items(
queue, cur, cnt, active = op.split("/")[-1], None, 0, True
kv, ft = {**kv}, {**GQL_FEATURES, **(ft or {})}

async with QueueClient(self.pool, queue, self.debug) as client:
async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
while active:
params = {"variables": kv, "features": ft}
if cur is not None:
Expand All @@ -115,7 +118,7 @@ async def _gql_items(
async def _gql_item(self, op: str, kv: dict, ft: dict | None = None):
ft = ft or {}
queue = op.split("/")[-1]
async with QueueClient(self.pool, queue, self.debug) as client:
async with QueueClient(self.pool, queue, self.debug, proxy=self.proxy) as client:
params = {"variables": {**kv}, "features": {**GQL_FEATURES, **ft}}
return await client.get(f"{GQL_URL}/{op}", params=encode_params(params))

Expand Down
10 changes: 4 additions & 6 deletions twscrape/imap.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,11 +6,9 @@
from datetime import datetime

from .logger import logger
from .utils import int_or

_env = dict(os.environ)

LOGIN_CODE_TIMEOUT = int_or(_env, "LOGIN_CODE_TIMEOUT") or 30
TWS_WAIT_EMAIL_CODE = [os.getenv("TWS_WAIT_EMAIL_CODE"), os.getenv("LOGIN_CODE_TIMEOUT"), 30]
TWS_WAIT_EMAIL_CODE = [int(x) for x in TWS_WAIT_EMAIL_CODE if x is not None][0]


class EmailLoginError(Exception):
Expand Down Expand Up @@ -82,8 +80,8 @@ async def imap_get_email_code(
if code is not None:
return code

if LOGIN_CODE_TIMEOUT < time.time() - start_time:
raise EmailCodeTimeoutError(f"Email code timeout ({LOGIN_CODE_TIMEOUT} sec)")
if TWS_WAIT_EMAIL_CODE < time.time() - start_time:
raise EmailCodeTimeoutError(f"Email code timeout ({TWS_WAIT_EMAIL_CODE} sec)")

await asyncio.sleep(5)
except Exception as e:
Expand Down
5 changes: 3 additions & 2 deletions twscrape/queue_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,12 @@ def dump_rep(rep: Response):


class QueueClient:
def __init__(self, pool: AccountsPool, queue: str, debug=False):
def __init__(self, pool: AccountsPool, queue: str, debug=False, proxy: str | None = None):
self.pool = pool
self.queue = queue
self.debug = debug
self.ctx: Ctx | None = None
self.proxy = proxy

async def __aenter__(self):
await self._get_ctx()
Expand Down Expand Up @@ -104,7 +105,7 @@ async def _get_ctx(self):
if acc is None:
return None

clt = acc.make_client()
clt = acc.make_client(proxy=self.proxy)
self.ctx = Ctx(acc, clt)
return self.ctx

Expand Down

0 comments on commit fc5eaa8

Please sign in to comment.