diff --git a/src/constants.rs b/src/constants.rs
new file mode 100644
index 0000000..2a325ad
--- /dev/null
+++ b/src/constants.rs
@@ -0,0 +1,14 @@
+/// Browser user-agent strings that [`random_useragent`] picks from.
+pub const USER_AGENTS: [&str; 5] = [
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
+    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
+    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
+    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
+];
+
+/// Returns a randomly chosen entry of [`USER_AGENTS`] as an owned `String`.
+pub fn random_useragent() -> String {
+    // a random `usize` reduced modulo the array length is always a valid index
+    USER_AGENTS[rand::random::<usize>() % USER_AGENTS.len()].to_string()
+}
diff --git a/src/lib.rs b/src/lib.rs
index 0e33b6f..d8c328e 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,24 +1,15 @@
-use std::net::SocketAddr;
-use std::str::FromStr;
-use std::sync::Arc;
+mod constants;
+pub mod token_generator;
+
 use isahc::auth::{Authentication, Credentials};
-use isahc::{Body, Request, Response};
 use isahc::http::HeaderMap;
 use isahc::prelude::*;
-use log::debug;
+use isahc::{Body, Request, Response};
+use std::str::FromStr;
+use std::sync::Arc;
 
-const USER_AGENTS: [&str; 5] = [
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0) Gecko/20100101 Firefox/109.0",
-    "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_2) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.3 Safari/605.1.15",
-    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41",
-    "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36",
-];
-
-/// this is what we look for to find the bearer 
token -const SEARCH_STRING: &str = "AAAAAAAAA"; -/// this is what we look for to find the main.*.js file -const MAIN_JS_SEARCH_STRING: &str = "main."; +#[allow(unused_imports)] +use log::*; struct Proxy { address: String, @@ -30,14 +21,6 @@ pub enum ProxyError { InvalidProxyFormat, } -#[derive(Clone, Debug)] -pub enum BearerError { - CouldntGetTwitterDotCom(isahc::Error), - CouldntFindMainJsUrl, - CouldntGetMainDotJs(isahc::Error), - CouldntFindBearerToken, -} - impl FromStr for Proxy { type Err = ProxyError; @@ -62,6 +45,13 @@ impl FromStr for Proxy { pub struct Client { http_client: Arc, proxy: Option>, + twitter_auth: Option, +} + +#[derive(Clone)] +pub struct TwitterAuth { + bearer: String, + guest_id: Option, } impl Default for Client { @@ -75,6 +65,7 @@ impl Client { Client { http_client: Arc::new(isahc::HttpClient::new().expect("failed to create http client")), proxy: None, + twitter_auth: None, } } @@ -83,6 +74,7 @@ impl Client { Client { http_client: Arc::new(isahc::HttpClient::new().expect("failed to create http client")), proxy: Some(Arc::new(proxy)), + twitter_auth: None, } } @@ -96,51 +88,17 @@ impl Client { } } let mut request = request.body(())?; - request.headers_mut().extend(headers.iter().map(|(k, v)| (k.clone(), v.clone()))); + request + .headers_mut() + .extend(headers.iter().map(|(k, v)| (k.clone(), v.clone()))); self.http_client.send(request) } - - fn get_twitter_bearer_token(&self) -> Result { - let url = "https://twitter.com"; - let mut headers = HeaderMap::new(); - let user_agent = USER_AGENTS[rand::random::() % USER_AGENTS.len()]; - headers.insert("user-agent", user_agent.parse().unwrap()); - - let mut response = self.get(url, &headers).map_err(BearerError::CouldntGetTwitterDotCom)?; - let body = response.text().unwrap(); - let mut mainjs_url = None; - // fixme! 
this is really fucking cursed - for line in body.lines() { - if line.contains(MAIN_JS_SEARCH_STRING) { - let main_js_start = line.find(MAIN_JS_SEARCH_STRING).unwrap(); - let main_js_end = line[main_js_start..].find('"').unwrap() + main_js_start; - let main_js_url_start = line[0..main_js_start].rfind('"').unwrap(); - let main_js_url = &line[main_js_url_start + 1..main_js_end]; - mainjs_url = Some(main_js_url.to_string()); - } - } - let mainjs_url = mainjs_url.ok_or(BearerError::CouldntFindMainJsUrl)?; - debug!("mainjs_url: {}", mainjs_url); - // now get the main.js file - let mut response = self.get(&mainjs_url, &headers).map_err(BearerError::CouldntGetMainDotJs)?; - let body = response.text().unwrap(); - let mut token = None; - for line in body.lines() { - if line.contains(SEARCH_STRING) { - let bearer_token_start = line.find(SEARCH_STRING).unwrap(); - let bearer_token_end = line[bearer_token_start..].find('"').unwrap() + bearer_token_start; - let bearer_token = &line[bearer_token_start..bearer_token_end]; - token = Some(bearer_token.to_string()); - } - } - - let token = token.ok_or(BearerError::CouldntFindBearerToken)?; - Ok(token) - } } #[cfg(test)] mod tests { + use crate::token_generator::get_bearer_and_guest_stuff; + use super::*; #[test] @@ -152,8 +110,12 @@ mod tests { #[test] fn get_bearer_token() { env_logger::init(); - let client = Client::new(); - let bearer_token = client.get_twitter_bearer_token(); - println!("{:?}", bearer_token); + let proxy = + std::env::var("TESTING_PROXY").unwrap_or_else(|_| panic!("use a proxy for testing!!!")); + let client = Client::new_with_proxy(proxy.as_str()); + let (bearer, guest_id, guest_token) = get_bearer_and_guest_stuff(&client).unwrap(); + debug!("bearer: {}", bearer); + debug!("guest_id: {}", guest_id); + debug!("guest_token: {}", guest_token); } } diff --git a/src/token_generator.rs b/src/token_generator.rs index e69de29..599bb1d 100644 --- a/src/token_generator.rs +++ b/src/token_generator.rs @@ -0,0 +1,105 @@ 
+use crate::constants::random_useragent;
+use crate::Client;
+use isahc::http::HeaderMap;
+use isahc::ReadResponseExt;
+use log::debug;
+
+/// this is what we look for to find the bearer token
+const SEARCH_STRING: &str = "AAAAAAAAA";
+/// this is what we look for to get the guest_id string, it has a `=` at the end because we don't
+/// wanna get something like guest_id_marketing accidentally
+const GUEST_ID_SEARCH_STRING: &str = "guest_id=";
+/// this is what we look for to find the guest token ("gt")
+const GUEST_TOKEN_SEARCH_STRING: &str = "\"gt=";
+/// this is what we look for to find the main.*.js file
+const MAIN_JS_SEARCH_STRING: &str = "main.";
+
+/// Which step of [`get_bearer_and_guest_stuff`] failed.
+#[derive(Clone, Debug)]
+pub enum BearerError {
+    /// Requesting twitter.com, or reading its response body, failed.
+    CouldntGetTwitterDotCom(isahc::Error),
+    /// No double-quoted string containing `main.` was found in the landing page.
+    CouldntFindMainJsUrl,
+    /// Requesting the main.*.js file, or reading its response body, failed.
+    CouldntGetMainDotJs(isahc::Error),
+    /// No `guest_id=` cookie was found among the `set-cookie` headers.
+    CouldntGetGuestIdCookie,
+    /// No bearer token was found in the main.*.js file.
+    CouldntFindBearerToken,
+    /// No `"gt=...;` snippet was found in the landing page.
+    CouldntGetGuestToken,
+}
+
+/// Value of a `guest_id=<value>[; attrs]` cookie string; `None` for any other cookie.
+fn guest_id_in(cookie: &str) -> Option<&str> {
+    let rest = cookie.strip_prefix(GUEST_ID_SEARCH_STRING)?;
+    Some(rest.split(';').next().unwrap_or(rest))
+}
+
+/// The double-quoted string surrounding the first `main.` on `line`, if it is quoted at all.
+fn main_js_url_in(line: &str) -> Option<&str> {
+    let hit = line.find(MAIN_JS_SEARCH_STRING)?;
+    let close = hit + line[hit..].find('"')?;
+    let open = line[..hit].rfind('"')?;
+    Some(&line[open + 1..close])
+}
+
+/// The text between the first `"gt=` on `line` and the next `;`, if both are present.
+fn guest_token_in(line: &str) -> Option<&str> {
+    let hit = line.find(GUEST_TOKEN_SEARCH_STRING)?;
+    let end = hit + line[hit..].find(';')?;
+    Some(&line[hit + GUEST_TOKEN_SEARCH_STRING.len()..end])
+}
+
+/// The text from the first `AAAAAAAAA` on `line` up to the next `"`, if both are present.
+fn bearer_token_in(line: &str) -> Option<&str> {
+    let hit = line.find(SEARCH_STRING)?;
+    let end = hit + line[hit..].find('"')?;
+    Some(&line[hit..end])
+}
+
+/// Applies `extract` to every line of `body` and keeps the last hit; `None` lines are skipped.
+fn last_hit(body: &str, extract: fn(&str) -> Option<&str>) -> Option<String> {
+    body.lines().filter_map(extract).last().map(str::to_owned)
+}
+
+/// Scrapes twitter.com and its main.*.js file for the bearer token, guest id and guest token.
+///
+/// `let (bearer_token, guest_id, guest_token) = get_bearer_and_guest_stuff(...);`
+///
+/// # Errors
+/// Returns the [`BearerError`] of the first step that fails; malformed lines never panic.
+pub fn get_bearer_and_guest_stuff(
+    client: &Client,
+) -> Result<(String, String, String), BearerError> {
+    let mut headers = HeaderMap::new();
+    let user_agent = random_useragent().parse().expect("built-in user agents are valid headers");
+    headers.insert("user-agent", user_agent);
+    let mut response = client
+        .get("https://twitter.com", &headers)
+        .map_err(BearerError::CouldntGetTwitterDotCom)?;
+    // the guest id comes from the first `set-cookie` header that starts with `guest_id=`
+    let guest_id = response
+        .headers()
+        .get_all("set-cookie")
+        .iter()
+        .find_map(|value| value.to_str().ok().and_then(guest_id_in))
+        .ok_or(BearerError::CouldntGetGuestIdCookie)?
+        .to_string();
+    let body = response
+        .text()
+        .map_err(|e| BearerError::CouldntGetTwitterDotCom(e.into()))?;
+    let mainjs_url = last_hit(&body, main_js_url_in).ok_or(BearerError::CouldntFindMainJsUrl)?;
+    let guest_token = last_hit(&body, guest_token_in).ok_or(BearerError::CouldntGetGuestToken)?;
+    debug!("mainjs_url: {}", mainjs_url);
+    // now get the main.js file
+    let mut response = client
+        .get(&mainjs_url, &headers)
+        .map_err(BearerError::CouldntGetMainDotJs)?;
+    let body = response
+        .text()
+        .map_err(|e| BearerError::CouldntGetMainDotJs(e.into()))?;
+    let token = last_hit(&body, bearer_token_in).ok_or(BearerError::CouldntFindBearerToken)?;
+    Ok((token, guest_id, guest_token))
+}