Check-in [ea40110aa4]
Many hyperlinks are disabled. Use anonymous login to enable hyperlinks.
Overview
Comment: | yaydl 0.17.2: updated dependencies, fixed the rest of the handlers which weren't using the proxy yet. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | trunk | release-0.17.2 |
Files: | files | file ages | folders |
SHA3-256: | ea40110aa4d27f6dcdc858f480abdbdc |
User & Date: | Cthulhux 2025-01-21 19:35:27 |
Context
2025-01-21
| ||
19:35 | yaydl 0.17.2: updated dependencies, fixed the rest of the handlers which weren't using the proxy yet. Leaf check-in: ea40110aa4 user: Cthulhux tags: release-0.17.2, trunk | |
2024-11-19
| ||
01:43 | yaydl 0.17.1: fixed one more problem with VOE check-in: 1f161f7024 user: Cthulhux tags: release-0.17.1, trunk | |
Changes
Changes to Cargo.lock.
︙ | ︙ | |||
13 14 15 16 17 18 19 | [[package]] name = "adler2" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" | < < < < < < < < < < < < < | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | [[package]] name = "adler2" version = "2.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" [[package]] name = "aho-corasick" version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" dependencies = [ "memchr", |
︙ | ︙ | |||
357 358 359 360 361 362 363 | checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" dependencies = [ "dtoa", ] [[package]] name = "ego-tree" | | | | 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 | checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" dependencies = [ "dtoa", ] [[package]] name = "ego-tree" version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b2972feb8dffe7bc8c5463b1dacda1b0dfbed3710e50f977d965429692d74cd8" [[package]] name = "encode_unicode" version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a357d28ed41a50f9c765dbfe56cbc04a64e53e5fc58ba79fbc34c10ef3df831f" |
︙ | ︙ | |||
1357 1358 1359 1360 1361 1362 1363 | "libc", "linux-raw-sys", "windows-sys 0.52.0", ] [[package]] name = "rustls" | | | | 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 | "libc", "linux-raw-sys", "windows-sys 0.52.0", ] [[package]] name = "rustls" version = "0.23.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f287924602bf649d949c63dc8ac8b235fa5387d394020705b80c4eb597ce5b8" dependencies = [ "log", "once_cell", "ring", "rustls-pki-types", "rustls-webpki", "subtle", |
︙ | ︙ | |||
1410 1411 1412 1413 1414 1415 1416 | name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" | | | < | 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 | name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "scraper" version = "0.22.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cc3d051b884f40e309de6c149734eab57aa8cc1347992710dc80bcc1c2194c15" dependencies = [ "cssparser", "ego-tree", "getopts", "html5ever", "precomputed-hash", "selectors", "tendril", |
︙ | ︙ | |||
1799 1800 1801 1802 1803 1804 1805 | name = "untrusted" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" | | | | 1785 1786 1787 1788 1789 1790 1791 1792 1793 1794 1795 1796 1797 1798 1799 1800 1801 | name = "untrusted" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" [[package]] name = "ureq" version = "2.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "02d1a66277ed75f640d608235660df48c8e3c19f3b4edb6a263315626cc3c01d" dependencies = [ "base64 0.22.1", "flate2", "log", "once_cell", "rustls", "rustls-pki-types", |
︙ | ︙ | |||
2096 2097 2098 2099 2100 2101 2102 | name = "writeable" version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "yaydl" | | | 2082 2083 2084 2085 2086 2087 2088 2089 2090 2091 2092 2093 2094 2095 2096 | name = "writeable" version = "0.5.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" [[package]] name = "yaydl" version = "0.17.2" dependencies = [ "anyhow", "cienli", "clap", "env_proxy", "fantoccini", "indicatif", |
︙ | ︙ |
Changes to Cargo.toml.
1 2 3 | [package] name = "yaydl" description = "yet another youtube (and more) down loader" | | | | | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | [package] name = "yaydl" description = "yet another youtube (and more) down loader" version = "0.17.2" authors = ["Cthulhux <git@tuxproject.de>"] edition = "2021" license = "CDDL-1.0" repository = "https://code.rosaelefanten.org/yaydl" categories = ["command-line-utilities"] keywords = ["youtube", "downloading", "video"] [dependencies] anyhow = "1.0" cienli = "0.3" clap = { version = "4.5", features = ["derive"] } env_proxy = "0.4" fantoccini = "0.21" indicatif = "0.17" inventory = "0.3" m3u8-rs = "6.0" nom = "7.1" regex = "1.11" scraper = "0.22" serde_json = "1.0" tokio = { version = "1", features = ["rt"] } ureq = { version = "2.12", features = ["json", "socks-proxy"] } url = "2.5" urlencoding = "2.1" [profile.release] lto = true strip = true |
Changes to src/handlers/spankbang.rs.
︙ | ︙ | |||
38 39 40 41 42 43 44 45 | const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string fn get_video_info(video: &mut VIDEO, url: &str) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); video.info.push_str( | > > > > > > > > > > > > | | 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 | const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string fn get_video_info(video: &mut VIDEO, url: &str) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); // Initialize the agent: let mut agent = ureq::agent(); let url_p = Url::parse(&local_url)?; if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } video.info.push_str( agent .get(&local_url) .call() .expect("Could not go to the url") .into_string() .expect("Could not read the site source") .as_str(), ); } |
︙ | ︙ |
Changes to src/handlers/vidoza.rs.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 | // - Vidoza handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. | > > > > > > > > > > > | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | // - Vidoza handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; use url::Url; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. // Initialize the agent: let mut agent = ureq::agent(); let url_p = Url::parse(url)?; if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(url).call()?; let body = req.into_string()?; video.info = body; } // Return it: let d = Html::parse_document(&video.info); |
︙ | ︙ |
Changes to src/handlers/vimeo.rs.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 | // - Vimeo handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::Value; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Value> { if video.info.is_empty() { // We need to fetch the video information first. // Those are hidden behing a config file defined in the page source code. // Search for: window.vimeo.clip_page_config.player = {"config_url":"(.+?)" | > > > > > > > > > > | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | // - Vimeo handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::Value; use url::Url; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Value> { if video.info.is_empty() { // We need to fetch the video information first. // Those are hidden behing a config file defined in the page source code. // Search for: window.vimeo.clip_page_config.player = {"config_url":"(.+?)" let mut agent = ureq::agent(); let url_p = Url::parse(url)?; if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(url).call()?; let body = req.into_string()?; let re = Regex::new("window.vimeo.clip_page_config.player = .\"config_url\":\"(?P<URL>.+?)\"") .unwrap(); let search = re.captures(&body).unwrap(); // While we're grepping the source code: Vimeo also hides |
︙ | ︙ | |||
49 50 51 52 53 54 55 | let video_info_url = search .name("URL") .map_or("", |u| u.as_str()) .replace("\\", ""); // The "config_url" body is a JSON structure. // Grab and store it: | | | 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 | let video_info_url = search .name("URL") .map_or("", |u| u.as_str()) .replace("\\", ""); // The "config_url" body is a JSON structure. // Grab and store it: let config_req = agent.get(&video_info_url).call()?; let config_body = config_req.into_string()?; video.info.push_str(config_body.as_str()); } // Return it: let v: Value = serde_json::from_str(&video.info)?; Ok(v) |
︙ | ︙ |
Changes to src/handlers/vivo.rs.
︙ | ︙ | |||
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | use crate::definitions::SiteDefinition; use anyhow::Result; use cienli::ciphers::rot::{Rot, RotType}; use regex::Regex; use scraper::{Html, Selector}; use urlencoding::decode; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. | > > > > > > > > > > > | | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 | use crate::definitions::SiteDefinition; use anyhow::Result; use cienli::ciphers::rot::{Rot, RotType}; use regex::Regex; use scraper::{Html, Selector}; use url::Url; use urlencoding::decode; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. // Initialize the agent: let mut agent = ureq::agent(); let url_p = Url::parse(&url)?; if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(&url).call()?; let body = req.into_string()?; video.info.push_str(body.as_str()); } let d = Html::parse_document(&video.info); Ok(d) } |
︙ | ︙ |
Changes to src/handlers/voe.rs.
︙ | ︙ | |||
17 18 19 20 21 22 23 24 25 26 27 28 | // - VOE handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; use crate::VIDEO; fn resolve_js_redirect(url: &str) -> String { // VOE tends to redirect. Find the actual target URL: | > > | > > > > > > > > > > > > > > > > > > > | > > > > > > > > > | | 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 | // - VOE handler - use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; use url::Url; use crate::VIDEO; fn resolve_js_redirect(url: &str) -> String { // VOE tends to redirect. Find the actual target URL: let static_url = url.to_owned(); let mut agent = ureq::agent(); let url_p = Url::parse(&static_url).unwrap(); if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(&static_url).call().expect("could not go to the site URL"); let body = req.into_string().unwrap(); let re_redirect = Regex::new(r"window.location.href = '(?P<URL>.*?)'").unwrap(); if !re_redirect.is_match(&body) { // No redirect String::from(url) } else { // A redirect... let captures = re_redirect.captures(body.as_str()).unwrap(); let returnval = String::from(captures.name("URL").map_or("", |u| u.as_str())); returnval } } fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. 
let mut agent = ureq::agent(); let url_p = Url::parse(url)?; if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(&resolve_js_redirect(url)).call()?; let body = req.into_string()?; video.info = body; } // Return it: let d = Html::parse_document(&video.info); Ok(d) } // Implement the site definition: struct VoeHandler; impl SiteDefinition for VoeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { // We need to catch both VOE.sx and whatever redirectors it uses. // As main.rs hasn't built the VIDEO struct here yet, we'll parse // the resulting website a first time... let mut agent = ureq::agent(); let url_p = Url::parse(url).unwrap(); if let Some(env_proxy) = env_proxy::for_url(&url_p).host_port() { // Use a proxy: let proxy = ureq::Proxy::new(format!("{}:{}", env_proxy.0, env_proxy.1)); agent = ureq::AgentBuilder::new().proxy(proxy.unwrap()).build(); } let req = agent.get(&resolve_js_redirect(&url)).call().unwrap(); let body = req.into_string().unwrap(); // If the body contains a VOEPlayer, we're in it. Regex::new(r"VOEPlayer").unwrap().is_match(&body) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { |
︙ | ︙ |