Check-in [ebb5df12f3]
Many hyperlinks are disabled.
Use anonymous login
to enable hyperlinks.
Overview
Comment: | yaydl 0.11.1: getting rid of unsafe { }. |
---|---|
Downloads: | Tarball | ZIP archive | SQL archive |
Timelines: | family | ancestors | descendants | both | trunk | release-0.11.1 |
Files: | files | file ages | folders |
SHA3-256: |
ebb5df12f307daf79d2816234e8de3e9 |
User & Date: | Cthulhux 2022-07-28 01:52:38 |
Context
2022-07-29
| ||
00:36 | yaydl 0.11.2: Fixed youtube regex error #11 * Added .gitignore check-in: 2334a25c2d user: rhydon tags: trunk, release-0.11.2 | |
2022-07-28
| ||
01:52 | yaydl 0.11.1: getting rid of unsafe { }. check-in: ebb5df12f3 user: Cthulhux tags: trunk, release-0.11.1 | |
2022-07-21
| ||
00:34 | yaydl 0.11: Added spankbang handler (cheers, @egdv from GitHub), updated dependencies. check-in: 1bdfe0346e user: Cthulhux tags: trunk, release-0.11.0 | |
Changes
Changes to CODE_OF_CONDUCT.md.
︙ | ︙ |
Changes to Cargo.lock.
︙ | ︙ | |||
1921 1922 1923 1924 1925 1926 1927 | name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" [[package]] name = "yaydl" | | | 1921 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 | name = "windows_x86_64_msvc" version = "0.36.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680" [[package]] name = "yaydl" version = "0.11.1" dependencies = [ "anyhow", "cienli", "clap", "fantoccini", "indicatif", "inventory", |
︙ | ︙ |
Changes to Cargo.toml.
1 2 3 | [package] name = "yaydl" description = "yet another youtube (and more) down loader" | | | 1 2 3 4 5 6 7 8 9 10 11 | [package] name = "yaydl" description = "yet another youtube (and more) down loader" version = "0.11.1" authors = ["Cthulhux <git@tuxproject.de>"] edition = "2021" license = "CDDL-1.0" repository = "https://code.rosaelefanten.org/yaydl" categories = ["command-line-utilities"] keywords = ["youtube", "downloading", "video"] |
︙ | ︙ |
Changes to src/definitions.rs.
︙ | ︙ | |||
13 14 15 16 17 18 19 20 21 22 23 24 25 26 | * distribution. */ // Yet Another Youtube Down Loader // - definitions.rs file - use anyhow::Result; // Define the public interface for site definitions: pub trait SiteDefinition { // true, if this site can handle <url>. fn can_handle_url<'a>(&'a self, url: &'a str) -> bool; // true, if the video exists. | > > | > > > > > | > > > > > > > | 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 | * distribution. */ // Yet Another Youtube Down Loader // - definitions.rs file - use anyhow::Result; use crate::VIDEO; // Define the public interface for site definitions: pub trait SiteDefinition { // true, if this site can handle <url>. fn can_handle_url<'a>(&'a self, url: &'a str) -> bool; // true, if the video exists. fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool>; // true, if the URL is a playlist. fn is_playlist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool>; // returns the title of a video. fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<String>; // returns the download URL of a video or playlist. fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, onlyaudio: bool, ) -> Result<String>; // returns the file extension of the video (e.g. "mp4"). fn find_video_file_extension<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, onlyaudio: bool, ) -> Result<String>; // returns the name of the site (e.g. "YouTube"). fn display_name<'a>(&'a self) -> String; // true, if this site needs a web driver. fn web_driver_required<'a>(&'a self) -> bool; } |
Changes to src/handlers/porndoe.rs.
︙ | ︙ | |||
20 21 22 23 24 25 26 | use anyhow::{anyhow, Result}; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; | | | | | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 | use anyhow::{anyhow, Result}; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io() |
︙ | ︙ | |||
52 53 54 55 56 57 58 59 | "document.getElementsByClassName('age-btn')[0].click();", vec![], ) .await .expect("could not dismiss the age gate"); let body = c.source().await.expect("could not read the site source"); c.close_window().await.expect("could not close the window"); | > < < < < | | | > > | > > > | | | | | | | | | < > < | > | | | | | | < | | > > | > > | < > | 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 | "document.getElementsByClassName('age-btn')[0].click();", vec![], ) .await .expect("could not dismiss the age gate"); let body = c.source().await.expect("could not read the site source"); video.info.push_str(body.as_str()); c.close_window().await.expect("could not close the window"); }); } Ok(true) } // Implement the site definition: struct PornDoeHandler; impl SiteDefinition for PornDoeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"porndoe.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // PornDoe has no playlists. Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let h1_selector = Selector::parse("h1.-heading").unwrap(); let text = video_info_html.select(&h1_selector).next(); let result = match text { Some(txt) => txt.text().collect(), None => return Err(anyhow!("Could not extract the video title.")), }; Ok(result) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let url_selector = Selector::parse(r#"meta[itemprop="contentUrl"]"#).unwrap(); let url_elem = video_info_html.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("content").unwrap(); Ok(url_contents.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool> { let _not_used = get_video_info(video, url, webdriver_port); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "PornDoe".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/spankbang.rs.
︙ | ︙ | |||
16 17 18 19 20 21 22 | // specific url path format for this site // https://spankbang.com/5-char-id/video/description+seprated+by+plus+char // // example: https://spankbang.com/12345/video/description+for+this+video // base filename: description_for_this_video lenght maximum is 142 // filename: description_for_this_video-12345.mp4 // | | > > < < | | > < < < < | | > > > > > > < | > | | | | | | < | | > > | > > | < > | 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 | // specific url path format for this site // https://spankbang.com/5-char-id/video/description+seprated+by+plus+char // // example: https://spankbang.com/12345/video/description+for+this+video // base filename: description_for_this_video lenght maximum is 142 // filename: description_for_this_video-12345.mp4 // // test url: https://spankbang.com/70841/video/nikki+fritz // // Yet Another Youtube Down Loader // - Spankbang handler - use crate::definitions::SiteDefinition; use anyhow::Result; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use url::Url; use crate::VIDEO; const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io() .build() .unwrap(); rt.block_on(async move { let webdriver_url = format!("http://localhost:{}", webdriver_port); let c = ClientBuilder::native() .connect(&webdriver_url) .await .expect("failed to connect to web driver"); c.goto(&local_url).await.expect("could not go to the URL"); let body = c.source().await.expect("could not read the site source"); video.info.push_str(body.as_str()); c.close_window().await.expect("could not close the window"); }); } Ok(true) } // Implement the site definition: struct SpankbangHandler; impl SiteDefinition for SpankbangHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"spankbang.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // Generic has playlists. Ok(false) } fn find_video_title<'a>( &'a self, _video: &mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<String> { // generates a valid base filename from url path for linux and windows // video title is less reliable to generate base filename for this particular site Ok(url_filename(url.to_string())) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port); let video_info_html = Html::parse_document(&video.info); let url_selector = Selector::parse(r#"source[type="video/mp4"]"#).unwrap(); let url_elem = video_info_html.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("src").unwrap(); Ok(url_contents.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool> { let _not_used = get_video_info(video, url, webdriver_port); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "Spankbang".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/vidoza.rs.
︙ | ︙ | |||
18 19 20 21 22 23 24 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; | | | | | | | | > > > > | | | | | | < > < | | | | | | | < | | > > > > | | < > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use scraper::{Html, Selector}; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let req = ureq::get(&url).call()?; let body = req.into_string()?; video.info = body; } // Return it: let d = Html::parse_document(&video.info); Ok(d) } // Implement the site definition: struct VidozaHandler; impl SiteDefinition for VidozaHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"vidoza.net/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // Vidoza does not seem to have playlists? Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<String> { let video_info = get_video_info(video, url)?; // Currently, there only is one <H1> on Vidoza. Good for us. let h1_selector = Selector::parse("h1").unwrap(); let text = video_info.select(&h1_selector).next().unwrap(); let result = text.text().collect(); Ok(result) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let video_info = get_video_info(video, url)?; let url_selector = Selector::parse("source").unwrap(); let url_elem = video_info.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("src").unwrap(); Ok(url_contents.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "Vidoza".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/vimeo.rs.
︙ | ︙ | |||
18 19 20 21 22 23 24 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::Value; | | < | | | | | | | > > > > | | < > < | | | | | | | | | | | | | | | | | | | | < | | > > > > | | < > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::Value; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Value> { if video.info.is_empty() { // We need to fetch the video information first. // Those are hidden behing a config file defined in the page source code. // Search for: window.vimeo.clip_page_config.player = {"config_url":"(.+?)" let req = ureq::get(url).call()?; let body = req.into_string()?; let re = Regex::new("window.vimeo.clip_page_config.player = .\"config_url\":\"(?P<URL>.+?)\"") .unwrap(); let search = re.captures(&body).unwrap(); // While we're grepping the source code: Vimeo also hides // the video title here. let title_re = Regex::new("<meta property=\"og:title\" content=\"(?P<TITLE>.+?)\"").unwrap(); let title_search = title_re.captures(&body).unwrap(); let video_title = title_search.name("TITLE").map_or("", |t| t.as_str()); video.title = video_title.to_string(); // If yaydl stops here, the URL is invalid. // TODO: That should be more obvious to the user. let video_info_url = search .name("URL") .map_or("", |u| u.as_str()) .replace("\\", ""); // The "config_url" body is a JSON structure. // Grab and store it: let config_req = ureq::get(&video_info_url).call()?; let config_body = config_req.into_string()?; video.info.push_str(config_body.as_str()); } // Return it: let v: Value = serde_json::from_str(&video.info)?; Ok(v) } // Implement the site definition: struct VimeoHandler; impl SiteDefinition for VimeoHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"(?:www\.)?vimeo.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // Vimeo seems to have no playlists? Ok(false) } fn find_video_title<'a>( &'a self, video: &mut VIDEO, _url: &'a str, _webdriver_port: u16, ) -> Result<String> { let ret = &video.title; Ok(ret.to_string()) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let id_regex = Regex::new(r"(?:vimeo.com/)(.*$)").unwrap(); let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str(); let video_info = get_video_info(video, id)?; let video_info_streams_progressive = match video_info["request"]["files"]["progressive"].as_array() { None => return Ok("".to_string()), Some(streams) => streams, }; // Vimeo makes it easy for us, as the size grows with the quality. // Thus, we can just take the largest width here. let mut url = ""; let mut width = 0u64; for stream in video_info_streams_progressive.iter() { let this_width = stream["width"].as_u64().unwrap_or(0); if this_width > width { width = this_width; url = stream["url"].as_str().unwrap(); } } Ok(url.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "Vimeo".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/vivo.rs.
︙ | ︙ | |||
20 21 22 23 24 25 26 | use anyhow::Result; use cienli::ciphers::rot::{Rot, RotType}; use regex::Regex; use scraper::{Html, Selector}; use urlencoding::decode; | | | | | < < | | | > > > > | | | | | < > < | | | | | | | | | | | | | < | | > > > > | | < > | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 | use anyhow::Result; use cienli::ciphers::rot::{Rot, RotType}; use regex::Regex; use scraper::{Html, Selector}; use urlencoding::decode; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let req = ureq::get(&url).call()?; let body = req.into_string()?; video.info.push_str(body.as_str()); } let d = Html::parse_document(&video.info); Ok(d) } // Implement the site definition: struct VivoHandler; impl SiteDefinition for VivoHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"vivo.sx/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // Vivo has no playlists. Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<String> { let video_info = get_video_info(video, url)?; let title_selector = Selector::parse("div.stream-content").unwrap(); let title_elem = video_info.select(&title_selector).next().unwrap(); let title_contents = title_elem.value().attr("data-name").unwrap(); Ok(title_contents.to_string()) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { // VIVO displays the stream URL only after executing JavaScript. // It is buried inside the source code and ROT47-encrypted. Bah... :-) let src_re = Regex::new("source: '(?P<SOURCE>.+?)',").unwrap(); let src_search = src_re.captures(&video.info).unwrap(); let video_src = src_search.name("SOURCE").map_or("", |t| t.as_str()); // URL decoding: let url_decoded = match decode(video_src) { Ok(u) => u, _ => unreachable!(), }; // un-ROT47: let unrotated = Rot::new(&url_decoded, RotType::Rot47); Ok(unrotated.decipher().to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "VIVO".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/voe.rs.
︙ | ︙ | |||
18 19 20 21 22 23 24 | use crate::definitions::SiteDefinition; use anyhow::{anyhow, Result}; use regex::Regex; use scraper::{Html, Selector}; | | | | | | | | > > > > | | | | | | | | | | < > < | | | | | | | < | | > > > > | | < > | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | use crate::definitions::SiteDefinition; use anyhow::{anyhow, Result}; use regex::Regex; use scraper::{Html, Selector}; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let req = ureq::get(&url).call()?; let body = req.into_string()?; video.info = body; } // Return it: let d = Html::parse_document(&video.info); Ok(d) } // Implement the site definition: struct VoeHandler; impl SiteDefinition for VoeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"(?:\.)?voe.sx/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // TODO: Does VOE still have playlists? Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<String> { let video_info = get_video_info(video, url)?; let h1_selector = Selector::parse("h1.mt-1").unwrap(); let text = video_info.select(&h1_selector).next(); let result = match text { Some(txt) => txt.text().collect(), None => return Err(anyhow!("Erroneous video site - maybe embed-only?")), }; Ok(result) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let _video_info = get_video_info(video, url)?; let url_re = Regex::new("sources: ..src: '(?P<URL>.+?)'").unwrap(); let url_search = url_re.captures(&video.info).unwrap(); let video_url = url_search.name("URL").map_or("", |u| u.as_str()); Ok(video_url.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "Voe".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/watchmdh.rs.
︙ | ︙ | |||
20 21 22 23 24 25 26 | use anyhow::Result; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; | | | | > < < < < | | | > > | > > > | | | | < > < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | < | | > > > > | | < > | 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 | use anyhow::Result; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io() .build() .unwrap(); rt.block_on(async move { let webdriver_url = format!("http://localhost:{}", webdriver_port); let c = ClientBuilder::native() .connect(&webdriver_url) .await .expect("failed to connect to web driver"); c.goto(&local_url).await.expect("could not go to the URL"); let body = c.source().await.expect("could not read the site source"); video.info.push_str(body.as_str()); c.close_window().await.expect("could not close the window"); }); } Ok(true) } // Implement the site definition: struct WatchMDHHandler; impl SiteDefinition for WatchMDHHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"watchmdh.to/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // WatchMDH has no playlists. Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let title_selector = Selector::parse(r#"meta[property="og:title"]"#).unwrap(); let title_elem = video_info_html.select(&title_selector).next().unwrap(); let title_contents = title_elem.value().attr("content").unwrap(); Ok(title_contents.to_string()) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { // Find the best video format and the rnd value: let re_rnd = Regex::new(r"rnd: '(\d+)'").unwrap(); let rnd = re_rnd .captures(&video.info) .unwrap() .get(1) .unwrap() .as_str(); let re_vid1 = Regex::new("video_alt_url: 'function/0/(.+?)',").unwrap(); let re_vid2 = Regex::new("video_url: 'function/0/(.+?)',").unwrap(); let url_contents; if re_vid1.is_match(&video.info) { url_contents = re_vid1 .captures(&video.info) .unwrap() .get(1) .unwrap() .as_str(); } else { url_contents = re_vid2 .captures(&video.info) .unwrap() .get(1) .unwrap() .as_str(); } Ok(String::from(url_contents) + "?rnd=" + rnd) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url, webdriver_port); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "WatchMDH".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) } |
︙ | ︙ |
Changes to src/handlers/xhamster.rs.
︙ | ︙ | |||
22 23 24 25 26 27 28 | use fantoccini::ClientBuilder; use nom::Finish; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use url::Url; | | | | > < < < < | | | > > | > > > | | | | | | | | | < > < | > | | | | | | | | | | | | | | | | | | | | | | | | < | | > > > > | | < > | 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 | use fantoccini::ClientBuilder; use nom::Finish; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use url::Url; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io() .build() .unwrap(); rt.block_on(async move { let webdriver_url = format!("http://localhost:{}", webdriver_port); let c = ClientBuilder::native() .connect(&webdriver_url) .await .expect("failed to connect to web driver"); c.goto(&local_url).await.expect("could not go to the URL"); let body = c.source().await.expect("could not read the site source"); video.info.push_str(body.as_str()); c.close_window().await.expect("could not close the window"); }); } Ok(true) } // Implement the site definition: struct XHamsterHandler; impl SiteDefinition for XHamsterHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"xhamster.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // xHamster has playlists. Ok(true) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let h1_selector = Selector::parse("h1").unwrap(); let text = video_info_html.select(&h1_selector).next(); let result = match text { Some(txt) => txt.text().collect(), None => return Err(anyhow!("Could not extract the video title.")), }; Ok(result) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); // Find the playlist first: let url_selector = Selector::parse(r#"link[rel="preload"][as="fetch"]"#).unwrap(); let url_elem = video_info_html.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("href").unwrap(); let mut playlist_url = Url::parse(url_contents)?; let request = ureq::get(playlist_url.as_str()); let playlist_text = request.call()?.into_string()?; // Parse the playlist: let playlist = m3u8_rs::parse_media_playlist(&playlist_text.as_bytes()) .finish() .unwrap(); // Grab the last (= best) segment from the media playlist to find the video "playlist" // (which contains all segments of the video): let video_uri = &playlist.1.segments.last().ok_or("").unwrap().uri; // xHamster uses relative URIs in its playlists, so we'll only need to replace // the last URL segment: playlist_url .path_segments_mut() .unwrap() .pop() .push(video_uri); Ok(playlist_url.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool> { let _video_info = get_video_info(video, url, webdriver_port); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "xHamster".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("ts".to_string()) } |
︙ | ︙ |
Changes to src/handlers/youtube.rs.
︙ | ︙ | |||
18 19 20 21 22 23 24 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::{json, Value}; | | < | | | | | > > > > > < | | | < > < | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | | > > > | > < | | | | < > < | | | | | | | < | 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 | use crate::definitions::SiteDefinition; use anyhow::Result; use regex::Regex; use serde_json::{json, Value}; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, id: &str) -> Result<Value> { if video.info.is_empty() { // We need to fetch the video information first. let video_url = "https://youtubei.googleapis.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; let req = ureq::post(&video_url).send_json(ureq::json!({ "videoId": id, "context": { "client": { "clientName": "ANDROID", "clientVersion": "16.02" } } }))?; let body = req.into_string()?; video.info.push_str(body.as_str()); } // Return it: let v: Value = serde_json::from_str(&video.info)?; Ok(v) } // Implement the site definition: struct YouTubeHandler; impl SiteDefinition for YouTubeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"(?:www\.)?youtu(?:be\.com|\.be)/") .unwrap() .is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // YouTube has broken domains, but no playlists. :-) Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<String> { let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap(); let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str(); let video_info = get_video_info(video, id)?; let video_info_title = video_info["videoDetails"]["title"].as_str().unwrap_or(""); Ok(String::from(video_info_title)) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, onlyaudio: bool, ) -> Result<String> { let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap(); let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str(); let video_info = get_video_info(video, id)?; let video_info_itags = match video_info["streamingData"]["formats"].as_array() { None => return Ok("".to_string()), Some(itags) => itags, }; let video_info_itags_adaptive = match video_info["streamingData"]["adaptiveFormats"].as_array() { None => return Ok("".to_string()), Some(itags) => itags, }; let mut url_to_choose = ""; // Finding the least horrible combination of video and audio: let vq1 = "tiny"; let vq2 = "small"; let vq3 = "medium"; let vq4 = "large"; let vq5 = "hd720"; let vq6 = "hd1080"; let mut last_vq = "".to_string(); let aq1 = "AUDIO_QUALITY_LOW"; let aq2 = "AUDIO_QUALITY_MEDIUM"; let aq3 = "AUDIO_QUALITY_HIGH"; let mut last_aq = "".to_string(); for itag in video_info_itags.iter().chain(video_info_itags_adaptive) { // The highest quality wins. let this_aq = itag["audioQuality"].as_str().unwrap_or(""); let this_vq = itag["quality"].as_str().unwrap_or(""); let is_better_audio = (last_aq.is_empty() && !this_aq.is_empty()) || (last_aq == aq1 && (this_aq == aq2 || this_aq == aq3)) || (last_aq == aq2 && this_aq == aq3); let is_same_or_better_audio = (last_aq == this_aq) || is_better_audio; let is_better_video = (last_vq.is_empty() && !this_vq.is_empty()) || (last_vq == vq1 && (this_vq == vq2 || this_vq == vq3 || this_vq == vq4 || this_vq == vq5 || this_vq == vq6)) || (last_vq == vq2 && (this_vq == vq3 || this_vq == vq4 || this_vq == vq5 || this_vq == vq6)) || (last_vq == vq3 && (this_vq == vq4 || this_vq == vq5 || this_vq == vq6)) || (last_vq == vq4 && (this_vq == vq5 || this_vq == vq6)) || (last_vq == vq5 && this_vq == vq6); let is_same_or_better_video = (last_vq == this_vq) || is_better_video; let is_better_quality = (is_better_audio && is_same_or_better_video) || (is_better_video && is_same_or_better_audio) || (onlyaudio && is_better_audio); // If audio: Try to download the best audio quality. // If video: Try to download the best combination. if (onlyaudio && itag["mimeType"].to_string().contains("audio/") || !onlyaudio && itag["mimeType"].to_string().contains("video/")) && (!onlyaudio || itag["quality"] != json!(null)) && itag["audioQuality"] != json!(null) && (onlyaudio && this_vq.is_empty() || !onlyaudio && last_vq.is_empty() && !this_vq.is_empty()) || is_better_quality { video.mime = itag["mimeType"].to_string(); url_to_choose = itag["url"].as_str().unwrap(); last_vq = String::from(this_vq); last_aq = String::from(this_aq); } } if url_to_choose.is_empty() { Err(anyhow::Error::msg( "Could not find a working itag - aborting.".to_string(), )) } else { Ok(url_to_choose.to_string()) } } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, _webdriver_port: u16, ) -> Result<bool> { let id_regex = Regex::new(r"(?:v=|\.be\/)(.*?)(&.*)*$").unwrap(); let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str(); let video_info = get_video_info(video, id)?; let video_info_is_playable = video_info["playabilityStatus"]["status"] == json!("OK"); let video_info_has_details = video_info["videoDetails"] != json!(null); Ok(video_info_has_details && video_info_is_playable) } fn display_name<'a>(&'a self) -> String { "YouTube".to_string() } fn find_video_file_extension<'a>( &'a self, video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { // By this point, we have already filled VIDEO_MIME. Let's just use that. let mut ext = "mp4"; if video.mime.contains("/webm") { ext = "webm"; } else if video.mime.contains("audio/mp4") { ext = "m4a"; } Ok(ext.to_string()) } fn web_driver_required<'a>(&'a self) -> bool { false } } // Push the site definition to the list of known handlers: inventory::submit! { &YouTubeHandler as &dyn SiteDefinition } |
Changes to src/main.rs.
︙ | ︙ | |||
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 | #[clap(long, help = "The port of your web driver (required for some sites)")] webdriver: Option<u16>, #[clap(help = "Sets the input URL to use", index = 1)] url: String, } fn main() -> Result<()> { // Argument parsing: let args = Args::parse(); let in_url = &args.url; inventory::collect!(&'static dyn definitions::SiteDefinition); let mut site_def_found = false; for handler in inventory::iter::<&dyn definitions::SiteDefinition> { // "15:15 And he found a pair of eyes, scanning the directories for files." // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the // ------------------------------------ // Find a known handler for <in_url>: | > > > > > > > > > > > > > > > > | 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 | #[clap(long, help = "The port of your web driver (required for some sites)")] webdriver: Option<u16>, #[clap(help = "Sets the input URL to use", index = 1)] url: String, } // #[derive(Debug)] // usage: // let v = VIDEO{info: String::new(), title:String::new(), mime:String::new()}; // println!("{:#?}",v); pub struct VIDEO { info: String, title: String, mime: String, } fn main() -> Result<()> { // Argument parsing: let args = Args::parse(); let in_url = &args.url; inventory::collect!(&'static dyn definitions::SiteDefinition); let mut site_def_found = false; let mut video = VIDEO { info: String::new(), title: String::new(), mime: String::new(), }; for handler in inventory::iter::<&dyn definitions::SiteDefinition> { // "15:15 And he found a pair of eyes, scanning the directories for files." // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the // ------------------------------------ // Find a known handler for <in_url>: |
︙ | ︙ | |||
99 100 101 102 103 104 105 | if handler.web_driver_required() && webdriverport == 0 { // This handler would need a web driver, but none is supplied to yaydl. println!("{} requires a web driver installed and running as described in the README. Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name()); continue; } | | | | > > > > > | > > | > > | 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | if handler.web_driver_required() && webdriverport == 0 { // This handler would need a web driver, but none is supplied to yaydl. println!("{} requires a web driver installed and running as described in the README. Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name()); continue; } let video_exists = handler.does_video_exist(&mut video, in_url, webdriverport)?; if !video_exists { println!("The video could not be found. Invalid link?"); } else { if args.verbose { println!("The requested video was found. Processing..."); } let video_title = handler.find_video_title(&mut video, in_url, webdriverport); let vt = match video_title { Err(_e) => "".to_string(), Ok(title) => title, }; // Usually, we already find errors here. if vt.is_empty() { println!("The video title could not be extracted. Invalid link?"); } else { if args.verbose { println!("Title: {}", vt); } let url = handler.find_video_direct_url( &mut video, in_url, webdriverport, args.onlyaudio, )?; let ext = handler.find_video_file_extension( &mut video, in_url, webdriverport, args.onlyaudio, )?; // Now let's download it: let mut targetfile = format!( "{}.{}", vt.trim() .replace(&['|', '\'', '\"', ':', '\'', '\\', '/'][..], r#""#), ext |
︙ | ︙ |