yaydl: Check-in [ebb5df12f3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview

Comment:	yaydl 0.11.1: getting rid of unsafe { }.
Downloads:	Tarball | ZIP archive | SQL archive
Timelines:	family | ancestors | descendants | both | trunk | release-0.11.1
Files:	files \| file ages \| folders
SHA3-256:	ebb5df12f307daf79d2816234e8de3e9cca5c0d9eaabcc9bb9b4b38dacced62a
User & Date:	Cthulhux 2022-07-28 01:52:38

Context

2022-07-29
00:36		yaydl 0.11.2: Fixed youtube regex error #11 * Added .gitignore check-in: 2334a25c2d user: rhydon tags: trunk, release-0.11.2
2022-07-28
01:52		yaydl 0.11.1: getting rid of unsafe { }. check-in: ebb5df12f3 user: Cthulhux tags: trunk, release-0.11.1
2022-07-21
00:34		yaydl 0.11: Added spankbang handler (cheers, @egdv from GitHub), updated dependencies. check-in: 1bdfe0346e user: Cthulhux tags: trunk, release-0.11.0

Changes

Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to CODE_OF_CONDUCT.md.

Changes to Cargo.lock.

Changes to Cargo.toml.

Changes to src/definitions.rs.

Changes to src/handlers/porndoe.rs.

Changes to src/handlers/spankbang.rs.

Changes to src/handlers/vidoza.rs.

Changes to src/handlers/vimeo.rs.

Changes to src/handlers/vivo.rs.

Changes to src/handlers/voe.rs.

Changes to src/handlers/watchmdh.rs.

Changes to src/handlers/xhamster.rs.

Changes to src/handlers/youtube.rs.

Changes to src/main.rs.

︙			︙
20 21 22 23 24 25 26 27 28 ~~29 30~~ 31 32 33 34 35 36 37	use anyhow::{anyhow, Result}; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; ~~stat~~ic mut~~ VIDEO~~_INFO: String = String::new()~~;~~ ~~~~unsafe~~ fn get_video_info(url: &str, webdriver_port: u16) -> Result<~~Htm~~l> { if ~~VIDEO_INFO~~.is_empty() {~~ // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io()	\| \| \|	20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37	use anyhow::{anyhow, Result}; use fantoccini::ClientBuilder; use regex::Regex; use scraper::{Html, Selector}; use tokio::runtime; use crate::VIDEO; fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> { if video.info.is_empty() { // We need to fetch the video information first. // It will contain the whole body for now. let local_url = url.to_owned(); let rt = runtime::Builder::new_current_thread() .enable_time() .enable_io()
︙			︙
52 53 54 55 56 57 58 59 ~~60 61~~ 62 63 64 ~~65 66 67~~ 68 69 70 71 72 73 74 75 76 77 78 79 80 81 ~~82 83~~ 84 85 ~~86 87~~ 88 ~~89 90 91 92~~ 93 ~~94 95 96 97~~ 98 99 100 101 102 103 ~~104 105~~ 106 ~~107 108 109~~ 110 ~~111 112 113 114 115 116~~ ~~117~~ ~~118 119~~ 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134	"document.getElementsByClassName('age-btn')[0].click();", vec![], ) .await .expect("could not dismiss the age gate"); let body = c.source().await.expect("could not read the site source"); c.close_window().await.expect("could not close the window"); ~~VIDEO_INFO = body;~~ }); } ~~~~// Return it:~~ ~~let d = Html::parse_document(&VIDEO_INFO);~~ Ok(d)~~ } // Implement the site definition: struct PornDoeHandler; impl SiteDefinition for PornDoeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"porndoe.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // PornDoe has no playlists. 
Ok(false) } ~~fn find_video_title<'a>(~~&'a self, url: &'a str, webdriver_port: u16) -> Result<String> {~~ uns~~afe {~~~~ ~~let video_info = get_video_info(url,~~ webdriver_port~~)?;~~ ~~let h1_selector = Selector::parse("h1.-heading").unwrap(); let text = video_info.select(&h1_selector).next();~~ ~~let result = match text { Some(txt) => txt.text().collect(), None => return Err(anyhow!("Could not extract the video title.")), };~~ ~~Ok(result) } }~~ fn find_video_direct_url<'a>( &'a self, url: &'a str, webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { ~~~~unsafe {~~ let vid~~eo_info~~ = get_video_info(url, webdriver_port)?;~~ let url_selector = Selector::parse(r#"meta[itemprop="contentUrl"]"#).unwrap(); let url_elem = video_info.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("content").unwrap(); ~~Ok(url_contents.to_string()) } } fn does_video_exist<'a>(~~&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {~~ uns~~afe {~~~~ ~~~~let _video_info = get_video_info(url,~~ webdriver_port);~~ ~~Ok(!~~VIDEO_INFO~~.is_empty()) }~~ } fn display_name<'a>(&'a self) -> String { "PornDoe".to_string() } fn find_video_file_extension<'a>( &'a self, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) }	> < < < < \| \| \| > > \| > > > \| \| \| \| \| \| \| \| \| < > < \| > \| \| \| \| \| \| < \| \| > > \| > > \| < >	52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139	"document.getElementsByClassName('age-btn')[0].click();", vec![], ) .await .expect("could not dismiss the age gate"); let body = c.source().await.expect("could not read the site source"); video.info.push_str(body.as_str()); c.close_window().await.expect("could not close the 
window"); }); } Ok(true) } // Implement the site definition: struct PornDoeHandler; impl SiteDefinition for PornDoeHandler { fn can_handle_url<'a>(&'a self, url: &'a str) -> bool { Regex::new(r"porndoe.com/.+").unwrap().is_match(url) } fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> { // PornDoe has no playlists. Ok(false) } fn find_video_title<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let h1_selector = Selector::parse("h1.-heading").unwrap(); let text = video_info_html.select(&h1_selector).next(); let result = match text { Some(txt) => txt.text().collect(), None => return Err(anyhow!("Could not extract the video title.")), }; Ok(result) } fn find_video_direct_url<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { let _not_used = get_video_info(video, url, webdriver_port)?; let video_info_html = Html::parse_document(video.info.as_str()); let url_selector = Selector::parse(r#"meta[itemprop="contentUrl"]"#).unwrap(); let url_elem = video_info_html.select(&url_selector).next().unwrap(); let url_contents = url_elem.value().attr("content").unwrap(); Ok(url_contents.to_string()) } fn does_video_exist<'a>( &'a self, video: &'a mut VIDEO, url: &'a str, webdriver_port: u16, ) -> Result<bool> { let _not_used = get_video_info(video, url, webdriver_port); Ok(!video.info.is_empty()) } fn display_name<'a>(&'a self) -> String { "PornDoe".to_string() } fn find_video_file_extension<'a>( &'a self, _video: &'a mut VIDEO, _url: &'a str, _webdriver_port: u16, _onlyaudio: bool, ) -> Result<String> { Ok("mp4".to_string()) }
︙			︙

︙			︙
58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79	#[clap(long, help = "The port of your web driver (required for some sites)")] webdriver: Option<u16>, #[clap(help = "Sets the input URL to use", index = 1)] url: String, } fn main() -> Result<()> { // Argument parsing: let args = Args::parse(); let in_url = &args.url; inventory::collect!(&'static dyn definitions::SiteDefinition); let mut site_def_found = false; for handler in inventory::iter::<&dyn definitions::SiteDefinition> { // "15:15 And he found a pair of eyes, scanning the directories for files." // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the // ------------------------------------ // Find a known handler for <in_url>:	> > > > > > > > > > > > > > > >	58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95	#[clap(long, help = "The port of your web driver (required for some sites)")] webdriver: Option<u16>, #[clap(help = "Sets the input URL to use", index = 1)] url: String, } // #[derive(Debug)] // usage: // let v = VIDEO{info: String::new(), title:String::new(), mime:String::new()}; // println!("{:#?}",v); pub struct VIDEO { info: String, title: String, mime: String, } fn main() -> Result<()> { // Argument parsing: let args = Args::parse(); let in_url = &args.url; inventory::collect!(&'static dyn definitions::SiteDefinition); let mut site_def_found = false; let mut video = VIDEO { info: String::new(), title: String::new(), mime: String::new(), }; for handler in inventory::iter::<&dyn definitions::SiteDefinition> { // "15:15 And he found a pair of eyes, scanning the directories for files." // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the // ------------------------------------ // Find a known handler for <in_url>:
︙			︙
99 100 101 102 103 104 105 ~~106~~ 107 108 109 110 111 112 113 ~~114~~ 115 116 117 118 119 120 121 122 123 124 125 126 127 ~~128~~ ~~129~~ ~~130~~ 131 132 133 134 135 136 137	if handler.web_driver_required() && webdriverport == 0 { // This handler would need a web driver, but none is supplied to yaydl. println!("{} requires a web driver installed and running as described in the README. Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name()); continue; } ~~let video_exists = handler.does_video_exist(in_url, webdriverport)?;~~ if !video_exists { println!("The video could not be found. Invalid link?"); } else { if args.verbose { println!("The requested video was found. Processing..."); } ~~let video_title = handler.find_video_title(in_url, webdriverport);~~ let vt = match video_title { Err(_e) => "".to_string(), Ok(title) => title, }; // Usually, we already find errors here. if vt.is_empty() { println!("The video title could not be extracted. Invalid link?"); } else { if args.verbose { println!("Title: {}", vt); } ~~let url = handler.find_video_direct_url(~~in_url, webdriverport, args.onlyaudio)?;~~~~ ~~let ext =~~ ~~handler.find_video_file_extension(in_url,~~ webdriverport, ~~args.onlyaudio)?;~~ // Now let's download it: let mut targetfile = format!( "{}.{}", vt.trim() .replace(&['\|', '\'', '\"', ':', '\'', '\\', '/'][..], r#""#), ext	\| \| \| > > > > > \| > > \| > >	115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162	if handler.web_driver_required() && webdriverport == 0 { // This handler would need a web driver, but none is supplied to yaydl. println!("{} requires a web driver installed and running as described in the README. 
Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name()); continue; } let video_exists = handler.does_video_exist(&mut video, in_url, webdriverport)?; if !video_exists { println!("The video could not be found. Invalid link?"); } else { if args.verbose { println!("The requested video was found. Processing..."); } let video_title = handler.find_video_title(&mut video, in_url, webdriverport); let vt = match video_title { Err(_e) => "".to_string(), Ok(title) => title, }; // Usually, we already find errors here. if vt.is_empty() { println!("The video title could not be extracted. Invalid link?"); } else { if args.verbose { println!("Title: {}", vt); } let url = handler.find_video_direct_url( &mut video, in_url, webdriverport, args.onlyaudio, )?; let ext = handler.find_video_file_extension( &mut video, in_url, webdriverport, args.onlyaudio, )?; // Now let's download it: let mut targetfile = format!( "{}.{}", vt.trim() .replace(&['\|', '\'', '\"', ':', '\'', '\\', '/'][..], r#""#), ext
︙			︙