Yet Another Youtube Down Loader

⌈⌋ branch:  yaydl


Check-in [7e1163f550]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:yaydl 0.11.4: The webdriver is not a requirement for SpankBang and XHamster (via LuisMayo @ GitHub)
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | release-0.11.4
Files: files | file ages | folders
SHA3-256: 7e1163f550a97e4f592defe1f50b4837f3f87197d4a7e150a63dfb4b3b7e058e
User & Date: Cthulhux 2022-11-08 23:30:22
Context
2022-11-10
12:43
As everyone is on Mastodon now, you could actually reach me there as well. Leaf check-in: 7cc8e20ce8 user: Cthulhux tags: trunk
2022-11-08
23:30
yaydl 0.11.4: The webdriver is not a requirement for SpankBang and XHamster (via LuisMayo @ GitHub) check-in: 7e1163f550 user: Cthulhux tags: release-0.11.4, trunk
2022-08-01
09:27
yaydl 0.11.3: Updated dependencies, fixed file name generation for videos that contain question marks. check-in: d02d554a31 user: Cthulhux tags: release-0.11.3, trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to Cargo.lock.

1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.11.2"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",







|







1886
1887
1888
1889
1890
1891
1892
1893
1894
1895
1896
1897
1898
1899
1900
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.11.4"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",

Changes to Cargo.toml.

1
2
3
4
5
6
7
8
9
10
11
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.11.3"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]




|







1
2
3
4
5
6
7
8
9
10
11
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.11.4"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]

Changes to src/handlers/spankbang.rs.

25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59

60
61
62
63
64
65
66
67
68
69
70

// Yet Another Youtube Down Loader
// - Spankbang handler -

use crate::definitions::SiteDefinition;

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

use crate::VIDEO;

const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string

/// Loads the page at `url` through a WebDriver session and appends its source to
/// `video.info`.
///
/// The whole fetch is skipped when `video.info` already contains data, so the
/// page is loaded at most once per `VIDEO` as long as the first load produced a
/// non-empty body.
///
/// * `video` - receives the page source in its `info` field.
/// * `url` - address of the page to open.
/// * `webdriver_port` - local port of the WebDriver; the session is opened
///   against `http://localhost:<port>`.
///
/// Returns `Ok(true)` whenever it returns at all.
///
/// # Panics
///
/// Panics if the Tokio runtime cannot be built, or if connecting to the
/// WebDriver, navigating, reading the page source or closing the window fails.
fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        // Owned copy of the URL; it is moved into the `async move` block below.
        let local_url = url.to_owned();

        // A current-thread Tokio runtime drives the async WebDriver client from
        // this synchronous function.
        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");

            // Keep the complete page source; callers parse it later.
            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");
        });
    }

    Ok(true)
}

// Implement the site definition:
// `SpankbangHandler` is a field-less unit struct. It is handed to
// `inventory::submit!` further down as a `&dyn SiteDefinition`.
struct SpankbangHandler;







<


<






|




|
<
<
<
<
<
<
<
<
|
|
<
|
>
|
|
<
|







25
26
27
28
29
30
31

32
33

34
35
36
37
38
39
40
41
42
43
44
45








46
47

48
49
50
51

52
53
54
55
56
57
58
59

// Yet Another Youtube Down Loader
// - Spankbang handler -

use crate::definitions::SiteDefinition;

use anyhow::Result;

use regex::Regex;
use scraper::{Html, Selector};

use url::Url;

use crate::VIDEO;

const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string

/// Fetches the page at `url` with a plain HTTP GET and appends its body to
/// `video.info`.
///
/// The request is skipped when `video.info` already contains data, so the page
/// is downloaded at most once per `VIDEO` as long as the first download
/// produced a non-empty body.
///
/// * `video` - receives the page body in its `info` field.
/// * `url` - address of the page to download.
///
/// Returns `Ok(true)` once `video.info` has been left as-is or filled.
///
/// # Errors
///
/// Returns the underlying `ureq` error when the request fails, and the
/// underlying I/O error when the response body cannot be read into a string.
/// Both cases used to panic; they are now reported through the `Result` this
/// function already returns.
fn get_video_info(video: &mut VIDEO, url: &str) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let body = ureq::get(url).call()?.into_string()?;
        video.info.push_str(&body);
    }

    Ok(true)
}

// Implement the site definition:
// `SpankbangHandler` is a field-less unit struct. It is handed to
// `inventory::submit!` further down as a `&dyn SiteDefinition`.
struct SpankbangHandler;
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134

135
136
137
138
139
140
141
        Ok(url_filename(url.to_string()))
    }

    /// Returns the `src` attribute of the first `source[type="video/mp4"]`
    /// element found in the fetched page.
    ///
    /// `_onlyaudio` is not used. The result of `get_video_info` is bound to an
    /// unused local and never inspected.
    ///
    /// # Panics
    ///
    /// Panics when the page contains no matching `<source>` element or when
    /// that element carries no `src` attribute.
    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let _fetch_outcome = get_video_info(video, url, webdriver_port);

        let document = Html::parse_document(&video.info);
        let mp4_source = Selector::parse(r#"source[type="video/mp4"]"#).unwrap();
        let first_match = document.select(&mp4_source).next().unwrap();
        let direct_url = first_match.value().attr("src").unwrap().to_string();

        Ok(direct_url)
    }

    /// Reports whether any page content is available for `url`.
    ///
    /// Calls `get_video_info` and then answers purely from `video.info`: the
    /// value that call returns is bound to an unused local and never inspected.
    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _fetch_outcome = get_video_info(video, url, webdriver_port);
        let has_content = !video.info.is_empty();
        Ok(has_content)
    }

    /// Name of the site this handler serves: always `"Spankbang"`.
    fn display_name<'a>(&'a self) -> String {
        String::from("Spankbang")
    }

    /// Returns the file extension for downloads from this handler.
    ///
    /// None of the arguments are consulted; the answer is always `"mp4"`.
    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let extension = String::from("mp4");
        Ok(extension)
    }

    /// Always returns `true`.
    fn web_driver_required<'a>(&'a self) -> bool {
        const NEEDS_WEB_DRIVER: bool = true;
        NEEDS_WEB_DRIVER
    }
}

// Push the site definition to the list of known handlers:
// the handler is submitted to `inventory` as a `&dyn SiteDefinition` trait object.
inventory::submit! {
    &SpankbangHandler as &dyn SiteDefinition
}







|


|













|

|


















<
>







78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122

123
124
125
126
127
128
129
130
        Ok(url_filename(url.to_string()))
    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url);
        let video_info_html = Html::parse_document(&video.info);

        let url_selector = Selector::parse(r#"source[type="video/mp4"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("src").unwrap();

        Ok(url_contents.to_string())
    }

    /// Reports whether any page content is available for `url`.
    ///
    /// Calls `get_video_info` and then answers purely from `video.info`: the
    /// value that call returns is bound to an unused local and never inspected.
    /// `_webdriver_port` is not used.
    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _fetch_outcome = get_video_info(video, url);
        let has_content = !video.info.is_empty();
        Ok(has_content)
    }

    /// Name of the site this handler serves: always `"Spankbang"`.
    fn display_name<'a>(&'a self) -> String {
        String::from("Spankbang")
    }

    /// Returns the file extension for downloads from this handler.
    ///
    /// None of the arguments are consulted; the answer is always `"mp4"`.
    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let extension = String::from("mp4");
        Ok(extension)
    }

    /// Always returns `false`.
    fn web_driver_required<'a>(&'a self) -> bool {
        const NEEDS_WEB_DRIVER: bool = false;
        NEEDS_WEB_DRIVER
    }
}

// Push the site definition to the list of known handlers:
// the handler is submitted to `inventory` as a `&dyn SiteDefinition` trait object.
inventory::submit! {
    &SpankbangHandler as &dyn SiteDefinition
}

Changes to src/handlers/xhamster.rs.

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48

49
50
51
52
53
54
55
56
57
58
59

// Yet Another Youtube Down Loader
// - xHamster handler -

use crate::definitions::SiteDefinition;

use anyhow::{anyhow, Result};
use fantoccini::ClientBuilder;
use nom::Finish;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

use crate::VIDEO;

/// Loads the page at `url` through a WebDriver session and appends its source to
/// `video.info`.
///
/// The whole fetch is skipped when `video.info` already contains data, so the
/// page is loaded at most once per `VIDEO` as long as the first load produced a
/// non-empty body.
///
/// * `video` - receives the page source in its `info` field.
/// * `url` - address of the page to open.
/// * `webdriver_port` - local port of the WebDriver; the session is opened
///   against `http://localhost:<port>`.
///
/// Returns `Ok(true)` whenever it returns at all.
///
/// # Panics
///
/// Panics if the Tokio runtime cannot be built, or if connecting to the
/// WebDriver, navigating, reading the page source or closing the window fails.
fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        // Owned copy of the URL; it is moved into the `async move` block below.
        let local_url = url.to_owned();

        // A current-thread Tokio runtime drives the async WebDriver client from
        // this synchronous function.
        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");

            // Keep the complete page source; callers parse it later.
            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");
        });
    }

    Ok(true)
}

// Implement the site definition:
// `XHamsterHandler` is a field-less unit struct. It is handed to
// `inventory::submit!` further down as a `&dyn SiteDefinition`.
struct XHamsterHandler;







<



<




|





|
<
<
<
<
<
<
<
|
|
<
|
>
|
|
<
|







15
16
17
18
19
20
21

22
23
24

25
26
27
28
29
30
31
32
33
34
35







36
37

38
39
40
41

42
43
44
45
46
47
48
49

// Yet Another Youtube Down Loader
// - xHamster handler -

use crate::definitions::SiteDefinition;

use anyhow::{anyhow, Result};

use nom::Finish;
use regex::Regex;
use scraper::{Html, Selector};

use url::Url;

use crate::VIDEO;

/// Fetches the page at `url` with a plain HTTP GET and appends its body to
/// `video.info`.
///
/// The request is skipped when `video.info` already contains data, so the page
/// is downloaded at most once per `VIDEO` as long as the first download
/// produced a non-empty body.
///
/// * `video` - receives the page body in its `info` field.
/// * `url` - address of the page to download.
///
/// Returns `Ok(true)` once `video.info` has been left as-is or filled.
///
/// # Errors
///
/// Returns the underlying `ureq` error when the request fails, and the
/// underlying I/O error when the response body cannot be read into a string.
/// Both cases used to panic; they are now reported through the `Result` this
/// function already returns.
fn get_video_info(video: &mut VIDEO, url: &str) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let body = ureq::get(url).call()?.into_string()?;
        video.info.push_str(&body);
    }

    Ok(true)
}

// Implement the site definition:
// `XHamsterHandler` is a field-less unit struct. It is handed to
// `inventory::submit!` further down as a `&dyn SiteDefinition`.
struct XHamsterHandler;
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
        Ok(true)
    }

    /// Returns the text content of the first `<h1>` element of the fetched page.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_video_info`, and returns
    /// "Could not extract the video title." when the page has no `<h1>`.
    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<String> {
        get_video_info(video, url, webdriver_port)?;

        let document = Html::parse_document(video.info.as_str());
        let heading = Selector::parse("h1").unwrap();

        // Concatenate every text node below the first heading.
        let title = document
            .select(&heading)
            .next()
            .map(|node| node.text().collect::<String>())
            .ok_or_else(|| anyhow!("Could not extract the video title."))?;

        Ok(title)
    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        // Find the playlist first:
        let url_selector = Selector::parse(r#"link[rel="preload"][as="fetch"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("href").unwrap();








|

|

















|


|







57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
        Ok(true)
    }

    /// Returns the text content of the first `<h1>` element of the fetched page.
    ///
    /// `_webdriver_port` is not used.
    ///
    /// # Errors
    ///
    /// Propagates any error from `get_video_info`, and returns
    /// "Could not extract the video title." when the page has no `<h1>`.
    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        get_video_info(video, url)?;

        let document = Html::parse_document(video.info.as_str());
        let heading = Selector::parse("h1").unwrap();

        // Concatenate every text node below the first heading.
        let title = document
            .select(&heading)
            .next()
            .map(|node| node.text().collect::<String>())
            .ok_or_else(|| anyhow!("Could not extract the video title."))?;

        Ok(title)
    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        // Find the playlist first:
        let url_selector = Selector::parse(r#"link[rel="preload"][as="fetch"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("href").unwrap();

125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153

154
155
156
157
158
159
160
        Ok(playlist_url.to_string())
    }

    /// Reports whether any page content is available for `url`.
    ///
    /// Calls `get_video_info` and then answers purely from `video.info`: the
    /// value that call returns is bound to an unused local and never inspected.
    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _fetch_outcome = get_video_info(video, url, webdriver_port);
        let has_content = !video.info.is_empty();
        Ok(has_content)
    }

    /// Name of the site this handler serves: always `"xHamster"`.
    fn display_name<'a>(&'a self) -> String {
        String::from("xHamster")
    }

    /// Returns the file extension for downloads from this handler.
    ///
    /// None of the arguments are consulted; the answer is always `"ts"`.
    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let extension = String::from("ts");
        Ok(extension)
    }

    /// Always returns `true`.
    fn web_driver_required<'a>(&'a self) -> bool {
        const NEEDS_WEB_DRIVER: bool = true;
        NEEDS_WEB_DRIVER
    }
}

// Push the site definition to the list of known handlers:
// the handler is submitted to `inventory` as a `&dyn SiteDefinition` trait object.
inventory::submit! {
    &XHamsterHandler as &dyn SiteDefinition
}







|

|


















<
>







115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

143
144
145
146
147
148
149
150
        Ok(playlist_url.to_string())
    }

    /// Reports whether any page content is available for `url`.
    ///
    /// Calls `get_video_info` and then answers purely from `video.info`: the
    /// value that call returns is bound to an unused local and never inspected.
    /// `_webdriver_port` is not used.
    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _fetch_outcome = get_video_info(video, url);
        let has_content = !video.info.is_empty();
        Ok(has_content)
    }

    /// Name of the site this handler serves: always `"xHamster"`.
    fn display_name<'a>(&'a self) -> String {
        String::from("xHamster")
    }

    /// Returns the file extension for downloads from this handler.
    ///
    /// None of the arguments are consulted; the answer is always `"ts"`.
    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let extension = String::from("ts");
        Ok(extension)
    }

    /// Always returns `false`.
    fn web_driver_required<'a>(&'a self) -> bool {
        const NEEDS_WEB_DRIVER: bool = false;
        NEEDS_WEB_DRIVER
    }
}

// Push the site definition to the list of known handlers:
// the handler is submitted to `inventory` as a `&dyn SiteDefinition` trait object.
inventory::submit! {
    &XHamsterHandler as &dyn SiteDefinition
}