Yet Another Youtube Down Loader

⌈⌋ branch:  yaydl


Check-in [ebb5df12f3]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:yaydl 0.11.1: getting rid of unsafe { }.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | release-0.11.1
Files: files | file ages | folders
SHA3-256: ebb5df12f307daf79d2816234e8de3e9cca5c0d9eaabcc9bb9b4b38dacced62a
User & Date: Cthulhux 2022-07-28 01:52:38
Context
2022-07-29
00:36
yaydl 0.11.2: Fixed youtube regex error #11 * Added .gitignore check-in: 2334a25c2d user: rhydon tags: release-0.11.2, trunk
2022-07-28
01:52
yaydl 0.11.1: getting rid of unsafe { }. check-in: ebb5df12f3 user: Cthulhux tags: release-0.11.1, trunk
2022-07-21
00:34
yaydl 0.11: Added spankbang handler (cheers, @egdv from GitHub), updated dependencies. check-in: 1bdfe0346e user: Cthulhux tags: release-0.11.0, trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to CODE_OF_CONDUCT.md.

Changes to Cargo.lock.

1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.11.0"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",







|







1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.11.1"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",

Changes to Cargo.toml.

1
2
3
4
5
6
7
8
9
10
11
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.11.0"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]




|







1
2
3
4
5
6
7
8
9
10
11
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.11.1"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]

Changes to src/definitions.rs.

13
14
15
16
17
18
19


20
21
22
23
24
25
26
27





28
29
30
31
32
33





34
35
36
37

38
39
40
41
42
43
44
45

46
47
48
49
50
51
52
53
54
55
56
 * distribution.
 */

// Yet Another Youtube Down Loader
// - definitions.rs file -

use anyhow::Result;



// Define the public interface for site definitions:
pub trait SiteDefinition {
    // true, if this site can handle <url>.
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool;

    // true, if the video exists.
    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool>;






    // true, if the URL is a playlist.
    fn is_playlist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool>;

    // returns the title of a video.
    fn find_video_title<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<String>;






    // returns the download URL of a video or playlist.
    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String>;

    // returns the file extension of the video (e.g. "mp4").
    fn find_video_file_extension<'a>(
        &'a self,

        url: &'a str,
        webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String>;

    // returns the name of the site (e.g. "YouTube").
    fn display_name<'a>(&'a self) -> String;

    // true, if this site needs a web driver.
    fn web_driver_required<'a>(&'a self) -> bool;
}







>
>







|
>
>
>
>
>





|
>
>
>
>
>




>








>











13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
 * distribution.
 */

// Yet Another Youtube Down Loader
// - definitions.rs file -

use anyhow::Result;

use crate::VIDEO;

// Define the public interface for site definitions:
pub trait SiteDefinition {
    // true, if this site can handle <url>.
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool;

    // true, if the video exists.
    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool>;

    // true, if the URL is a playlist.
    fn is_playlist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool>;

    // returns the title of a video.
    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<String>;

    // returns the download URL of a video or playlist.
    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String>;

    // returns the file extension of the video (e.g. "mp4").
    fn find_video_file_extension<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String>;

    // returns the name of the site (e.g. "YouTube").
    fn display_name<'a>(&'a self) -> String;

    // true, if this site needs a web driver.
    fn web_driver_required<'a>(&'a self) -> bool;
}

Changes to src/handlers/porndoe.rs.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

use anyhow::{anyhow, Result};
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str, webdriver_port: u16) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()







|

|
|







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37

use anyhow::{anyhow, Result};
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
52
53
54
55
56
57
58

59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83


84



85
86
87
88
89
90
91
92
93
94
95
96
97
98
99

100
101
102
103
104
105

106
107
108
109
110
111
112
113
114
115
116


117


118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
                "document.getElementsByClassName('age-btn')[0].click();",
                vec![],
            )
            .await
            .expect("could not dismiss the age gate");

            let body = c.source().await.expect("could not read the site source");

            c.close_window().await.expect("could not close the window");

            VIDEO_INFO = body;
        });
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct PornDoeHandler;
impl SiteDefinition for PornDoeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"porndoe.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // PornDoe has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<String> {
        unsafe {


            let video_info = get_video_info(url, webdriver_port)?;




            let h1_selector = Selector::parse("h1.-heading").unwrap();
            let text = video_info.select(&h1_selector).next();

            let result = match text {
                Some(txt) => txt.text().collect(),
                None => return Err(anyhow!("Could not extract the video title.")),
            };

            Ok(result)
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let video_info = get_video_info(url, webdriver_port)?;


            let url_selector = Selector::parse(r#"meta[itemprop="contentUrl"]"#).unwrap();
            let url_elem = video_info.select(&url_selector).next().unwrap();
            let url_contents = url_elem.value().attr("content").unwrap();

            Ok(url_contents.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {
        unsafe {


            let _video_info = get_video_info(url, webdriver_port);


            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "PornDoe".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








>

<
<



<
<
|














|
|
>
>
|
>
>
>

|
|

|
|
|
|

|
|
|
<


>




<
|
>

|
|
|

|
|
|
<
|
|
>
>
|
>
>
|
<








>







52
53
54
55
56
57
58
59
60


61
62
63


64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105

106
107
108
109
110
111
112
113
114
115

116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
                "document.getElementsByClassName('age-btn')[0].click();",
                vec![],
            )
            .await
            .expect("could not dismiss the age gate");

            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");


        });
    }



    Ok(true)
}

// Implement the site definition:
struct PornDoeHandler;
impl SiteDefinition for PornDoeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"porndoe.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // PornDoe has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        let h1_selector = Selector::parse("h1.-heading").unwrap();
        let text = video_info_html.select(&h1_selector).next();

        let result = match text {
            Some(txt) => txt.text().collect(),
            None => return Err(anyhow!("Could not extract the video title.")),
        };

        Ok(result)
    }


    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        let url_selector = Selector::parse(r#"meta[itemprop="contentUrl"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("content").unwrap();

        Ok(url_contents.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _not_used = get_video_info(video, url, webdriver_port);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "PornDoe".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/spankbang.rs.

16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60

61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84





85
86
87
88
89
90
91

92
93
94
95
96
97

98
99
100
101
102
103
104
105
106
107
108


109


110
111
112
113
114
115
116
117
118
119

120
121
122
123
124
125
126
// specific url path format for this site
// https://spankbang.com/5-char-id/video/description+seprated+by+plus+char
//
// example:  https://spankbang.com/12345/video/description+for+this+video
// base filename: description_for_this_video lenght maximum is 142
// filename: description_for_this_video-12345.mp4
//
// https://spankbang.com/70841/video/nikki+fritz
//

// Yet Another Youtube Down Loader
// - Spankbang handler -

use crate::definitions::SiteDefinition;

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;



const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str, webdriver_port: u16) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");

            c.close_window().await.expect("could not close the window");

            VIDEO_INFO = body;
        });
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct SpankbangHandler;
impl SiteDefinition for SpankbangHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"spankbang.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Generic has playlists.
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {





        // generates a valid base filename from url path for linux and windows
        // video title is less reliable to generate base filename for this particular site
        Ok(url_filename(url.to_string()))
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let video_info = get_video_info(url, webdriver_port)?;


            let url_selector = Selector::parse(r#"source[type="video/mp4"]"#).unwrap();
            let url_elem = video_info.select(&url_selector).next().unwrap();
            let url_contents = url_elem.value().attr("src").unwrap();

            Ok(url_contents.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {
        unsafe {


            let _video_info = get_video_info(url, webdriver_port);


            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "Spankbang".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|













>
>



<
<
|
|

















>

<
<



<
<
|














|
>
>
>
>
>







>




<
|
>

|
|
|

|
|
|
<
|
|
>
>
|
>
>
|
<








>







16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41


42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62


63
64
65


66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105
106
107
108

109
110
111
112
113
114
115
116

117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
// specific url path format for this site
// https://spankbang.com/5-char-id/video/description+seprated+by+plus+char
//
// example:  https://spankbang.com/12345/video/description+for+this+video
// base filename: description_for_this_video lenght maximum is 142
// filename: description_for_this_video-12345.mp4
//
// test url: https://spankbang.com/70841/video/nikki+fritz
//

// Yet Another Youtube Down Loader
// - Spankbang handler -

use crate::definitions::SiteDefinition;

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

use crate::VIDEO;

const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string



fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");


        });
    }



    Ok(true)
}

// Implement the site definition:
struct SpankbangHandler;
impl SiteDefinition for SpankbangHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"spankbang.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Generic has playlists.
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        _video: &mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        // generates a valid base filename from url path for linux and windows
        // video title is less reliable to generate base filename for this particular site
        Ok(url_filename(url.to_string()))
    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        let _not_used = get_video_info(video, url, webdriver_port);
        let video_info_html = Html::parse_document(&video.info);

        let url_selector = Selector::parse(r#"source[type="video/mp4"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("src").unwrap();

        Ok(url_contents.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _not_used = get_video_info(video, url, webdriver_port);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "Spankbang".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/vidoza.rs.

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55




56
57
58
59
60
61
62
63
64
65
66
67
68

69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85




86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use scraper::{Html, Selector};

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;

        VIDEO_INFO = body;
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct VidozaHandler;
impl SiteDefinition for VidozaHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"vidoza.net/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vidoza does not seem to have playlists?
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {
        unsafe {




            let video_info = get_video_info(url)?;

            // Currently, there only is one <H1> on Vidoza. Good for us.
            let h1_selector = Selector::parse("h1").unwrap();
            let text = video_info.select(&h1_selector).next().unwrap();
            let result = text.text().collect();

            Ok(result)
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let video_info = get_video_info(url)?;

            let url_selector = Selector::parse("source").unwrap();
            let url_elem = video_info.select(&url_selector).next().unwrap();
            let url_contents = url_elem.value().attr("src").unwrap();

            Ok(url_contents.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "Vidoza".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|

|
|





|



|















|
|
>
>
>
>
|

|
|
|
|

|
<




>




<
|

|
|
|

|
|
|
<
|
|
>
>
>
>
|
|
<








>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67

68
69
70
71
72
73
74
75
76

77
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93

94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use scraper::{Html, Selector};

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;

        video.info = body;
    }

    // Return it:
    let d = Html::parse_document(&video.info);
    Ok(d)
}

// Implement the site definition:
struct VidozaHandler;
impl SiteDefinition for VidozaHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"vidoza.net/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vidoza does not seem to have playlists?
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        let video_info = get_video_info(video, url)?;

        // Currently, there only is one <H1> on Vidoza. Good for us.
        let h1_selector = Selector::parse("h1").unwrap();
        let text = video_info.select(&h1_selector).next().unwrap();
        let result = text.text().collect();

        Ok(result)

    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        let video_info = get_video_info(video, url)?;

        let url_selector = Selector::parse("source").unwrap();
        let url_elem = video_info.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("src").unwrap();

        Ok(url_contents.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "Vidoza".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/vimeo.rs.

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80




81
82
83
84
85
86
87

88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119




120
121
122
123
124
125
126
127
128
129
130

131
132
133
134
135
136
137

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use serde_json::Value;

static mut VIDEO_INFO: String = String::new();
static mut VIDEO_TITLE: String = String::new();

unsafe fn get_video_info(url: &str) -> Result<Value> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // Those are hidden behing a config file defined in the page source code.
        // Search for: window.vimeo.clip_page_config.player = {"config_url":"(.+?)"
        let req = ureq::get(url).call()?;
        let body = req.into_string()?;
        let re =
            Regex::new("window.vimeo.clip_page_config.player = .\"config_url\":\"(?P<URL>.+?)\"")
                .unwrap();
        let search = re.captures(&body).unwrap();

        // While we're grepping the source code: Vimeo also hides
        // the video title here.
        let title_re =
            Regex::new("<meta property=\"og:title\" content=\"(?P<TITLE>.+?)\"").unwrap();
        let title_search = title_re.captures(&body).unwrap();
        let video_title = title_search.name("TITLE").map_or("", |t| t.as_str());
        VIDEO_TITLE = video_title.to_string();

        // If yaydl stops here, the URL is invalid.
        // TODO: That should be more obvious to the user.
        let video_info_url = search
            .name("URL")
            .map_or("", |u| u.as_str())
            .replace("\\", "");

        // The "config_url" body is a JSON structure.
        // Grab and store it:
        let config_req = ureq::get(&video_info_url).call()?;
        let config_body = config_req.into_string()?;
        VIDEO_INFO = config_body;
    }

    // Return it:
    let v: Value = serde_json::from_str(&VIDEO_INFO)?;
    Ok(v)
}

// Implement the site definition:
struct VimeoHandler;
impl SiteDefinition for VimeoHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:www\.)?vimeo.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vimeo seems to have no playlists?
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<String> {
        unsafe {




            let ret = &VIDEO_TITLE;
            Ok(ret.to_string())
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let id_regex = Regex::new(r"(?:vimeo.com/)(.*$)").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();
        unsafe {
            let video_info = get_video_info(id)?;
            let video_info_streams_progressive =
                match video_info["request"]["files"]["progressive"].as_array() {
                    None => return Ok("".to_string()),
                    Some(streams) => streams,
                };

            // Vimeo makes it easy for us, as the size grows with the quality.
            // Thus, we can just take the largest width here.
            let mut url = "";
            let mut width = 0u64;
            for stream in video_info_streams_progressive.iter() {
                let this_width = stream["width"].as_u64().unwrap_or(0);
                if this_width > width {
                    width = this_width;
                    url = stream["url"].as_str().unwrap();
                }
            }

            Ok(url.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "Vimeo".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|
<

|
|
















|












|



|















|
|
>
>
>
>
|
|
<




>






<
|
|
|
|
|
|

|
|
|
|
|
|
|
|
|
|
|

|
|
|
<
|
|
>
>
>
>
|
|
<








>







18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118

119
120
121
122
123
124
125
126

127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use serde_json::Value;

use crate::VIDEO;


fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Value> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // Those are hidden behing a config file defined in the page source code.
        // Search for: window.vimeo.clip_page_config.player = {"config_url":"(.+?)"
        let req = ureq::get(url).call()?;
        let body = req.into_string()?;
        let re =
            Regex::new("window.vimeo.clip_page_config.player = .\"config_url\":\"(?P<URL>.+?)\"")
                .unwrap();
        let search = re.captures(&body).unwrap();

        // While we're grepping the source code: Vimeo also hides
        // the video title here.
        let title_re =
            Regex::new("<meta property=\"og:title\" content=\"(?P<TITLE>.+?)\"").unwrap();
        let title_search = title_re.captures(&body).unwrap();
        let video_title = title_search.name("TITLE").map_or("", |t| t.as_str());
        video.title = video_title.to_string();

        // If yaydl stops here, the URL is invalid.
        // TODO: That should be more obvious to the user.
        let video_info_url = search
            .name("URL")
            .map_or("", |u| u.as_str())
            .replace("\\", "");

        // The "config_url" body is a JSON structure.
        // Grab and store it:
        let config_req = ureq::get(&video_info_url).call()?;
        let config_body = config_req.into_string()?;
        video.info.push_str(config_body.as_str());
    }

    // Return it:
    let v: Value = serde_json::from_str(&video.info)?;
    Ok(v)
}

// Implement the site definition:
struct VimeoHandler;
impl SiteDefinition for VimeoHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:www\.)?vimeo.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vimeo seems to have no playlists?
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        let ret = &video.title;
        Ok(ret.to_string())

    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        let id_regex = Regex::new(r"(?:vimeo.com/)(.*$)").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();

        let video_info = get_video_info(video, id)?;
        let video_info_streams_progressive =
            match video_info["request"]["files"]["progressive"].as_array() {
                None => return Ok("".to_string()),
                Some(streams) => streams,
            };

        // Vimeo makes it easy for us, as the size grows with the quality.
        // Thus, we can just take the largest width here.
        let mut url = "";
        let mut width = 0u64;
        for stream in video_info_streams_progressive.iter() {
            let this_width = stream["width"].as_u64().unwrap_or(0);
            if this_width > width {
                width = this_width;
                url = stream["url"].as_str().unwrap();
            }
        }

        Ok(url.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "Vimeo".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/vivo.rs.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57




58
59
60
61
62
63
64
65
66
67
68
69

70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94




95
96
97
98
99
100
101
102
103
104
105

106
107
108
109
110
111
112

use anyhow::Result;
use cienli::ciphers::rot::{Rot, RotType};
use regex::Regex;
use scraper::{Html, Selector};
use urlencoding::decode;

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;

        VIDEO_INFO = body;
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct VivoHandler;
impl SiteDefinition for VivoHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"vivo.sx/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vivo has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {
        unsafe {




            let video_info = get_video_info(url)?;

            let title_selector = Selector::parse("div.stream-content").unwrap();
            let title_elem = video_info.select(&title_selector).next().unwrap();
            let title_contents = title_elem.value().attr("data-name").unwrap();

            Ok(title_contents.to_string())
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        // VIVO displays the stream URL only after executing JavaScript.
        // It is buried inside the source code and ROT47-encrypted. Bah... :-)
        unsafe {
            let src_re = Regex::new("source: '(?P<SOURCE>.+?)',").unwrap();
            let src_search = src_re.captures(&VIDEO_INFO).unwrap();
            let video_src = src_search.name("SOURCE").map_or("", |t| t.as_str());

            // URL decoding:
            let url_decoded = match decode(video_src) {
                Ok(u) => u,
                _ => unreachable!(),
            };

            // un-ROT47:
            let unrotated = Rot::new(&url_decoded, RotType::Rot47);
            Ok(unrotated.decipher().to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "VIVO".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|

|
|




|
<


<
|















|
|
>
>
>
>
|

|
|
|

|
<




>






<
|
|
|

|
|
|
|
|

|
|
|
|
|
<
|
|
>
>
>
>
|
|
<








>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35

36
37

38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66

67
68
69
70
71
72
73
74
75
76
77

78
79
80
81
82
83
84
85
86
87
88
89
90
91
92

93
94
95
96
97
98
99
100

101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116

use anyhow::Result;
use cienli::ciphers::rot::{Rot, RotType};
use regex::Regex;
use scraper::{Html, Selector};
use urlencoding::decode;

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;
        video.info.push_str(body.as_str());

    }


    let d = Html::parse_document(&video.info);
    Ok(d)
}

// Implement the site definition:
struct VivoHandler;
impl SiteDefinition for VivoHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"vivo.sx/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Vivo has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        let video_info = get_video_info(video, url)?;

        let title_selector = Selector::parse("div.stream-content").unwrap();
        let title_elem = video_info.select(&title_selector).next().unwrap();
        let title_contents = title_elem.value().attr("data-name").unwrap();

        Ok(title_contents.to_string())

    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        // VIVO displays the stream URL only after executing JavaScript.
        // It is buried inside the source code and ROT47-encrypted. Bah... :-)

        let src_re = Regex::new("source: '(?P<SOURCE>.+?)',").unwrap();
        let src_search = src_re.captures(&video.info).unwrap();
        let video_src = src_search.name("SOURCE").map_or("", |t| t.as_str());

        // URL decoding:
        let url_decoded = match decode(video_src) {
            Ok(u) => u,
            _ => unreachable!(),
        };

        // un-ROT47:
        let unrotated = Rot::new(&url_decoded, RotType::Rot47);
        Ok(unrotated.decipher().to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "VIVO".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/voe.rs.

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55




56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71

72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87




88
89
90
91
92
93
94
95
96
97
98

99
100
101
102
103
104
105

use crate::definitions::SiteDefinition;

use anyhow::{anyhow, Result};
use regex::Regex;
use scraper::{Html, Selector};

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;

        VIDEO_INFO = body;
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct VoeHandler;
impl SiteDefinition for VoeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:\.)?voe.sx/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // TODO: Does VOE still have playlists?
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {
        unsafe {




            let video_info = get_video_info(url)?;

            let h1_selector = Selector::parse("h1.mt-1").unwrap();
            let text = video_info.select(&h1_selector).next();

            let result = match text {
                Some(txt) => txt.text().collect(),
                None => return Err(anyhow!("Erroneous video site - maybe embed-only?")),
            };

            Ok(result)
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let _video_info = get_video_info(url)?;
            let url_re = Regex::new("sources: ..src: '(?P<URL>.+?)'").unwrap();
            let url_search = url_re.captures(&VIDEO_INFO).unwrap();
            let video_url = url_search.name("URL").map_or("", |u| u.as_str());

            Ok(video_url.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "Voe".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|

|
|





|



|















|
|
>
>
>
>
|

|
|

|
|
|
|

|
|
|
<


>




<
|
|
|
|

|
|
|
<
|
|
>
>
>
>
|
|
<








>







18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72

73
74
75
76
77
78
79

80
81
82
83
84
85
86
87

88
89
90
91
92
93
94
95

96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111

use crate::definitions::SiteDefinition;

use anyhow::{anyhow, Result};
use regex::Regex;
use scraper::{Html, Selector};

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str) -> Result<Html> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let req = ureq::get(&url).call()?;
        let body = req.into_string()?;

        video.info = body;
    }

    // Return it:
    let d = Html::parse_document(&video.info);
    Ok(d)
}

// Implement the site definition:
struct VoeHandler;
impl SiteDefinition for VoeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:\.)?voe.sx/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // TODO: Does VOE still have playlists?
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        let video_info = get_video_info(video, url)?;

        let h1_selector = Selector::parse("h1.mt-1").unwrap();
        let text = video_info.select(&h1_selector).next();

        let result = match text {
            Some(txt) => txt.text().collect(),
            None => return Err(anyhow!("Erroneous video site - maybe embed-only?")),
        };

        Ok(result)
    }


    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        let _video_info = get_video_info(video, url)?;
        let url_re = Regex::new("sources: ..src: '(?P<URL>.+?)'").unwrap();
        let url_search = url_re.captures(&video.info).unwrap();
        let video_url = url_search.name("URL").map_or("", |u| u.as_str());

        Ok(video_url.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "Voe".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/watchmdh.rs.

20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47

48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72


73



74
75
76
77
78
79
80
81
82
83
84

85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125




126
127
128
129
130
131
132
133
134
135
136

137
138
139
140
141
142
143

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str, webdriver_port: u16) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");

            c.close_window().await.expect("could not close the window");

            VIDEO_INFO = body;
        });
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct WatchMDHHandler;
impl SiteDefinition for WatchMDHHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"watchmdh.to/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // WatchMDH has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<String> {
        unsafe {


            let video_info = get_video_info(url, webdriver_port)?;




            let title_selector = Selector::parse(r#"meta[property="og:title"]"#).unwrap();
            let title_elem = video_info.select(&title_selector).next().unwrap();
            let title_contents = title_elem.value().attr("content").unwrap();

            Ok(title_contents.to_string())
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            // Find the best video format and the rnd value:
            let re_rnd = Regex::new(r"rnd: '(\d+)'").unwrap();
            let rnd = re_rnd
                .captures(&VIDEO_INFO)
                .unwrap()
                .get(1)
                .unwrap()
                .as_str();

            let re_vid1 = Regex::new("video_alt_url: 'function/0/(.+?)',").unwrap();
            let re_vid2 = Regex::new("video_url: 'function/0/(.+?)',").unwrap();

            let url_contents;

            if re_vid1.is_match(&VIDEO_INFO) {
                url_contents = re_vid1
                    .captures(&VIDEO_INFO)
                    .unwrap()
                    .get(1)
                    .unwrap()
                    .as_str();
            } else {
                url_contents = re_vid2
                    .captures(&VIDEO_INFO)
                    .unwrap()
                    .get(1)
                    .unwrap()
                    .as_str();
            }

            Ok(String::from(url_contents) + "?rnd=" + rnd)
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url, webdriver_port);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "WatchMDH".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }








|

|
|

















>

<
<



<
<
|














|
|
>
>
|
>
>
>

|
|
|

|
<




>




<
|
|
|
|
|
|
|
|

|
|

|

|
|
|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
<
|
|
>
>
>
>
|
|
<








>







20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49


50
51
52


53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81

82
83
84
85
86
87
88
89
90

91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123

124
125
126
127
128
129
130
131

132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");


        });
    }



    Ok(true)
}

// Implement the site definition:
struct WatchMDHHandler;
impl SiteDefinition for WatchMDHHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"watchmdh.to/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // WatchMDH has no playlists.
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        let title_selector = Selector::parse(r#"meta[property="og:title"]"#).unwrap();
        let title_elem = video_info_html.select(&title_selector).next().unwrap();
        let title_contents = title_elem.value().attr("content").unwrap();

        Ok(title_contents.to_string())

    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        // Find the best video format and the rnd value:
        let re_rnd = Regex::new(r"rnd: '(\d+)'").unwrap();
        let rnd = re_rnd
            .captures(&video.info)
            .unwrap()
            .get(1)
            .unwrap()
            .as_str();

        let re_vid1 = Regex::new("video_alt_url: 'function/0/(.+?)',").unwrap();
        let re_vid2 = Regex::new("video_url: 'function/0/(.+?)',").unwrap();

        let url_contents;

        if re_vid1.is_match(&video.info) {
            url_contents = re_vid1
                .captures(&video.info)
                .unwrap()
                .get(1)
                .unwrap()
                .as_str();
        } else {
            url_contents = re_vid2
                .captures(&video.info)
                .unwrap()
                .get(1)
                .unwrap()
                .as_str();
        }

        Ok(String::from(url_contents) + "?rnd=" + rnd)
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url, webdriver_port);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "WatchMDH".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

Changes to src/handlers/xhamster.rs.

22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49

50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74


75



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90

91
92
93
94
95
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128




129
130
131
132
133
134
135
136
137
138
139

140
141
142
143
144
145
146
use fantoccini::ClientBuilder;
use nom::Finish;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

static mut VIDEO_INFO: String = String::new();

unsafe fn get_video_info(url: &str, webdriver_port: u16) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");

            c.close_window().await.expect("could not close the window");

            VIDEO_INFO = body;
        });
    }

    // Return it:
    let d = Html::parse_document(&VIDEO_INFO);
    Ok(d)
}

// Implement the site definition:
struct XHamsterHandler;
impl SiteDefinition for XHamsterHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"xhamster.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // xHamster has playlists.
        Ok(true)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<String> {
        unsafe {


            let video_info = get_video_info(url, webdriver_port)?;




            let h1_selector = Selector::parse("h1").unwrap();
            let text = video_info.select(&h1_selector).next();

            let result = match text {
                Some(txt) => txt.text().collect(),
                None => return Err(anyhow!("Could not extract the video title.")),
            };

            Ok(result)
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let video_info = get_video_info(url, webdriver_port)?;


            // Find the playlist first:
            let url_selector = Selector::parse(r#"link[rel="preload"][as="fetch"]"#).unwrap();
            let url_elem = video_info.select(&url_selector).next().unwrap();
            let url_contents = url_elem.value().attr("href").unwrap();

            let mut playlist_url = Url::parse(url_contents)?;
            let request = ureq::get(playlist_url.as_str());
            let playlist_text = request.call()?.into_string()?;

            // Parse the playlist:
            let playlist = m3u8_rs::parse_media_playlist(&playlist_text.as_bytes())
                .finish()
                .unwrap();

            // Grab the last (= best) segment from the media playlist to find the video "playlist"
            // (which contains all segments of the video):
            let video_uri = &playlist.1.segments.last().ok_or("").unwrap().uri;

            // xHamster uses relative URIs in its playlists, so we'll only need to replace
            // the last URL segment:
            playlist_url
                .path_segments_mut()
                .unwrap()
                .pop()
                .push(video_uri);
            Ok(playlist_url.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {
        unsafe {




            let _video_info = get_video_info(url, webdriver_port);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "xHamster".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("ts".to_string())
    }








|

|
|

















>

<
<



<
<
|














|
|
>
>
|
>
>
>

|
|

|
|
|
|

|
|
|
<


>




<
|
>

|
|
|
|

|
|
|

|
|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
<
|
|
>
>
>
>
|
|
<








>







22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51


52
53
54


55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89

90
91
92
93
94
95
96

97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127

128
129
130
131
132
133
134
135

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
use fantoccini::ClientBuilder;
use nom::Finish;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

use crate::VIDEO;

fn get_video_info(video: &mut VIDEO, url: &str, webdriver_port: u16) -> Result<bool> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        // It will contain the whole body for now.
        let local_url = url.to_owned();

        let rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();
        rt.block_on(async move {
            let webdriver_url = format!("http://localhost:{}", webdriver_port);
            let c = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            c.goto(&local_url).await.expect("could not go to the URL");
            let body = c.source().await.expect("could not read the site source");
            video.info.push_str(body.as_str());
            c.close_window().await.expect("could not close the window");


        });
    }



    Ok(true)
}

// Implement the site definition:
struct XHamsterHandler;
impl SiteDefinition for XHamsterHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"xhamster.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // xHamster has playlists.
        Ok(true)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<String> {
        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        let h1_selector = Selector::parse("h1").unwrap();
        let text = video_info_html.select(&h1_selector).next();

        let result = match text {
            Some(txt) => txt.text().collect(),
            None => return Err(anyhow!("Could not extract the video title.")),
        };

        Ok(result)
    }


    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {

        let _not_used = get_video_info(video, url, webdriver_port)?;
        let video_info_html = Html::parse_document(video.info.as_str());

        // Find the playlist first:
        let url_selector = Selector::parse(r#"link[rel="preload"][as="fetch"]"#).unwrap();
        let url_elem = video_info_html.select(&url_selector).next().unwrap();
        let url_contents = url_elem.value().attr("href").unwrap();

        let mut playlist_url = Url::parse(url_contents)?;
        let request = ureq::get(playlist_url.as_str());
        let playlist_text = request.call()?.into_string()?;

        // Parse the playlist:
        let playlist = m3u8_rs::parse_media_playlist(&playlist_text.as_bytes())
            .finish()
            .unwrap();

        // Grab the last (= best) segment from the media playlist to find the video "playlist"
        // (which contains all segments of the video):
        let video_uri = &playlist.1.segments.last().ok_or("").unwrap().uri;

        // xHamster uses relative URIs in its playlists, so we'll only need to replace
        // the last URL segment:
        playlist_url
            .path_segments_mut()
            .unwrap()
            .pop()
            .push(video_uri);
        Ok(playlist_url.to_string())
    }


    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        webdriver_port: u16,
    ) -> Result<bool> {
        let _video_info = get_video_info(video, url, webdriver_port);
        Ok(!video.info.is_empty())

    }

    fn display_name<'a>(&'a self) -> String {
        "xHamster".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("ts".to_string())
    }

Changes to src/handlers/youtube.rs.

18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64





65
66
67
68
69
70
71
72
73
74
75
76

77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166



167

168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183

184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use serde_json::{json, Value};

static mut VIDEO_INFO: String = String::new();
static mut VIDEO_MIME: String = String::new();

unsafe fn get_video_info(id: &str) -> Result<Value> {
    if VIDEO_INFO.is_empty() {
        // We need to fetch the video information first.
        let video_url = "https://youtubei.googleapis.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
        let req = ureq::post(&video_url).send_json(ureq::json!({
            "videoId": id,
            "context": {
                "client": {
                    "clientName": "ANDROID",
                    "clientVersion": "16.02"
                }
            }
        }))?;
        let body = req.into_string()?;
        VIDEO_INFO = body;
    }

    // Return it:
    let v: Value = serde_json::from_str(&VIDEO_INFO)?;
    Ok(v)
}

// Implement the site definition:
struct YouTubeHandler;
impl SiteDefinition for YouTubeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:www\.)?youtu(?:be\.com|\.be)/")
            .unwrap()
            .is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // YouTube has broken domains, but no playlists. :-)
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {





        let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();
        unsafe {
            let video_info = get_video_info(id)?;
            let video_info_title = video_info["videoDetails"]["title"].as_str().unwrap_or("");

            Ok(String::from(video_info_title))
        }
    }

    fn find_video_direct_url<'a>(
        &'a self,

        url: &'a str,
        _webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String> {
        let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();
        unsafe {
            let video_info = get_video_info(id)?;
            let video_info_itags = match video_info["streamingData"]["formats"].as_array() {
                None => return Ok("".to_string()),
                Some(itags) => itags,
            };
            let video_info_itags_adaptive =
                match video_info["streamingData"]["adaptiveFormats"].as_array() {
                    None => return Ok("".to_string()),
                    Some(itags) => itags,
                };

            let mut url_to_choose = "";

            // Finding the least horrible combination of video and audio:
            let vq1 = "tiny";
            let vq2 = "small";
            let vq3 = "medium";
            let vq4 = "large";
            let vq5 = "hd720";
            let vq6 = "hd1080";
            let mut last_vq = "".to_string();

            let aq1 = "AUDIO_QUALITY_LOW";
            let aq2 = "AUDIO_QUALITY_MEDIUM";
            let aq3 = "AUDIO_QUALITY_HIGH";
            let mut last_aq = "".to_string();

            for itag in video_info_itags.iter().chain(video_info_itags_adaptive) {
                // The highest quality wins.
                let this_aq = itag["audioQuality"].as_str().unwrap_or("");
                let this_vq = itag["quality"].as_str().unwrap_or("");

                let is_better_audio = (last_aq.is_empty() && !this_aq.is_empty())
                    || (last_aq == aq1 && (this_aq == aq2 || this_aq == aq3))
                    || (last_aq == aq2 && this_aq == aq3);
                let is_same_or_better_audio = (last_aq == this_aq) || is_better_audio;

                let is_better_video = (last_vq.is_empty() && !this_vq.is_empty())
                    || (last_vq == vq1
                        && (this_vq == vq2
                            || this_vq == vq3
                            || this_vq == vq4
                            || this_vq == vq5
                            || this_vq == vq6))
                    || (last_vq == vq2
                        && (this_vq == vq3 || this_vq == vq4 || this_vq == vq5 || this_vq == vq6))
                    || (last_vq == vq3 && (this_vq == vq4 || this_vq == vq5 || this_vq == vq6))
                    || (last_vq == vq4 && (this_vq == vq5 || this_vq == vq6))
                    || (last_vq == vq5 && this_vq == vq6);
                let is_same_or_better_video = (last_vq == this_vq) || is_better_video;

                let is_better_quality = (is_better_audio && is_same_or_better_video)
                    || (is_better_video && is_same_or_better_audio)
                    || (onlyaudio && is_better_audio);

                // If audio: Try to download the best audio quality.
                // If video: Try to download the best combination.
                if (onlyaudio && itag["mimeType"].to_string().contains("audio/")
                    || !onlyaudio && itag["mimeType"].to_string().contains("video/"))
                    && (!onlyaudio || itag["quality"] != json!(null))
                    && itag["audioQuality"] != json!(null)
                    && (onlyaudio && this_vq.is_empty()
                        || !onlyaudio && last_vq.is_empty() && !this_vq.is_empty())
                    || is_better_quality
                {
                    VIDEO_MIME = itag["mimeType"].to_string();
                    url_to_choose = itag["url"].as_str().unwrap();

                    last_vq = String::from(this_vq);
                    last_aq = String::from(this_aq);
                }
            }

            if url_to_choose.is_empty() {
                Err(anyhow::Error::msg(
                    "Could not find a working itag - aborting.".to_string(),
                ))
            } else {
                Ok(url_to_choose.to_string())
            }
        }
    }




    fn does_video_exist<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<bool> {

        let id_regex = Regex::new(r"(?:v=|\.be\/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();
        unsafe {
            let video_info = get_video_info(id)?;
            let video_info_is_playable = video_info["playabilityStatus"]["status"] == json!("OK");
            let video_info_has_details = video_info["videoDetails"] != json!(null);
            Ok(video_info_has_details && video_info_is_playable)
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "YouTube".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,

        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        // By this point, we have already filled VIDEO_MIME. Let's just use that.
        unsafe {
            let mut ext = "mp4";
            if VIDEO_MIME.contains("/webm") {
                ext = "webm";
            } else if VIDEO_MIME.contains("audio/mp4") {
                ext = "m4a";
            }

            Ok(ext.to_string())
        }
    }

    fn web_driver_required<'a>(&'a self) -> bool {
        false
    }
}

// Push the site definition to the list of known handlers:
inventory::submit! {
    &YouTubeHandler as &dyn SiteDefinition
}







|
<

|
|












|



|

















|
>
>
>
>
>


<
|
|

|
<




>






<
|
|
|
|
|
|
|
|
|
|

|

|
|
|
|
|
|
|
|

|
|
|
|

|
|
|
|

|
|
|
|

|
|
|
|
|
|
|
|
|
|
|
|
|

|
|
|

|
|
|
|
|
|
|
|
|
|
|
|

|
|
|
|

|
|
|
|
|
|
|
|
|
|
>
>
>
|
>


<
|
|
|
|
<








>





<
|
|
|
|
|
|

|
<











18
19
20
21
22
23
24
25

26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70

71
72
73
74

75
76
77
78
79
80
81
82
83
84
85

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175

176
177
178
179

180
181
182
183
184
185
186
187
188
189
190
191
192
193

194
195
196
197
198
199
200
201

202
203
204
205
206
207
208
209
210
211
212

use crate::definitions::SiteDefinition;

use anyhow::Result;
use regex::Regex;
use serde_json::{json, Value};

use crate::VIDEO;


fn get_video_info(video: &mut VIDEO, id: &str) -> Result<Value> {
    if video.info.is_empty() {
        // We need to fetch the video information first.
        let video_url = "https://youtubei.googleapis.com/youtubei/v1/player?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8";
        let req = ureq::post(&video_url).send_json(ureq::json!({
            "videoId": id,
            "context": {
                "client": {
                    "clientName": "ANDROID",
                    "clientVersion": "16.02"
                }
            }
        }))?;
        let body = req.into_string()?;
        video.info.push_str(body.as_str());
    }

    // Return it:
    let v: Value = serde_json::from_str(&video.info)?;
    Ok(v)
}

// Implement the site definition:
struct YouTubeHandler;
impl SiteDefinition for YouTubeHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"(?:www\.)?youtu(?:be\.com|\.be)/")
            .unwrap()
            .is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // YouTube has broken domains, but no playlists. :-)
        Ok(false)
    }

    fn find_video_title<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<String> {
        let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();

        let video_info = get_video_info(video, id)?;
        let video_info_title = video_info["videoDetails"]["title"].as_str().unwrap_or("");

        Ok(String::from(video_info_title))

    }

    fn find_video_direct_url<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
        onlyaudio: bool,
    ) -> Result<String> {
        let id_regex = Regex::new(r"(?:v=|\.be/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();

        let video_info = get_video_info(video, id)?;
        let video_info_itags = match video_info["streamingData"]["formats"].as_array() {
            None => return Ok("".to_string()),
            Some(itags) => itags,
        };
        let video_info_itags_adaptive =
            match video_info["streamingData"]["adaptiveFormats"].as_array() {
                None => return Ok("".to_string()),
                Some(itags) => itags,
            };

        let mut url_to_choose = "";

        // Finding the least horrible combination of video and audio:
        let vq1 = "tiny";
        let vq2 = "small";
        let vq3 = "medium";
        let vq4 = "large";
        let vq5 = "hd720";
        let vq6 = "hd1080";
        let mut last_vq = "".to_string();

        let aq1 = "AUDIO_QUALITY_LOW";
        let aq2 = "AUDIO_QUALITY_MEDIUM";
        let aq3 = "AUDIO_QUALITY_HIGH";
        let mut last_aq = "".to_string();

        for itag in video_info_itags.iter().chain(video_info_itags_adaptive) {
            // The highest quality wins.
            let this_aq = itag["audioQuality"].as_str().unwrap_or("");
            let this_vq = itag["quality"].as_str().unwrap_or("");

            let is_better_audio = (last_aq.is_empty() && !this_aq.is_empty())
                || (last_aq == aq1 && (this_aq == aq2 || this_aq == aq3))
                || (last_aq == aq2 && this_aq == aq3);
            let is_same_or_better_audio = (last_aq == this_aq) || is_better_audio;

            let is_better_video = (last_vq.is_empty() && !this_vq.is_empty())
                || (last_vq == vq1
                    && (this_vq == vq2
                        || this_vq == vq3
                        || this_vq == vq4
                        || this_vq == vq5
                        || this_vq == vq6))
                || (last_vq == vq2
                    && (this_vq == vq3 || this_vq == vq4 || this_vq == vq5 || this_vq == vq6))
                || (last_vq == vq3 && (this_vq == vq4 || this_vq == vq5 || this_vq == vq6))
                || (last_vq == vq4 && (this_vq == vq5 || this_vq == vq6))
                || (last_vq == vq5 && this_vq == vq6);
            let is_same_or_better_video = (last_vq == this_vq) || is_better_video;

            let is_better_quality = (is_better_audio && is_same_or_better_video)
                || (is_better_video && is_same_or_better_audio)
                || (onlyaudio && is_better_audio);

            // If audio: Try to download the best audio quality.
            // If video: Try to download the best combination.
            if (onlyaudio && itag["mimeType"].to_string().contains("audio/")
                || !onlyaudio && itag["mimeType"].to_string().contains("video/"))
                && (!onlyaudio || itag["quality"] != json!(null))
                && itag["audioQuality"] != json!(null)
                && (onlyaudio && this_vq.is_empty()
                    || !onlyaudio && last_vq.is_empty() && !this_vq.is_empty())
                || is_better_quality
            {
                video.mime = itag["mimeType"].to_string();
                url_to_choose = itag["url"].as_str().unwrap();

                last_vq = String::from(this_vq);
                last_aq = String::from(this_aq);
            }
        }

        if url_to_choose.is_empty() {
            Err(anyhow::Error::msg(
                "Could not find a working itag - aborting.".to_string(),
            ))
        } else {
            Ok(url_to_choose.to_string())
        }
    }

    fn does_video_exist<'a>(
        &'a self,
        video: &'a mut VIDEO,
        url: &'a str,
        _webdriver_port: u16,
    ) -> Result<bool> {
        let id_regex = Regex::new(r"(?:v=|\.be\/)(.*?)(&.*)*$").unwrap();
        let id = id_regex.captures(url).unwrap().get(1).unwrap().as_str();

        let video_info = get_video_info(video, id)?;
        let video_info_is_playable = video_info["playabilityStatus"]["status"] == json!("OK");
        let video_info_has_details = video_info["videoDetails"] != json!(null);
        Ok(video_info_has_details && video_info_is_playable)

    }

    fn display_name<'a>(&'a self) -> String {
        "YouTube".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        video: &'a mut VIDEO,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        // By this point, we have already filled VIDEO_MIME. Let's just use that.

        let mut ext = "mp4";
        if video.mime.contains("/webm") {
            ext = "webm";
        } else if video.mime.contains("audio/mp4") {
            ext = "m4a";
        }

        Ok(ext.to_string())

    }

    fn web_driver_required<'a>(&'a self) -> bool {
        false
    }
}

// Push the site definition to the list of known handlers:
inventory::submit! {
    &YouTubeHandler as &dyn SiteDefinition
}

Changes to src/main.rs.

58
59
60
61
62
63
64










65
66
67
68
69
70
71
72






73
74
75
76
77
78
79

    #[clap(long, help = "The port of your web driver (required for some sites)")]
    webdriver: Option<u16>,

    #[clap(help = "Sets the input URL to use", index = 1)]
    url: String,
}











fn main() -> Result<()> {
    // Argument parsing:
    let args = Args::parse();

    let in_url = &args.url;
    inventory::collect!(&'static dyn definitions::SiteDefinition);
    let mut site_def_found = false;







    for handler in inventory::iter::<&dyn definitions::SiteDefinition> {
        // "15:15 And he found a pair of eyes, scanning the directories for files."
        // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the
        // ------------------------------------

        // Find a known handler for <in_url>:







>
>
>
>
>
>
>
>
>
>








>
>
>
>
>
>







58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95

    #[clap(long, help = "The port of your web driver (required for some sites)")]
    webdriver: Option<u16>,

    #[clap(help = "Sets the input URL to use", index = 1)]
    url: String,
}

// #[derive(Debug)]
// usage:
// let v = VIDEO{info: String::new(), title:String::new(), mime:String::new()};
// println!("{:#?}",v);
pub struct VIDEO {
    info: String,
    title: String,
    mime: String,
}

fn main() -> Result<()> {
    // Argument parsing:
    let args = Args::parse();

    let in_url = &args.url;
    inventory::collect!(&'static dyn definitions::SiteDefinition);
    let mut site_def_found = false;

    let mut video = VIDEO {
        info: String::new(),
        title: String::new(),
        mime: String::new(),
    };

    for handler in inventory::iter::<&dyn definitions::SiteDefinition> {
        // "15:15 And he found a pair of eyes, scanning the directories for files."
        // https://kingjamesprogramming.tumblr.com/post/123368869357/1515-and-he-found-a-pair-of-eyes-scanning-the
        // ------------------------------------

        // Find a known handler for <in_url>:
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128





129


130


131
132
133
134
135
136
137

        if handler.web_driver_required() && webdriverport == 0 {
            // This handler would need a web driver, but none is supplied to yaydl.
            println!("{} requires a web driver installed and running as described in the README. Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name());
            continue;
        }

        let video_exists = handler.does_video_exist(in_url, webdriverport)?;
        if !video_exists {
            println!("The video could not be found. Invalid link?");
        } else {
            if args.verbose {
                println!("The requested video was found. Processing...");
            }

            let video_title = handler.find_video_title(in_url, webdriverport);
            let vt = match video_title {
                Err(_e) => "".to_string(),
                Ok(title) => title,
            };

            // Usually, we already find errors here.
            if vt.is_empty() {
                println!("The video title could not be extracted. Invalid link?");
            } else {
                if args.verbose {
                    println!("Title: {}", vt);
                }

                let url = handler.find_video_direct_url(in_url, webdriverport, args.onlyaudio)?;





                let ext =


                    handler.find_video_file_extension(in_url, webdriverport, args.onlyaudio)?;



                // Now let's download it:
                let mut targetfile = format!(
                    "{}.{}",
                    vt.trim()
                        .replace(&['|', '\'', '\"', ':', '\'', '\\', '/'][..], r#""#),
                    ext







|







|













|
>
>
>
>
>
|
>
>
|
>
>







115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162

        if handler.web_driver_required() && webdriverport == 0 {
            // This handler would need a web driver, but none is supplied to yaydl.
            println!("{} requires a web driver installed and running as described in the README. Please tell yaydl which port to use (yaydl --webdriver <PORT>) and try again.", handler.display_name());
            continue;
        }

        let video_exists = handler.does_video_exist(&mut video, in_url, webdriverport)?;
        if !video_exists {
            println!("The video could not be found. Invalid link?");
        } else {
            if args.verbose {
                println!("The requested video was found. Processing...");
            }

            let video_title = handler.find_video_title(&mut video, in_url, webdriverport);
            let vt = match video_title {
                Err(_e) => "".to_string(),
                Ok(title) => title,
            };

            // Usually, we already find errors here.
            if vt.is_empty() {
                println!("The video title could not be extracted. Invalid link?");
            } else {
                if args.verbose {
                    println!("Title: {}", vt);
                }

                let url = handler.find_video_direct_url(
                    &mut video,
                    in_url,
                    webdriverport,
                    args.onlyaudio,
                )?;
                let ext = handler.find_video_file_extension(
                    &mut video,
                    in_url,
                    webdriverport,
                    args.onlyaudio,
                )?;

                // Now let's download it:
                let mut targetfile = format!(
                    "{}.{}",
                    vt.trim()
                        .replace(&['|', '\'', '\"', ':', '\'', '\\', '/'][..], r#""#),
                    ext