Yet Another Youtube Down Loader

⌈⌋ ⎇ branch:  yaydl


Check-in [1bdfe0346e]

Many hyperlinks are disabled.
Use anonymous login to enable hyperlinks.

Overview
Comment:yaydl 0.11: Added spankbang handler (cheers, @egdv from GitHub), updated dependencies.
Downloads: Tarball | ZIP archive | SQL archive
Timelines: family | ancestors | descendants | both | trunk | release-0.11.0
Files: files | file ages | folders
SHA3-256: 1bdfe0346e4c1b1bcf7dc9baa894f2fd90789de8bb24b8eb8be2fdf57b2b689c
User & Date: Cthulhux 2022-07-21 00:34:37
Context
2022-07-28
01:52
yaydl 0.11.1: getting rid of unsafe { }. check-in: ebb5df12f3 user: Cthulhux tags: trunk, release-0.11.1
2022-07-21
00:34
yaydl 0.11: Added spankbang handler (cheers, @egdv from GitHub), updated dependencies. check-in: 1bdfe0346e user: Cthulhux tags: trunk, release-0.11.0
2022-07-08
12:14
Updated the CoC to reflect grammar changes by dpyro. check-in: 2363eb8b2b user: Cthulhux tags: trunk
Changes
Hide Diffs Unified Diffs Ignore Whitespace Patch

Changes to Cargo.lock.

108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
dependencies = [
 "num-integer",
 "regex",
]

[[package]]
name = "clap"
version = "3.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2dbdf4bdacb33466e854ce889eee8dfd5729abf7ccd7664d0a2d60cd384440b"
dependencies = [
 "atty",
 "bitflags",
 "clap_derive",
 "clap_lex",
 "indexmap",
 "lazy_static",
 "strsim",
 "termcolor",
 "textwrap",
]

[[package]]
name = "clap_derive"
version = "3.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25320346e922cffe59c0bbc5410c8d8784509efb321488971081313cb1e1a33c"
dependencies = [
 "heck",
 "proc-macro-error",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "clap_lex"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a37c35f1112dad5e6e0b1adaff798507497a18fceeb30cceb3bae7d1427b9213"
dependencies = [
 "os_str_bytes",
]

[[package]]
name = "console"
version = "0.15.0"







|

|






|







|

|










|

|







108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
dependencies = [
 "num-integer",
 "regex",
]

[[package]]
name = "clap"
version = "3.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac2bd7a1eb07da9ac757c923f69373deb7bc2ba5efc951b873bcb5e693992dca"
dependencies = [
 "atty",
 "bitflags",
 "clap_derive",
 "clap_lex",
 "indexmap",
 "once_cell",
 "strsim",
 "termcolor",
 "textwrap",
]

[[package]]
name = "clap_derive"
version = "3.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "759bf187376e1afa7b85b959e6a664a3e7a95203415dba952ad19139e798f902"
dependencies = [
 "heck",
 "proc-macro-error",
 "proc-macro2",
 "quote",
 "syn",
]

[[package]]
name = "clap_lex"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2850f2f5a82cbf437dd5af4d49848fbdfc27c157c3d010345776f952765261c5"
dependencies = [
 "os_str_bytes",
]

[[package]]
name = "console"
version = "0.15.0"
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
 "bitflags",
]

[[package]]
name = "regex"
version = "1.5.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d83f127d94bdbcda4c8cc2e50f6f84f4b611f69c902699ca385a39c3a75f9ff1"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.6.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "49b3de9ec5dc0a3417da371aab17d729997c15010e7fd24ff707773a33bddb64"

[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [







|

|








|

|







1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
checksum = "62f25bc4c7e55e0b0b7a1d43fb893f4fa1361d0abe38b9ce4f323c2adfe6ef42"
dependencies = [
 "bitflags",
]

[[package]]
name = "regex"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c4eb3267174b8c6c2f654116623910a0fef09c4753f8dd83db29c48a0df988b"
dependencies = [
 "aho-corasick",
 "memchr",
 "regex-syntax",
]

[[package]]
name = "regex-syntax"
version = "0.6.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a3f87b73ce11b1619a3c6332f45341e0047173771e8b8b73f87bfeefb7b56244"

[[package]]
name = "remove_dir_all"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3acd125665422973a33ac9d3dd2df85edad0f4ae9b00dafb1a05e43a9f5ef8e7"
dependencies = [
1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"

[[package]]
name = "ureq"
version = "2.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9399fa2f927a3d327187cbd201480cee55bee6ac5d3c77dd27f0c6814cff16d5"
dependencies = [
 "base64 0.13.0",
 "chunked_transfer",
 "flate2",
 "log",
 "once_cell",
 "rustls",







|

|







1662
1663
1664
1665
1666
1667
1668
1669
1670
1671
1672
1673
1674
1675
1676
1677
1678
name = "untrusted"
version = "0.7.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a"

[[package]]
name = "ureq"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b97acb4c28a254fd7a4aeec976c46a7fa404eac4d7c134b30c75144846d7cb8f"
dependencies = [
 "base64 0.13.0",
 "chunked_transfer",
 "flate2",
 "log",
 "once_cell",
 "rustls",
1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.10.0"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",







|







1921
1922
1923
1924
1925
1926
1927
1928
1929
1930
1931
1932
1933
1934
1935
name = "windows_x86_64_msvc"
version = "0.36.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c811ca4a8c853ef420abd8592ba53ddbbac90410fab6903b3e79972a631f7680"

[[package]]
name = "yaydl"
version = "0.11.0"
dependencies = [
 "anyhow",
 "cienli",
 "clap",
 "fantoccini",
 "indicatif",
 "inventory",

Changes to Cargo.toml.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.10.1"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]

[dependencies]
anyhow = "1.0"
cienli = "0.3"
clap = { version = "3.1", features = ["derive"] }
fantoccini = "0.19"
indicatif = "0.16"
inventory = "0.1"
m3u8-rs = "4.0"
nom = "7.1"
regex = "1.5"
scraper = "0.13"
serde_json = "1.0"
tokio = { version = "1", features = ["rt"] }
ureq = { version = "2.4", features = ["json"] }
url = "2.2"
urlencoding = "2.1"

[profile.release]
lto = true
strip = true



|










|





|



|






1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
[package]
name = "yaydl"
description = "yet another youtube (and more) down loader"
version = "0.11.0"
authors = ["Cthulhux <git@tuxproject.de>"]
edition = "2021"
license = "CDDL-1.0"
repository = "https://code.rosaelefanten.org/yaydl"
categories = ["command-line-utilities"]
keywords = ["youtube", "downloading", "video"]

[dependencies]
anyhow = "1.0"
cienli = "0.3"
clap = { version = "3.2", features = ["derive"] }
fantoccini = "0.19"
indicatif = "0.16"
inventory = "0.1"
m3u8-rs = "4.0"
nom = "7.1"
regex = "1.6"
scraper = "0.13"
serde_json = "1.0"
tokio = { version = "1", features = ["rt"] }
ureq = { version = "2.5", features = ["json"] }
url = "2.2"
urlencoding = "2.1"

[profile.release]
lto = true
strip = true

Changes to README.md.

15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
* Can download videos.
* Can optionally keep only the audio part of them.
* Could convert the resulting file to something else (requires the `ffmpeg` binary).
* Comes as a single binary (once compiled) - take it everywhere on your thumbdrive, no Python cruft required.

## Currently supported sites

* porndoe.com · vidoza.net · vimeo.com · vivo.sx · voe.sx · watchmdh.to · xhamster.com · youtube.com

There is an easy way to add more supported sites, see below for details.

## Non-features

The list of features is deliberately kept short:








|







15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
* Can download videos.
* Can optionally keep only the audio part of them.
* Could convert the resulting file to something else (requires the `ffmpeg` binary).
* Comes as a single binary (once compiled) - take it everywhere on your thumbdrive, no Python cruft required.

## Currently supported sites

* porndoe.com · spankbang.com · vidoza.net · vimeo.com · vivo.sx · voe.sx · watchmdh.to · xhamster.com · youtube.com

There is an easy way to add more supported sites, see below for details.

## Non-features

The list of features is deliberately kept short:

Changes to src/handlers.rs.

13
14
15
16
17
18
19

20
21
22
23
24
25
26
27
28
 * distribution.
 */

// Yet Another Youtube Down Loader
// - handlers.rs file -

mod porndoe;

mod vidoza;
mod vimeo;
mod vivo;
mod voe;
mod watchmdh;
mod xhamster;
mod youtube;

// Add your own modules here.







>









13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
 * distribution.
 */

// Yet Another Youtube Down Loader
// - handlers.rs file -

mod porndoe;
mod spankbang;
mod vidoza;
mod vimeo;
mod vivo;
mod voe;
mod watchmdh;
mod xhamster;
mod youtube;

// Add your own modules here.

Added src/handlers/spankbang.rs.





























































































































































































































































































































































































>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
>
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * See the file LICENSE in this distribution for details.
 * A copy of the CDDL is also available via the Internet at
 * http://www.opensource.org/licenses/cddl1.txt
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the contents of the LICENSE file from this
 * distribution.
 */

// specific url path format for this site
// https://spankbang.com/5-char-id/video/description+separated+by+plus+char
//
// example:  https://spankbang.com/12345/video/description+for+this+video
// base filename: description_for_this_video (maximum length is 142)
// filename: description_for_this_video-12345.mp4
//
// https://spankbang.com/70841/video/nikki+fritz
//

// Yet Another Youtube Down Loader
// - Spankbang handler -

use crate::definitions::SiteDefinition;

use anyhow::Result;
use fantoccini::ClientBuilder;
use regex::Regex;
use scraper::{Html, Selector};
use tokio::runtime;
use url::Url;

// Longest description (in bytes) that url_filename() keeps before cutting it off.
const MAX_FILENAME_LENGTH: usize = 142; // filename is based on url path description string

// Cache for the page source of the video page; stays empty until the first
// successful fetch.
static mut VIDEO_INFO: String = String::new();

// Returns the parsed HTML document of the video page.
//
// On the first call the page is loaded through the WebDriver listening on
// http://localhost:<webdriver_port> and its source is stored in VIDEO_INFO.
// Every later call parses the cached source again; note that the cache is
// only checked for emptiness, so `url` is ignored once it has been filled.
//
// Safety: this function reads and writes the unsynchronised VIDEO_INFO
// static, so the caller has to make sure nothing else accesses it at the
// same time.
//
// Panics if the tokio runtime cannot be built or if any WebDriver step
// (connect, navigate, read source, close window) fails.
unsafe fn get_video_info(url: &str, webdriver_port: u16) -> Result<Html> {
    if VIDEO_INFO.is_empty() {
        // Nothing cached yet: fetch the whole page body.
        let target = url.to_owned();
        let webdriver_url = format!("http://localhost:{}", webdriver_port);

        let single_thread_rt = runtime::Builder::new_current_thread()
            .enable_time()
            .enable_io()
            .build()
            .unwrap();

        let page_source = single_thread_rt.block_on(async move {
            let client = ClientBuilder::native()
                .connect(&webdriver_url)
                .await
                .expect("failed to connect to web driver");
            client.goto(&target).await.expect("could not go to the URL");
            let source = client
                .source()
                .await
                .expect("could not read the site source");
            client
                .close_window()
                .await
                .expect("could not close the window");

            source
        });

        VIDEO_INFO = page_source;
    }

    // Hand out a freshly parsed document built from the cached source.
    Ok(Html::parse_document(&VIDEO_INFO))
}

// Implement the site definition:
struct SpankbangHandler;
impl SiteDefinition for SpankbangHandler {
    fn can_handle_url<'a>(&'a self, url: &'a str) -> bool {
        Regex::new(r"spankbang.com/.+").unwrap().is_match(url)
    }

    fn is_playlist<'a>(&'a self, _url: &'a str, _webdriver_port: u16) -> Result<bool> {
        // Generic has playlists.
        Ok(false)
    }

    fn find_video_title<'a>(&'a self, url: &'a str, _webdriver_port: u16) -> Result<String> {
        // generates a valid base filename from url path for linux and windows
        // video title is less reliable to generate base filename for this particular site
        Ok(url_filename(url.to_string()))
    }

    fn find_video_direct_url<'a>(
        &'a self,
        url: &'a str,
        webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        unsafe {
            let video_info = get_video_info(url, webdriver_port)?;

            let url_selector = Selector::parse(r#"source[type="video/mp4"]"#).unwrap();
            let url_elem = video_info.select(&url_selector).next().unwrap();
            let url_contents = url_elem.value().attr("src").unwrap();

            Ok(url_contents.to_string())
        }
    }

    fn does_video_exist<'a>(&'a self, url: &'a str, webdriver_port: u16) -> Result<bool> {
        unsafe {
            let _video_info = get_video_info(url, webdriver_port);
            Ok(!VIDEO_INFO.is_empty())
        }
    }

    fn display_name<'a>(&'a self) -> String {
        "Spankbang".to_string()
    }

    fn find_video_file_extension<'a>(
        &'a self,
        _url: &'a str,
        _webdriver_port: u16,
        _onlyaudio: bool,
    ) -> Result<String> {
        Ok("mp4".to_string())
    }

    fn web_driver_required<'a>(&'a self) -> bool {
        true
    }
}

// Push the site definition to the list of known handlers.
// `inventory::submit!` is handed a reference to the unit struct, cast to a
// `&dyn SiteDefinition` trait object.
inventory::submit! {
    &SpankbangHandler as &dyn SiteDefinition
}

// Converts a video URL into a base filename (without extension).
// The path pattern is specific to this web site:
//
//   https://spankbang.com/12345/video/description+for+this+video
//
// becomes "description_for_this_video-12345". A description longer than
// MAX_FILENAME_LENGTH bytes is cut off and marked with "...".
//
// If the URL cannot be parsed, or its path does not have the expected
// "/<id>/video/<description>" shape, the (sanitized) path itself is used as
// the filename instead of panicking.
fn url_filename(url: String) -> String {
    // extract url path
    let path = match Url::parse(&url) {
        Ok(parsed) => parsed.path().to_string(),
        Err(e) => {
            println!("Error: Could not parse '{}'. {}.", url, e);
            "url_filename_parse_error".to_string()
        }
    };

    // path = /12345/video/description+for+this+video
    // which splits into ["", "12345", "video", "description+for+this+video"]
    let segments: Vec<&str> = path.split('/').collect();

    // Checked lookups: index 1 is the id, index 3 the description.
    let base_filename = match (segments.get(1), segments.get(3)) {
        (Some(id_5char), Some(description)) if description.len() > MAX_FILENAME_LENGTH => {
            // Url::path() is percent-encoded ASCII, so cutting at a byte
            // offset cannot land inside a multi-byte character.
            let (shorten, _) = description.split_at(MAX_FILENAME_LENGTH);
            format!("{}...-{}", shorten, id_5char)
        }
        (Some(id_5char), Some(description)) => format!("{}-{}", description, id_5char),
        // Unexpected path shape (or the parse-error placeholder): fall back
        // to the whole path.
        _ => path.clone(),
    };

    windows_filename(linux_filename(base_filename))
}

// Trims surrounding whitespace and swaps every character that is treated as
// invalid in a linux filename ( | ' " : \ / ) for an underscore.
fn linux_filename(in_filename: String) -> String {
    in_filename
        .trim()
        .chars()
        .map(|ch| match ch {
            '|' | '\'' | '"' | ':' | '\\' | '/' => '_',
            keep => keep,
        })
        .collect()
}

// Trims surrounding whitespace and swaps every character that is treated as
// invalid in a windows filename ( < > : ? * and newline ) for an underscore.
// The plus sign is replaced as well; that one is specific to spankbang.
fn windows_filename(in_filename: String) -> String {
    in_filename
        .trim()
        .chars()
        .map(|ch| match ch {
            '<' | '>' | ':' | '?' | '*' | '\n' | '+' => '_',
            keep => keep,
        })
        .collect()
}