376 lines
12 KiB
Perl
376 lines
12 KiB
Perl
package YTYT;
|
|
|
|
use strict;
|
|
use warnings;
|
|
use utf8;
|
|
|
|
use DBI;
|
|
use JSON::Any;
|
|
use WWW::Mechanize;
|
|
use HTTP::Cookies;
|
|
|
|
# Construct a YTYT client object.
#
# Works both as a class method (YTYT->new(...)) and as a plain function
# (YTYT::new(...)).
#
# Named parameters (all optional):
#   host      - 'www.youtube.com' (default) or 'm.youtube.com'
#   db_path   - path of the SQLite database; when the file does not exist
#               it is created through create_db()
#   mechanize - ready-made user agent; when omitted one is built per host
#
# Returns a blessed hash with the keys db_path, mechanize and location.
# Dies when no user agent is supplied and host is not one of the two
# supported values.
sub new {
    # An odd number of arguments means an invocant (class name or object)
    # precedes the key/value pairs; without removing it every pair would be
    # shifted by one and all parameters silently ignored.
    my $class = @_ % 2 ? shift : __PACKAGE__;
    $class = ref $class if ref $class;
    my %params = @_;

    if (!defined $params{host}) {
        $params{host} = 'www.youtube.com';
    }
    if (!defined $params{db_path}) {
        $params{db_path} = "/var/lib/youtube/db/youtube.sqlite";
    }

    if (! -e $params{db_path}) {
        create_db(\%params);
    }

    if (!defined $params{mechanize}) {
        if ($params{host} eq 'www.youtube.com') {
            # NOTE(review): this file has no explicit "use LWP::UserAgent";
            # presumably it is loaded through WWW::Mechanize - confirm.
            $params{mechanize} = LWP::UserAgent->new();
        }
        elsif ($params{host} eq 'm.youtube.com') {
            # HTTP::Cookies->new takes key/value options. Passing only the
            # path made it an option name, so the jar was never file-backed.
            my $cookie_jar = HTTP::Cookies->new(
                file     => '/var/www/yt/yt_cookie.txt',
                autosave => 1,
            );
            $params{mechanize} = WWW::Mechanize->new(
                autocheck  => 1,
                cookie_jar => $cookie_jar,
                agent      => 'Mozilla/5.0 (Android 7.1.2; Mobile; rv:64.0) Gecko/64.0 Firefox/64.0',
            );
        }
        else {
            die "Invalid host: $params{host}\nAcceptable options: 'www.youtube.com' or 'm.youtube.com'";
        }
    }

    my $self = {
        db_path   => $params{db_path},
        mechanize => $params{mechanize},
        location  => "https://$params{host}",
    };
    return bless $self, $class;
}
|
|
|
|
# Fetch a channel's "videos" page and return the decoded ytInitialData
# structure embedded in it.
#
# Named parameters - the first one defined, in this order, selects the URL:
#   channelId     -> <location>/channel/<channelId>/videos
#   channelHandle -> <location>/@<channelHandle>/videos
#   channelName   -> <location>/c/<channelName>/videos
#   userName      -> <location>/user/<userName>/videos
#
# Returns whatever JSON::Any produces for the ytInitialData JSON.
# Dies when no identifier is given, when the page has no line containing
# "var ytInitialData", or when the JSON cannot be cut out of that line.
sub get_videos_page {
    my $self   = shift;
    my %params = @_;

    # Parameter name => URL path prefix, in priority order.
    my @routes = (
        [ channelId     => 'channel/' ],
        [ channelHandle => '@'        ],
        [ channelName   => 'c/'       ],
        [ userName      => 'user/'    ],
    );

    my ($target, $get);
    foreach my $route (@routes) {
        my ($key, $prefix) = @$route;
        next if !defined $params{$key};
        $target = $params{$key};
        $get    = $self->{location} . '/' . $prefix . $target . '/videos';
        last;
    }

    if (!defined $get) {
        die "Failed to fetch video page.\n" .
            "get_videos_page requires channelId, channelHandle, channelName or userName as an argument.\n";
    }

    my $response = $self->{mechanize}->get($get)->decoded_content;
    $response = '' if !defined $response;

    my @lines = split '\n', $response;
    my $initial_data = (grep { /var ytInitialData/ } @lines)[0];
    if (!defined $initial_data) {
        die "Error: Channel $target may not exist\n";
    }

    # Previously an unmatched substitution left the whole HTML line in place
    # and handed it to the JSON decoder; fail with a clear message instead.
    if ($initial_data !~ m{.*var ytInitialData = (.*?);\s*</script>}) {
        die "Error: Unable to extract ytInitialData for $target\n";
    }
    my $payload = $1;

    my $json = JSON::Any->new( utf8 => 1 );
    return $json->decode($payload);
}
|
|
|
|
# Convert a relative age such as "3 hours ago" into seconds.
#
#   $text   - text containing "<number> <unit>(s) ago"
#   $offset - position of the video inside a run of identical age texts;
#             each position adds a small unit-dependent step so that
#             otherwise identical ages keep their listing order
#
# Returns the number of seconds, or nothing when the text is not understood.
sub _age_text_to_seconds {
    my ($text, $offset) = @_;

    # unit => [ seconds per unit, seconds added per position in a run ]
    my %scale_for = (
        second => [ 1,            1        ],
        minute => [ 60,           1        ],
        hour   => [ 60*60,        60       ],
        day    => [ 60*60*24,     60*60    ],
        week   => [ 60*60*24*7,   60*60*24 ],
        month  => [ 60*60*24*30,  60*60*24 ],
        year   => [ 60*60*24*365, 60*60*24 ],
    );

    return if !defined $text;
    return if $text !~ m/(\d+) (second|minute|hour|day|week|month|year)s? ago/;

    # Use the captured number directly: surrounding words (for example a
    # leading "Premiered ") must not end up in the arithmetic.
    my ($count, $unit) = ($1, $2);
    my ($seconds, $step) = @{ $scale_for{$unit} };
    return $count * $seconds + ($offset || 0) * $step;
}

# List the videos shown on a channel's "videos" page.
#
# Takes the same named parameters as get_videos_page(). As a side effect the
# channel's title and first avatar thumbnail are written to the 'channels'
# table through $self->db_update(), which dies unless $self->{dbh} is set.
#
# Returns a list of hash references with the keys channelId, videoId,
# videoThumbnail, lengthText, publishedTimeText, shortViewCountText, title
# and age (epoch seconds estimated from the relative age text).
# Entries carrying a "Premium" badge, without a videoId, or without a
# published time text are skipped.
# Dies when an age text cannot be converted to seconds.
sub latest_videos {
    my $self   = shift;
    my %params = @_;

    my $content = get_videos_page($self, %params);

    $params{table}            = 'channels';
    $params{channelName}      = $content->{metadata}->{channelMetadataRenderer}->{title};
    $params{channelThumbnail} = $content->{metadata}->{channelMetadataRenderer}->{avatar}->{thumbnails}->[0]->{url};
    $self->db_update(%params);

    my $list_ref = $content->{contents}->{twoColumnBrowseResultsRenderer}->{tabs}->[1]->{tabRenderer}->{content}->{richGridRenderer}->{contents};

    my ($offset, $last) = (0, 0);
    my $now = time();
    my @videos;

    ITEM:
    foreach my $item (@{ $list_ref || [] }) {
        my $renderer = $item->{richItemRenderer}->{content}->{videoRenderer};
        next ITEM if ref($renderer) ne 'HASH';

        # Premium don't provide age and/or view count. Skip.
        # Every badge is inspected, not only the first one.
        my $badges  = $renderer->{badges};
        my $premium = grep {
            ref($_) eq 'HASH'
                && ref($_->{metadataBadgeRenderer}) eq 'HASH'
                && defined $_->{metadataBadgeRenderer}->{label}
                && $_->{metadataBadgeRenderer}->{label} eq "Premium"
        } @{ ref($badges) eq 'ARRAY' ? $badges : [] };
        next ITEM if $premium;

        next ITEM if !$renderer->{videoId};

        my %video = (
            channelId      => $params{channelId},
            videoId        => $renderer->{videoId},
            videoThumbnail => $renderer->{thumbnail}->{thumbnails}[0]->{url},
        );

        foreach my $overlay (@{ $renderer->{thumbnailOverlays} || [] }) {
            if (defined $overlay->{thumbnailOverlayTimeStatusRenderer}) {
                $video{lengthText} = $overlay->{thumbnailOverlayTimeStatusRenderer}->{text}->{simpleText};
                last;
            }
        }

        foreach my $field ( qw| publishedTimeText shortViewCountText | ) {
            $video{$field} = $renderer->{$field}->{simpleText};
        }
        $video{title} = $renderer->{title}->{runs}[0]->{text};

        # YouTube Premium doesn't show a publishedTimeText, and neither do
        # currently live streams. Just skip.
        next ITEM if !defined $video{publishedTimeText};

        my $age_text = $video{publishedTimeText};
        $age_text =~ s/^Streamed //;

        # Consecutive videos with the same age text get an increasing offset.
        $offset = $age_text eq $last ? $offset + 1 : 0;
        $last   = $age_text;

        my $seconds = _age_text_to_seconds($age_text, $offset);
        if (!defined $seconds) {
            die "Invalid age $age_text. Cannot convert to seconds.";
        }
        $video{age} = $now - $seconds;

        push @videos, \%video;
    }

    return @videos;
}
|
|
|
|
# Open the SQLite database at $self->{db_path} and store the handle in
# $self->{dbh} (AutoCommit on, PrintError off).
#
# Returns 1 on success; dies with the DBI error text when the connection
# cannot be established.
sub db_connect {
    my $self = shift;

    my $dsn = 'dbi:SQLite:dbname=' . $self->{db_path};

    # DBI reports connection failures in $DBI::errstr; $! is unrelated here.
    $self->{dbh} = DBI->connect($dsn, '', '', { AutoCommit => 1, PrintError => 0 })
        or die "Unable to connect: "
            . (defined $DBI::errstr ? $DBI::errstr : 'unknown error') . "\n";

    return 1;
}
|
|
|
|
# Close the database handle stored in $self->{dbh}, if any, and remove it
# from the object so that later "defined $self->{dbh}" checks do not see a
# stale, already disconnected handle.
#
# Returns 1. Calling it without an open handle is a no-op.
sub db_disconnect {
    my $self = shift;

    my $dbh = delete $self->{dbh};
    $dbh->disconnect() if defined $dbh;

    return 1;
}
|
|
|
|
# Create the SQLite database at $self->{db_path} together with any missing
# parent directories, then create the videos, channels and settings tables
# and insert the default settings row.
#
# $self only needs to be a hash reference with a db_path entry (new() calls
# this with its plain parameter hash).
#
# From the path component named 'db' onwards the effective group and user
# are switched to 'www-data' (when that account exists). The effective ids
# are reset to the real ids before returning, also when an error occurs.
#
# Returns 1. Dies when a directory cannot be created, when the database
# file already exists, or when a schema statement fails.
sub create_db {
    my $self = shift;

    my @dirs = split '/', $self->{db_path};
    pop @dirs;    # last component is the database file itself

    # An absolute path yields an empty first component. A relative path is
    # anchored at the current directory instead of losing its first
    # component and being rooted at "/".
    my $dir = '';
    if (@dirs && $dirs[0] eq '') {
        shift @dirs;
    }
    elsif (@dirs) {
        $dir = '.';
    }

    my @schema = (
        "CREATE TABLE videos(channelId, videoId, title, videoThumbnail, publishedTimeText, lengthText, shortViewCountText, age INTEGER, seen BOOL);",
        "CREATE TABLE channels(channelId, channelName, channelThumbnail, regex, category);",
        "CREATE TABLE settings(enable4 BOOL, read4, write4, enable6 BOOL, read6, write6, refresh INTEGER, player, embed_type, theme);",
        "INSERT INTO settings(enable4, read4, write4, enable6, read6, write6, refresh, player, embed_type, theme) values(1, '0.0.0.0/0', '0.0.0.0/0', 1, '::0/0', '::0/0', 15, 'web', 'proxy', 'default');",
    );

    my $connected = 0;
    my $ok = eval {
        # Ensure that entire path exists
        foreach my $component (@dirs) {
            $dir .= "/" . $component;

            if ($component eq 'db') {
                my $gid = getgrnam('www-data');
                my $uid = getpwnam('www-data');
                if (defined $gid && defined $uid) {
                    # Group first: once the user id is dropped the group
                    # may no longer be changeable.
                    $) = $gid;
                    $> = $uid;
                }
            }

            if (! -e $dir) {
                mkdir $dir
                    or -d $dir
                    or die "Unable to create directory $dir: $!\n";
            }
        }

        if (-e $self->{db_path}) {
            die "Database already exists: $self->{db_path}\n";
        }

        db_connect($self);
        $connected = 1;
        foreach my $statement (@schema) {
            # PrintError is off, so failures are only visible through the
            # return value.
            $self->{dbh}->do($statement)
                or die "Unable to initialise database: " . $self->{dbh}->errstr . "\n";
        }
        db_disconnect($self);
        $connected = 0;
        1;
    };
    my $error = $@;

    if (!$ok && $connected && defined $self->{dbh}) {
        eval { db_disconnect($self) };
    }

    # Always return to the real ids, even after a failure.
    $) = $(;
    $> = $<;

    die $error if !$ok;
    return 1;
}
|
|
|
|
# Update one row of the 'videos' or 'channels' table.
#
# Named parameters:
#   table - 'videos' (row selected by videoId) or 'channels' (row selected
#           by channelId)
#   plus one entry per required column of that table:
#     videos:   channelId videoId title publishedTimeText lengthText age seen
#     channels: channelId channelName channelThumbnail
#   regex - optional, channels only; written when defined
#
# The key columns (channelId, videoId) and 'seen' are required but never
# modified. All values are passed to the database as bind parameters, so
# quotes inside titles or names cannot break or alter the statement.
#
# Returns the result of DBI's do(). When a required column is missing a
# message is printed to STDERR and nothing is returned.
# Dies when there is no database handle, no table, or an unknown table.
sub db_update {
    my $self   = shift;
    my %params = @_;

    if (!defined $self->{dbh}) {
        die "Missing database handle\n";
    }
    if (!defined $params{table}) {
        die "Missing 'table' parameter\n";
    }

    my %schema_for = (
        videos => {
            key  => 'videoId',
            cols => [ qw | channelId videoId title publishedTimeText lengthText age seen | ],
        },
        channels => {
            key  => 'channelId',
            cols => [ qw | channelId channelName channelThumbnail | ],
        },
    );
    my $schema = $schema_for{ $params{table} }
        or die "Invalid table: $params{table}\n";

    foreach my $col (@{ $schema->{cols} }) {
        if (!defined $params{$col}) {
            print STDERR "Missing necessary column $col\n";
            return;
        }
    }

    my @set_cols = grep { $_ ne 'channelId' && $_ ne 'videoId' && $_ ne 'seen' }
                   @{ $schema->{cols} };
    if ($params{table} eq 'channels' && defined $params{regex}) {
        push @set_cols, 'regex';
    }

    # The table and column names come from the fixed lists above; only the
    # values are caller-controlled, and those go in as placeholders.
    my $key   = $schema->{key};
    my $query = "UPDATE $params{table} SET "
              . join(', ', map("$_ = ?", @set_cols))
              . " WHERE $key = ?";

    return $self->{dbh}->do($query, undef, @params{@set_cols}, $params{$key});
}
|
|
|
|
# Insert one row into the 'videos' or 'channels' table.
#
# Named parameters:
#   table - 'videos' or 'channels'
#   plus one entry per required column of that table:
#     videos:   channelId videoId title publishedTimeText lengthText age seen
#     channels: channelId channelName channelThumbnail
#   regex - optional, channels only; inserted when defined
#
# All values are passed as bind parameters. The earlier hand-built statement
# was left without its closing parenthesis whenever regex was supplied, and
# inserted age, seen and regex unquoted.
#
# Returns the result of DBI's do(). When a required column is missing a
# message is printed to STDERR and nothing is returned.
# Dies when there is no database handle, no table, or an unknown table.
sub db_insert {
    my $self   = shift;
    my %params = @_;

    if (!defined $self->{dbh}) {
        die "Missing database handle\n";
    }
    if (!defined $params{table}) {
        die "Missing 'table' parameter\n";
    }

    my %cols_for = (
        videos   => [ qw | channelId videoId title publishedTimeText lengthText age seen | ],
        channels => [ qw | channelId channelName channelThumbnail | ],
    );
    my $required = $cols_for{ $params{table} }
        or die "Invalid table: $params{table}\n";

    foreach my $col (@$required) {
        if (!defined $params{$col}) {
            print STDERR "Missing necessary column $col\n";
            return;
        }
    }

    my @insert_cols = @$required;
    if ($params{table} eq 'channels' && defined $params{regex}) {
        push @insert_cols, 'regex';
    }

    # Table and column names come from the fixed lists above; values are
    # bound, one placeholder per column.
    my $query = "INSERT INTO $params{table}("
              . join(',', @insert_cols)
              . ") VALUES("
              . join(',', ('?') x @insert_cols)
              . ")";

    return $self->{dbh}->do($query, undef, @params{@insert_cols});
}
|
|
|
|
1;
|