Support ENV options and update metadata variable locations

This commit is contained in:
John Mertz 2024-11-06 11:12:26 -07:00
parent a9657b6fe2
commit c17f10c790
Signed by: jpm
GPG Key ID: E9C5EA2D867501AB
1 changed file with 26 additions and 9 deletions

View File

@ -3,13 +3,20 @@ use warnings;
use strict; use strict;
use lib '../lib'; use lib '../lib';
use Data::Dump qw% dump %;
use YTYT; use YTYT;
# Constructor arguments for the YTYT object. 'host' carries the default and
# may be overridden from the environment below.
my %ytyt_args = (
    host => 'www.youtube.com',
);

# Map each supported environment variable onto the constructor argument of
# the same name, minus the 'YTYT_' prefix and lower-cased
# (YTYT_DB_PATH -> db_path, YTYT_HOST -> host).
foreach my $arg (qw/ YTYT_DB_PATH YTYT_HOST /) {
    # String comparison is required here: a numeric '!=' turns every
    # non-numeric value (a path, a host name) into 0 != 0 and would
    # silently discard the override.
    next unless defined $ENV{$arg} && $ENV{$arg} ne '';

    my ($ytyt_arg) = $arg =~ m/YTYT_(.*)/;
    $ytyt_args{lc($ytyt_arg)} = $ENV{$arg};
}

# NOTE(review): new() is called as a plain function, so the first element it
# receives is an argument key, not a class name -- confirm against YTYT.pm.
my $youtube = YTYT::new( %ytyt_args );

my $content;
@ -18,48 +25,58 @@ unless (defined $ARGV[0]) {
"with either of these values included. RegEx argument optional" . (scalar @ARGV) . "\n"; "with either of these values included. RegEx argument optional" . (scalar @ARGV) . "\n";
} }
# Classify a user-supplied channel identifier.
#
# Takes one string and returns a single (key => value) pair, where key is one
# of channelId, channelHandle, channelName or userName. The forms are tried
# from most to least specific; anything unrecognised is returned unchanged as
# a raw channelName.
sub _classify_channel_arg {
    my ($arg) = @_;

    # Optional scheme and host shared by every URL form.
    my $site = qr%(?:https://)?(?:www\.|m\.)?youtube\.com/%;

    # channelID is unambiguous, but not typical for a user to see.
    # Treat it as most accurate, but fall back to more common identifiers
    if ($arg =~ m%^UC[a-zA-Z0-9\-_]{22}$%) {
        return (channelId => $arg);
    }

    # YouTube now presents the '@Handle' in the public URL, so this is the
    # next most likely to be seen
    if ($arg =~ m%^(?:$site)?\@([^/]+)%) {
        return (channelHandle => $1);
    }

    # Channels that have not yet declared a handle will still have the ID in
    # the URL. The ID is captured directly so that nothing trailing it (path
    # or query string) can leak into the value.
    if ($arg =~ m%^${site}channel/(UC[a-zA-Z0-9\-_]{22})%) {
        return (channelId => $1);
    }

    # Channel names used to be presented in the URL.
    # This is probably deprecated, but it does not hurt to maintain support.
    # The name is the first path segment after '/c/'.
    if ($arg =~ m%^${site}c/([^/]*)%) {
        return (channelName => $1);
    }

    # The '/user/' path still technically works, though it is probably not
    # used anywhere
    if ($arg =~ m%^${site}user/([^/]*)%) {
        return (userName => $1);
    }

    # As a last attempt, assume that it is a raw channel name if no '@' is
    # included.
    return (channelName => $arg);
}

# Lookup parameters built from the command line: the optional second argument
# is a regex, the first identifies the channel.
my %params;

if (defined $ARGV[1]) {
    $params{regex} = $ARGV[1];
}

my ($id_kind, $id_value) = _classify_channel_arg($ARGV[0]);
$params{$id_kind} = $id_value;
# Fetch the channel's videos page, then use its channel metadata to fill in
# whichever identifiers the command line did not supply.
$content = $youtube->get_videos_page( %params );

$params{channelName} = $content->{metadata}{channelMetadataRenderer}{title}
    unless defined $params{channelName};

$params{channelId} = $content->{metadata}{channelMetadataRenderer}{externalId}
    unless defined $params{channelId};

# The thumbnail is always taken from the page: the first avatar entry.
$params{channelThumbnail} = $content->{metadata}{channelMetadataRenderer}{avatar}{thumbnails}[0]{url};
# Look the channel up before inserting so an existing one is not added twice.
$youtube->db_connect();

# channelId comes from the command line or from fetched page data, so it is
# bound as a placeholder rather than interpolated into the SQL text.
# NOTE(review): assumes $youtube->{dbh} is a DBI database handle -- confirm
# against YTYT.pm.
my @check = $youtube->{dbh}->selectrow_array(
    'SELECT channelId FROM channels WHERE channelId = ?',
    undef,
    $params{channelId},
);
if (scalar @check) { if (scalar @check) {
$youtube->db_disconnect(); $youtube->db_disconnect();
die "Channel already exists in database.\n"; die "Channel already exists in database.\n";