From c17f10c7907219ef743829edd84ee4f269feb840 Mon Sep 17 00:00:00 2001
From: John Mertz
Date: Wed, 6 Nov 2024 11:12:26 -0700
Subject: [PATCH] Support ENV options and update metadata variable locations

---
Review notes (text between the three-dash marker and the diffstat is not
part of the commit):

 * The ENV check compared strings with numeric `!=`. A value with no
   leading number (a hostname, a filesystem path) numifies to 0, as does
   the empty string, so the test was `0 != 0` and the override was
   silently skipped. It now uses the string operator `ne`.
 * Line structure and indentation were reconstructed from a
   whitespace-collapsed copy of this patch. The second hunk header
   originally declared 48 old / 58 new lines; only 45 / 55 could be
   recovered (three blank context lines are unaccounted for), so the
   counts were recomputed from the visible lines. The blob ids on the
   index line also predate the `ne` edit. Regenerate against the real
   file before applying.
 * Not addressed here because they are unchanged context lines: the
   `/c/` and `/user/` substitutions replace with `$4` although the
   pattern only has three capture groups, and the duplicate-channel
   SELECT interpolates channelId directly into the SQL string.

 bin/add_youtube_subscription.pl | 35 ++++++++++++++++++++++++---------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/bin/add_youtube_subscription.pl b/bin/add_youtube_subscription.pl
index 51a15a4..544711c 100755
--- a/bin/add_youtube_subscription.pl
+++ b/bin/add_youtube_subscription.pl
@@ -3,13 +3,20 @@ use warnings;
 use strict;
 
 use lib '../lib';
-use Data::Dump qw% dump %;
 use YTYT;
 
-my %params;
+my %ytyt_args = (
+    host => 'www.youtube.com'
+);
 
-# Setup YouTube object
-my $youtube = YTYT::new( host => 'www.youtube.com' );
+foreach my $arg (qw/ YTYT_DB_PATH YTYT_HOST /) {
+    if (defined($ENV{$arg}) && $ENV{$arg} ne '') {
+        my ($ytyt_arg) = $arg =~ m/YTYT_(.*)/;
+        $ytyt_args{lc($ytyt_arg)} = $ENV{$arg};
+    }
+}
+
+my $youtube = YTYT::new( %ytyt_args );
 
 my $content;
 
@@ -18,45 +25,55 @@ unless (defined $ARGV[0]) {
         "with either of these values included. RegEx argument optional" . (scalar @ARGV) . "\n";
 }
 
+my %params;
 if (defined $ARGV[1]) {
     $params{regex} = $ARGV[1];
 }
 
+# channelID is unambiguous, but not typical for a user to see.
+# Treat it as most accurate, but fall back to more common identifiers
 if ($ARGV[0] =~ m%^UC[a-zA-Z0-9\-\_]{22}$%) {
     $params{channelId} = $ARGV[0];
     #print "Found raw channelId " . $params{channelId} . "\n";
+# YouTube now presents the '@Handle' in the public URL, so this is the next most likely to be seen
 } elsif ($ARGV[0] =~ m%^(?:(?:https://)?(?:www\.|m\.)?youtube\.com/)?\@([^\/]+)%) {
     $params{channelHandle} = $1;
-    #die("Handles are not yet supported. Found \@$params{channelHandle}\n");
+    #die("Found \@$params{channelHandle}\n");
+# Channels that have not yet declared a handle will still have the ID in the URL
 } elsif ($ARGV[0] =~ m%^(https://)?(www\.|m\.)?youtube\.com/channel/UC[a-zA-Z0-9\-\_]{22}%) {
     $params{channelId} = $ARGV[0];
     $params{channelId} =~ s%(https://)?(www\.|m\.)?youtube\.com/channel/(UC[^/]{22})(/.*)?%$3%;
     #print "Found URL encoded channelId " . $params{channelId} . "\n";
+# Channel names used to be presented in the URL.
+# This is probably deprecated, but it does not hurt to maintain support
 } elsif ($ARGV[0] =~ m%^(https://)?(www\.|m\.)?youtube\.com/c/%) {
     $params{channelName} = $ARGV[0];
     $params{channelName} =~ s%(https://)?(www\.|m\.)?youtube\.com/c/([^/]*)/?.*%$4%;
     #print "Found URL encoded channelName " . $params{channelName} . "\n";
+# The '/user/' path still technically works, though I don't think it is used anywhere
 } elsif ($ARGV[0] =~ m%^(https://)?(www\.|m\.)?youtube\.com/user/%) {
     $params{userName} = $ARGV[0];
     $params{userName} =~ s%(https://)?(www\.|m\.)?youtube\.com/user/([^/]*)/?.*%$4%;
     #print "Found URL encoded channelName " . $params{userName} . "\n";
+# As a last attempt, assume that it is a raw channel name if no '@' is included.
 } else {
     $params{channelName} = $ARGV[0];
     #print "Found raw channelName " . $params{channelName} . "\n";
 }
 
+# Fetch page to fill in missing columns
+
 $content = $youtube->get_videos_page( %params );
 unless (defined $params{channelName}) {
-    $params{channelName} = $content->{header}->{c4TabbedHeaderRenderer}->{title};
+    $params{channelName} = $content->{metadata}->{channelMetadataRenderer}->{title};
 }
 unless (defined $params{channelId}) {
-    $params{channelId} = $content->{header}->{c4TabbedHeaderRenderer}->{channelId};
+    $params{channelId} = $content->{metadata}->{channelMetadataRenderer}->{externalId};
 }
-$params{channelThumbnail} = $content->{header}->{c4TabbedHeaderRenderer}->{avatar}->{thumbnails}[0]->{url};
+$params{channelThumbnail} = $content->{metadata}->{channelMetadataRenderer}->{avatar}->{thumbnails}->[0]->{url};
 
 $youtube->db_connect();
 my @check = $youtube->{dbh}->selectrow_array("SELECT channelId FROM channels WHERE channelId = '$params{channelId}';");
-
 if (scalar @check) {
     $youtube->db_disconnect();
     die "Channel already exists in database.\n";