Server IP : 127.0.0.2 / Your IP : 3.23.61.205 Web Server : Apache/2.4.18 (Ubuntu) System : User : www-data ( ) PHP Version : 7.0.33-0ubuntu0.16.04.16 Disable Function : disk_free_space,disk_total_space,diskfreespace,dl,exec,fpaththru,getmyuid,getmypid,highlight_file,ignore_user_abord,leak,listen,link,opcache_get_configuration,opcache_get_status,passthru,pcntl_alarm,pcntl_fork,pcntl_waitpid,pcntl_wait,pcntl_wifexited,pcntl_wifstopped,pcntl_wifsignaled,pcntl_wexitstatus,pcntl_wtermsig,pcntl_wstopsig,pcntl_signal,pcntl_signal_dispatch,pcntl_get_last_error,pcntl_strerror,pcntl_sigprocmask,pcntl_sigwaitinfo,pcntl_sigtimedwait,pcntl_exec,pcntl_getpriority,pcntl_setpriority,php_uname,phpinfo,posix_ctermid,posix_getcwd,posix_getegid,posix_geteuid,posix_getgid,posix_getgrgid,posix_getgrnam,posix_getgroups,posix_getlogin,posix_getpgid,posix_getpgrp,posix_getpid,posix,_getppid,posix_getpwnam,posix_getpwuid,posix_getrlimit,posix_getsid,posix_getuid,posix_isatty,posix_kill,posix_mkfifo,posix_setegid,posix_seteuid,posix_setgid,posix_setpgid,posix_setsid,posix_setuid,posix_times,posix_ttyname,posix_uname,pclose,popen,proc_open,proc_close,proc_get_status,proc_nice,proc_terminate,shell_exec,source,show_source,system,virtual MySQL : OFF | cURL : ON | WGET : ON | Perl : ON | Python : ON | Sudo : ON | Pkexec : ON Directory : /usr/share/doc/libhtml-parser-perl/examples/ |
Upload File : |
#!/usr/bin/perl # Perform transformations on link attributes in an HTML document. # Examples: # # $ hrefsub 's/foo/bar/g' index.html # $ hrefsub '$_=URI->new_abs($_, "http://foo")' index.html # # The first argument is a perl expression that might modify $_. # It is called for each link in the document with $_ set to # the original value of the link URI. The variables $tag and # $attr can be used to access the tagname and attributename # within the tag where the current link is found. # # The second argument is the name of a file to process. use strict; use HTML::Parser (); use URI; # Construct a hash of tag names that may have links. my %link_attr; { # To simplify things, reformat the %HTML::Tagset::linkElements # hash so that it is always a hash of hashes. require HTML::Tagset; while (my($k,$v) = each %HTML::Tagset::linkElements) { if (ref($v)) { $v = { map {$_ => 1} @$v }; } else { $v = { $v => 1}; } $link_attr{$k} = $v; } # Uncomment this to see what HTML::Tagset::linkElements thinks are # the tags with link attributes #use Data::Dump; Data::Dump::dump(\%link_attr); exit; } # Create a subroutine named 'edit' to perform the operation # passed in from the command line. The code should modify $_ # to change things. my $code = shift; my $code = 'sub edit { local $_ = shift; my($attr, $tag) = @_; no strict; ' . $code . '; $_; }'; #print $code; eval $code; die $@ if $@; # Set up the parser. my $p = HTML::Parser->new(api_version => 3); # The default is to print everything as is. $p->handler(default => sub { print @_ }, "text"); # All links are found in start tags. This handler will evaluate # &edit for each link attribute found. $p->handler(start => sub { my($tagname, $pos, $text) = @_; if (my $link_attr = $link_attr{$tagname}) { while (4 <= @$pos) { # use attribute sets from right to left # to avoid invalidating the offsets # when replacing the values my($k_offset, $k_len, $v_offset, $v_len) = splice(@$pos, -4); my $attrname = lc(substr($text, $k_offset, $k_len)); next unless $link_attr->{$attrname}; next unless $v_offset; # 0 v_offset means no value my $v = substr($text, $v_offset, $v_len); $v =~ s/^([\'\"])(.*)\1$/$2/; my $new_v = edit($v, $attrname, $tagname); next if $new_v eq $v; $new_v =~ s/\"/"/g; # since we quote with "" substr($text, $v_offset, $v_len) = qq("$new_v"); } } print $text; }, "tagname, tokenpos, text"); # Parse the file passed in from the command line my $file = shift || usage(); $p->parse_file($file) || die "Can't open file $file: $!\n"; sub usage { my $progname = $0; $progname =~ s,^.*/,,; die "Usage: $progname <perlexpr> <filename>\n"; }