#!/usr/bin/perl
# -*- perl -*-
#
# Copyright (c) 1997, 2000 DJ Delorie, All Rights Reserved.  NO WARRANTEE.
#
# Search Engine Simulator CGI.
#
# With a `url' form parameter: fetches that page with webget() and prints
# three text-only renderings of it (a summary of all text, a summary built
# from header elements only, and the full stripped text).
# Without one: prints a short introduction and the URL entry form.
# With the single command-line argument "see-script": prints this script's
# own source as text/plain.
#
# ReadParse/%in, webget(), HtmlEncode(), $webgot_url and $web_header are
# not defined in this file; presumably they come from cgi-lib.pl and
# ./common.pl -- verify against those files.
#
# NOTE(review): the copy of this script that was reviewed had everything
# resembling an HTML tag stripped out of it (markup in output strings, the
# tag text in patterns, the <IN>/<V> readline operators and the heredoc
# introducers).  Patterns and operators were restored from what the
# surrounding code requires; the HTML markup in the print statements is a
# minimal reconstruction and should be compared with the deployed script.

push(@INC, split(':', $ENV{'PATH'}));
push(@INC, "/home/apache/bin");

#open(LOCK, ">/tmp/lock");
#flock(LOCK, 2);

# "see-script" mode: dump our own source and stop before any HTML output.
if (defined $ARGV[0] && $ARGV[0] eq "see-script") {
    print "Content-type: text/plain\n\n";
    if (open(my $src, '<', "ses.cgi")) {
        print while <$src>;
        close($src);
    }
    else {
        print "Unable to read ses.cgi: $!\n";
    }
    exit 0;
}

# Left as package globals (no `my'): they are not used again in this file,
# so they are presumably read by the code pulled in from common.pl.
$sockaddr = "S n a4 x8";
#require "sys/socket.ph";
use Socket;

require "./common.pl";

push(@INC, "/usr/local/etc/httpd/bin");
push(@INC, split(':', $ENV{"PATH"}));
require "cgi-lib.pl";

&ReadParse;

# Forgive a doubled scheme, e.g. from typing after the pre-filled "http://".
$in{'url'} =~ s@http://http://@http://@;

print "Content-type: text/html\n\n";
print `header -sky Search Engine Simulator`;

$agent = "Delorie.com SES";

# Shown in both branches below.
# NOTE(review): the original wrapped part of this sentence in a link whose
# target is not recoverable from the reviewed copy -- restore it.
my $tables_hint = <<EOF;
<p>If your page uses tables, and the wrong text is showing up in these examples (i.e. the sidebar instead of the body), I have some helpful hints for laying out your tables to make your main text show up first</p>
EOF

if ($in{'url'}) {

    # The URL is user input: encode it before echoing it into HTML.
    my $url_html = &HtmlEncode($in{'url'});

    # Re-display the entry form, pre-filled with the URL being examined.
    # NOTE(review): form action/method are reconstructed -- confirm.
    print "<form action=\"ses.cgi\" method=\"get\">\n";
    $value = $in{'url'};
    $value = "http://" unless $value;
    my $value_html = &HtmlEncode($value);
    print "<input type=\"text\" name=\"url\" size=\"60\" value=\"$value_html\">\n";
    print "<br><input type=\"submit\" value=\"Simulate\">\n";
    print "</form>\n";
    print "<hr>\n";

    # Only plain http:// URLs with a dotted host name are accepted.
    if ($in{'url'} !~ m@^http://([^\.]+\.)+[^\.]+@) {
        print "Sorry, I can only handle http://some.host/ URLs.\n";
        $u = &HtmlEncode($in{'url'});
        print "You typed in `$u'.\n";
        print `trailer`;
        exit 0;
    }

    $page = &webget($in{'url'});
    $has_tables = 1 if $page =~ /<\s*table/i;

    # A different final URL means the fetch was redirected.
    if ($webgot_url ne $in{'url'}) {
        print "<p><b>Note:</b> The web server returned a \"redirect\".  Search\n";
        print "engines may not always follow redirects.  The URL below reflect the\n";
        print "page I was redirected to.</p>\n";
    }

    ($date) = $web_header =~ /Last-modified:\s*(.*\S)/i;
    $date = &HtmlEncode($date) if $date;    # header text is remote input
    $size = length($page);

    my $webgot_html = &HtmlEncode($webgot_url);
    print "<h3>$webgot_html</h3>\n\n";

    # Collapse whitespace, then replace each image by its alt text
    # (double-quoted, single-quoted and unquoted attribute forms).
    $page =~ s/[\r\n\t ]+/ /g;
    $page =~ s@<img[^>]* alt="([^\"]*)"[^>]*>@$1@gi;
    $page =~ s@<img[^>]* alt='([^\']*)'[^>]*>@$1@gi;
    $page =~ s@<img[^>]* alt=([^ >]*)[^>]*>@$1@gi;

    # The capture stops at the first '<', so it cannot include the closing
    # tag or any other markup.
    ($title) = $page =~ m@<title>([^<]*)@i;
    if (! $title) {
        # No title: fall back to the last path component of the URL.
        $title = $in{'url'};
        $title =~ s@.*/(.+)@$1@;
        $title = &HtmlEncode($title);
    }

    $banner = "<b>$title</b> &nbsp; ";
    if ($date) {
        $banner .= " $date, $size bytes<br>\n";
    }
    else {
        $banner .= " $size bytes<br>\n";
    }

    # Wrapper pieces shared by the three example listings.
    $start   = "<blockquote><p>\n";
    $trailer = "<br>\n$url_html";
    $end     = "</p></blockquote>\n";

    # ---- Example 1: first ~200 characters of all the text on the page ----
    $summary = $page;
    # NOTE(review): the tag named in this pattern was lost; </head> is a
    # guess at "drop everything before the body" -- confirm.
    $summary =~ s@.*</head>@@i;
    # From striphtml: remove comments, then any remaining tag (quoted
    # attribute values may themselves contain '>').
    $summary =~ s{<!--.*?-->}{\ }gsx;
    $summary =~ s{\<([^>\'\"]|\"[^\"]*\"|\'[^\']*\')*\>}{\ }gsx;  #"
    $summary =~ s/\ / /g;
    $summary =~ s/ +/ /g;
    $summary =~ s/^ +//g;
    # Keep at most ~200 characters, cutting at a word boundary.
    $summary =~ s/^(.{1,200}\S) .*/$1/;
    # Escape stray angle brackets left over from malformed markup.
    $summary =~ s/</&lt;/g;
    $summary =~ s/>/&gt;/g;

    print "<h3>Example 1: Summary based on all text on page</h3>\n";
    print $start, $banner, $summary, $trailer, $end;

    # ---- Example 2: text taken from header elements only ----
    $headers = $page;
    $htext = "";
    $i = 0;
    while ($headers =~ m@<h[1-6][^>]*>(.*?)</h[1-6]>@ig) {
        $htext .= $1;
        $htext .= " ";
        last if $i++ == 100; # just in case
    }
    $htext =~ s{<!--.*?-->}{}gsx;
    $htext =~ s@\<([^\>\'\"]|\"[^\"]*\"|\'[^\']*\')*\>@@gsx;  #"
    $htext =~ s/\ / /g;
    $htext =~ s/ +/ /g;
    $htext =~ s/^ +//g;
    $htext =~ s/^(.{1,200}\S) .*/$1/;
    $htext =~ s/</&lt;/g;
    $htext =~ s/>/&gt;/g;

    print "<h3>Example 2: Summary based on headers only</h3>\n";
    print $start, $banner, $htext, $trailer, $end;

    # ---- Example 3: the whole page as text, not truncated ----
    $text = $page;
    $text =~ s/[\r\n\t\ ]+/ /g;
    # From striphtml
    $text =~ s{<!--.*?-->}{\ }gsx;
    $text =~ s{\<([^>\'\"]|\"[^\"]*\"|\'[^\']*\')*\>}{\ }gsx;  #"
    $text =~ s/\ / /g;
    $text =~ s/ +/ /g;
    $text =~ s/^ +//g;
    $text =~ s/</&lt;/g;
    $text =~ s/>/&gt;/g;

    print "<h3>Example 3: Text excerpt of page</h3>\n";
    # NOTE(review): the two empty strings held markup around the text
    # (lost in the reviewed copy); left empty rather than guessed.
    print $start, "", $text, "", $end;

    print $tables_hint if $has_tables;

}
else {

    print "<p>This service allows web authors to see what their pages will look\n";
    print "like (sort of) to a search engine.  This service ignores the META\n";
    print "tags that some search engines honor.</p>\n";

    # NOTE(review): form action/method are reconstructed -- confirm.
    print "<form action=\"ses.cgi\" method=\"get\">\n";
    print "<p>Enter the URL you want to view:<br>\n";
    $value = $in{'url'};
    $value = "http://" unless $value;
    my $value_html = &HtmlEncode($value);
    print "<input type=\"text\" name=\"url\" size=\"60\" value=\"$value_html\"></p>\n";
    print "<p><input type=\"submit\" value=\"Simulate\"></p>\n";
    print "</form>\n";

    print $tables_hint;

    # The bare "?see-script" query is what triggers the source dump above
    # (presumably the web server passes a query without '=' as an argument).
    print "<p><a href=\"ses.cgi?see-script\">See the CGI's perl source</a></p>\n";

    # Optional include; a missing file just means nothing is printed.
    if (open(my $viewers, '<', "viewers.html")) {
        print while <$viewers>;
        close($viewers);
    }
}

print `trailer`;