#!/usr/bin/ruby -I/home/tschroed/lib/ruby
# Licensed under the GNU General Public License 2.0 or greater
#
# Trevor Schroeder
# Last Change: 8/2/2002
#
# Fetches a set of RSS feeds, renders each one to an HTML fragment via
# RSSHTML, and writes the concatenated fragments to a sidebar file.
#
# You'll need REXML to make this go.

require 'net/http'
require 'rexml/document'
require 'cgi'
require 'rsshtml'

###
### This would be the part you change
###

# Max word length before it's broken up.  Set to 0 to not split
# words at any length.
MAXWORD=15

# Feed name => feed URL.  Feeds are emitted sorted by name.
urls = {
  'Boston' => 'http://www.moreover.com/cgi-local/page?o=rss&c=Boston%20news',
  'Nation' => 'http://trainedmonkey.com/news/rss.php?s=22',
  'Slashdot' => 'http://slashdot.org/slashdot.rss',
  'Blogdex' => 'http://blogdex.media.mit.edu/xml/fresh.asp?c=25',
  'Boing Boing' => 'http://boingboing.net/rss.xml',
  'Memepool' => 'http://memepool.com/memepool.rss',
  'Newsvac' => 'http://newsforge.com/newsvac.rss',
  'Newsforge' => 'http://newsforge.com/reports.rss'
}

# File the generated sidebar is written to (overwritten on every run).
outfilename = '/home/tschroed/public_html/blog/import/sidebar.html'

# Accumulates one HTML fragment per feed.
rsshtml = []

## Grab Google News
# Google dropped in favor of newsisfree feeds of the Washington Post and AP
# Newswire
# NOTE(review): the markup inside this disabled snippet appears to have been
# stripped at some point; the regexp line below is fused with the statement
# that appended to g and is incomplete as written.
#g="
#
#google
#
#"
#conn = Net::HTTP.new('news.google.com',80)
#conn.start
#resp, rawdata = conn.get('/',nil)
#rawdata.each do |ln|
#  # Get the headline
#  ln=~/class=g href=(http:\/\/www.google.com\/url[^ ">]*)"?[^>]*>([^<]*)#$2\n" if($1!=nil)
#end
#rsshtml.push g

# Returns the string form of the first child node of the child element
# called +name+, or '' when that child element is absent or empty.
def first_child_text(element, name)
  node = element.elements[name]
  node.nil? ? '' : node[0].to_s
end

# Method for formatting individual items.
# Split up long words, strip out HTML tags (because it doesn't work in the
# tooltips window), put into a nice looking link.
#
# element:: the item node; it must respond to +elements+ and is looked up
#           for "title", "link" and "description" children.
# Returns a String holding the formatted item.
def formatItem(element)
  # Get title, link, and description info for this item.  Missing children
  # yield '' instead of raising NoMethodError on nil.
  raw_title = first_child_text(element, "title")
  lnk = first_child_text(element, "link")
  raw_desc = first_child_text(element, "description")

  # Strip out HTML tags, etc from the description.  The pattern has to be a
  # Regexp: a String pattern is matched literally by gsub and would never
  # remove a tag.
  plain_desc = CGI.unescapeHTML(raw_desc).gsub(/<[^>]*>/, '')
  dsc = CGI.escapeHTML(plain_desc)

  # Split the title if words are too long.  scan never produces an empty
  # trailing chunk, so a word whose length is an exact multiple of MAXWORD
  # no longer gains an extra blank.
  pieces = []
  raw_title.split(' ').each do |word|
    if MAXWORD != 0 && word.length > MAXWORD
      pieces.concat(word.scan(/.{1,#{MAXWORD}}/))
    else
      pieces.push(word)
    end
  end
  # Every piece is followed by a single space, as before.
  ttl = CGI.escapeHTML(pieces.map { |piece| piece + ' ' }.join)

  # and format it.
  # NOTE(review): the original markup of this string is not recoverable from
  # the file as received (lnk and dsc were computed but never emitted).  The
  # anchor below is reconstructed from the description above ("nice looking
  # link", tooltip text); any additional layout tags must be restored from
  # the original.  lnk is inserted as read from the feed - presumably still
  # XML-escaped; confirm against the feed parser.
  "\n\n<a href=\"#{lnk}\" title=\"#{dsc}\">#{ttl}</a>\n"
end

# And now the rest
urls.sort.each do |title, url|
  # NOTE(review): the heading string is kept exactly as found; it looks like
  # its surrounding markup was lost as well.
  heading = "\n\n#{title}\n\n\n"
  # Note the use of the formatting block in the call to
  # getHTML
  rsshtml.push heading + RSSHTML.new(title, url).getHTML { |item| formatItem(item) }
end

# Block form closes the file even if a write raises.
File.open(outfilename, "w") do |outfile|
  rsshtml.each { |text| outfile.puts text }
end