Tuesday, August 16, 2011

mechanize examples

#00 Initialization

require 'rubygems'

require 'mechanize'

# Build the agent that every example below reuses.
# The WWW:: namespace was deprecated in Mechanize 1.0 and removed in 2.0;
# the class lives at the top level.
agent = Mechanize.new

# Route all requests through a local proxy; the port is given as an Integer.
agent.set_proxy('localhost', 8000)

# Two ways to choose the User-Agent header. The alias assignment on the second
# line replaces the custom string from the first, so real code needs only one.
agent.user_agent = 'Individueller User-Agent'
agent.user_agent_alias = 'Linux Mozilla'

# Connection / read timeouts, in seconds.
agent.open_timeout = 3
agent.read_timeout = 4

# Do not reuse connections between requests.
agent.keep_alive = false

# Reduce memory if you make lots of requests.
# NOTE(review): with a history size of 0 there is presumably nothing for
# agent.back (section #04) to return to -- raise this if you navigate back.
agent.max_history = 0



#01 manual get requests
url = 'http://apoc.sixserv.org/requestinfo/'

# Plain GET of the URL.
page = agent.get(url)

# or: GET with parameters passed as a hash in the second argument.
query = {"name" => "value", "key" => "val"}
page = agent.get(url, query)



#02 manual post submits
url = 'http://apoc.sixserv.org/requestinfo/'

# POST the hash as the request's form fields.
form_fields = {"name" => "value", "key" => "val"}
page = agent.post(url, form_fields)



#03 form post submits
page = agent.get('https://twitter.com/login')

# Pick the login form by its action URL.
login_form = page.form_with(:action => 'https://twitter.com/sessions')

# Fill in the credential fields by name, in the same order as before.
{
  'session[username_or_email]' => '[Username]',
  'session[password]'          => '[Password]'
}.each { |field_name, value| login_form[field_name] = value }

# Submit through the agent; `page` becomes the response.
page = agent.submit(login_form)



#04 link and history navigation
page = agent.get('http://www.heise.de/')

# Follow the first link whose text matches, then the first link on the
# resulting page whose href matches.
telepolis_link = page.link_with(:text => /Telepolis/)
page = agent.click(telepolis_link)

article_link = page.link_with(:href => /artikel/)
page = agent.click(article_link)

# Step back twice in the agent's history.
2.times { agent.back }

# `page` is not reassigned by agent.back, so this prints the body of the
# page returned by the last click above.
puts page.body



#05 exceptions
# Mechanize raises ResponseCodeError for responses it does not handle
# (the URL below is expected not to exist). The error class lives under the
# top-level Mechanize namespace; WWW::Mechanize was removed in Mechanize 2.0.
begin
  page = agent.get('http://apoc.sixserv.org/diese/seite/gibt/es/nicht/')
rescue Mechanize::ResponseCodeError => e
  # response_code holds just the HTTP status, matching the "Code:" label.
  puts "ResponseCodeError - Code: #{e.response_code}"
end



#06 referer
# Positional form: get(url, parameters, referer). The options-hash form
# (:url => ..., :referer => ...) was deprecated in Mechanize 2.x, while the
# positional form is accepted by 1.x and 2.x alike.
page = agent.get('http://apoc.sixserv.org/requestinfo/',
                 [],
                 'http://google.com/this/is/a/custom/referer')

puts page.body



#07 request header manipulation
# Adds a custom header to every outgoing request.
# Mechanize 2.x calls pre-connect hooks with (agent, request); 1.x passed a
# single params hash holding the request under :request. A lambda checks its
# arity strictly, so accept any argument list and pick the request out of it.
agent.pre_connect_hooks << lambda do |*hook_args|
  request = hook_args.last
  request = request[:request] if request.is_a?(Hash)
  request['X-Requested-With'] = 'XMLHttpRequest'
end



#08 response header
# A HEAD request is enough when only the response headers are of interest.
page = agent.head('http://sixserv.org')
response_headers = page.header

server_version = response_headers['server']
puts "Server: #{server_version}"

# Only report X-Powered-By when the server actually sent it.
if response_headers.key?('x-powered-by')
  php_version = response_headers['x-powered-by']
  puts "X-Powered-By: #{php_version}"
end

# redirection urls:
# Turn off redirect following, then print the Location header of the
# response that comes back. The setting stays in effect for later requests.
agent.redirect_ok = false
page = agent.get('http://www.sixserv.org/')
puts page.header['location']



#09 content parsing
# XPath / CSS selector:
page = agent.get('http://xkcd.com/')
img = page.search('/html/body/div/div[2]/div/div[2]/div/div/img')
puts img

# Regular expression:
page = agent.get('http://example.com/')

# The tags must be written without inner whitespace: a pattern containing
# "< h3>" can never match real "<h3>...</h3>" markup. String#[] with a capture
# index returns the first group, or nil when nothing matches, which avoids the
# $1 global and the ambiguous `match /.../` call.
heading = page.body[%r{<h3>([^<]+)</h3>}, 1]
puts "Heading 3: #{heading}"



#10 "with" method examples
# *_with finders are available for: form, link, base, frame or iframe

# get the first link including "foo" inside url:
foo_link = page.link_with(:href => /foo/)

# all links with text 'more'
more_links = page.links_with(:text => 'more')

# get the form with the name 'foo'
foo_form = page.form_with('foo') # or form_with(:name => 'foo')

1 comment:

  1. Are you looking for free Instagram Followers and Likes?
    Did you know that you can get them ON AUTOPILOT & TOTALLY FREE by registering on Like 4 Like?

    ReplyDelete