class Scrape456::Download
Given a username and password, retrieves all event attendee data from 123Signup.
Usage:
csv_result = Scrape456::Download.new(
  username: ENV['LOGIN_USER_1'], password: ENV['LOGIN_PASSWORD_1']
).call
puts csv_result
Constants
- LOGIN_URL
- ONLOAD_REDIRECT_XPATH
- REPORT_TAB_LINK_CSS_SELECTOR
Machine-generated: pulled from Firefox dev tools. But it works, where my handcrafted XPath did not.
- THIRTY_SECONDS
Public Class Methods
new(username:, password:)
click to toggle source
# File lib/scrape456/download.rb, line 33
# Remembers the credentials used by log_in and configures Capybara.
#
# @param username value typed into the 'username' login field
# @param password value typed into the 'password' login field
def initialize(username:, password:)
  @password = password
  @username = username
  set_up_capybara
end
Public Instance Methods
call()
click to toggle source
# File lib/scrape456/download.rb, line 91
# Logs in, walks the nested frames to the "Event Attendee Data" report,
# triggers the download and returns the extracted CSV text.
# Progress messages are written to STDERR along the way.
#
# @return the value produced by extract_csv_from_full_response
def call
  browser = Capybara.current_session
  csv_result = nil

  STDERR.puts 'Logging in to 123Signup...'
  log_in

  browser.within_frame('FrameApplication') do
    browser.within_frame('top_menu') do
      STDERR.puts 'Navigating to reports tab...'
      browser.find(REPORT_TAB_LINK_CSS_SELECTOR).click
    end

    browser.within_frame('contents') do
      browser.within_frame('MenuList') do
        STDERR.puts 'Navigating to event reports...'
        browser.click_link 'Event Reports'
      end

      browser.within_frame('Results') do
        browser.click_link('Event Attendee Data')
        browser.click_button 'BottomDownloadReport'
        STDERR.puts 'Waiting on report download...'
        raw_response = fetch_csv_response_body
        # Assigned to the outer local so the value survives the frame blocks.
        csv_result = extract_csv_from_full_response(raw_response)
      end
    end
  end

  csv_result
end
extract_csv_from_full_response(full_response)
click to toggle source
# File lib/scrape456/download.rb, line 86
# Strips the HTML wrapper (<html>…<pre>…</pre></body></html>) from a response
# and returns the CSV text that starts with "SignupMemberID,".
#
# A response that does not match the wrapper pattern is returned unchanged.
#
# @param full_response [String] raw CSV, or CSV wrapped in an HTML <pre> page
# @return [String] the CSV text
def extract_csv_from_full_response(full_response)
  # \s* before \z: trailing whitespace after </html> must not defeat the match,
  # otherwise the whole HTML page would be returned as if it were CSV.
  wrapper = %r{\A<html.*<pre[^>]*>(SignupMemberID,.*)</pre></body></html>\s*\z}m
  wrapped_csv = full_response[wrapper, 1]
  return full_response unless wrapped_csv

  # NOTE(review): assumes the wrapped form is the browser's serialized page
  # source, in which &, < and > inside text are entity-escaped — confirm
  # against a live response. Decoded in a single pass so that an escaped
  # "&amp;lt;" becomes "&lt;" rather than "<".
  wrapped_csv.gsub(/&(?:amp|lt|gt);/, '&amp;' => '&', '&lt;' => '<', '&gt;' => '>')
end
extract_redirect_url()
click to toggle source
# File lib/scrape456/download.rb, line 61
# Builds the absolute URL of the redirect named in the <body> onload handler.
#
# The onload attribute is split on ';', the first statement mentioning
# pageLoadRedirect is picked, and the double-quoted text inside it is
# appended to the session's current host.
#
# @return the current host concatenated with the quoted redirect path
def extract_redirect_url
  onload_statements = session.find('body')['onload'].split(';')
  redirect_statement = onload_statements.find { |statement| statement =~ /pageLoadRedirect/ }
  redirect_path = redirect_statement.sub(/^.*"([^"]*)".*$/, '\1')
  session.current_host + redirect_path
end
fetch_csv_response_body()
click to toggle source
# File lib/scrape456/download.rb, line 68
# Waits for the report page and returns its body transcoded to UTF-8.
#
# The page <body> must either contain text starting a line with
# "SignupMemberID," or match ONLOAD_REDIRECT_XPATH. In the redirect case the
# CSV is downloaded from the URL built by extract_redirect_url; otherwise the
# session's current body is used as-is.
#
# @return [String] the response body, re-labelled as ISO-8859-1 and encoded to UTF-8
def fetch_csv_response_body
  session.assert_selector('body')
  expect(session.find('body')).to(
    have_text(/^SignupMemberID,/).or(match_xpath(ONLOAD_REDIRECT_XPATH))
  )

  if session.find('body').matches_xpath?(ONLOAD_REDIRECT_XPATH)
    csv_url = extract_redirect_url
    session.visit(csv_url)
    STDERR.puts "Report generated by 123Signup. Fetching CSV..."
    # Uses open-uri through the URI object instead of Kernel#open: Kernel#open
    # no longer accepts URLs on Ruby 3.0+ and would run a command for a string
    # starting with "|". The block form also closes the IO/Tempfile after reading.
    csv_response_body = URI.parse(csv_url).open(&:read)
  else
    csv_response_body = session.body
  end

  csv_response_body.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
end
log_in()
click to toggle source
# File lib/scrape456/download.rb, line 50
# Opens LOGIN_URL, fills in the stored credentials and clicks the
# 'SignInButton' link.
def log_in
  browser = session
  browser.visit(LOGIN_URL)
  browser.fill_in('username', with: @username)
  browser.fill_in('password', with: @password)
  browser.click_link('SignInButton')
end
session()
click to toggle source
# File lib/scrape456/download.rb, line 57
# Shorthand for Capybara's current session.
#
# @return whatever Capybara.current_session returns
def session
  Capybara.current_session
end
set_up_capybara()
click to toggle source
# File lib/scrape456/download.rb, line 39
# Registers a Poltergeist driver (PhantomJS started with --ssl-protocol=any),
# makes it both the current and the JavaScript driver, turns off Capybara's
# own server and sets the default max wait time to THIRTY_SECONDS.
def set_up_capybara
  driver_options = { phantomjs_options: %w[--ssl-protocol=any] }

  Capybara.register_driver(:poltergeist) do |app|
    Capybara::Poltergeist::Driver.new(app, driver_options)
  end

  Capybara.current_driver = :poltergeist
  Capybara.javascript_driver = :poltergeist

  Capybara.run_server = false
  Capybara.default_max_wait_time = THIRTY_SECONDS
end