class Scrape456::Download

Given a username and password, retrieves all event attendee data from 123Signup.

Usage:

csv_result = Scrape456::Download.new(

username: ENV['LOGIN_USER_1'],
password: ENV['LOGIN_PASSWORD_1']

).call

puts csv_result

Constants

LOGIN_URL
ONLOAD_REDIRECT_XPATH

Machine-generated (copied from the Firefox developer tools), but it works where my handcrafted XPath did not.

THIRTY_SECONDS

Public Class Methods

new(username:, password:) click to toggle source
# File lib/scrape456/download.rb, line 33
# Builds a downloader for the given 123Signup account.
#
# username:: login name typed into the sign-in form
# password:: password typed into the sign-in form
#
# Also configures Capybara (see set_up_capybara) as part of construction.
def initialize(username:, password:)
  @username, @password = username, password
  set_up_capybara
end

Public Instance Methods

call() click to toggle source
# File lib/scrape456/download.rb, line 91
# Logs in, walks the nested frames to the "Event Attendee Data" report,
# triggers the download and returns the extracted CSV text.
#
# Progress messages are written to STDERR along the way.
def call
  browser = Capybara.current_session
  attendee_csv = nil

  STDERR.puts('Logging in to 123Signup...')
  log_in

  browser.within_frame('FrameApplication') do
    browser.within_frame('top_menu') do
      STDERR.puts('Navigating to reports tab...')
      browser.find(REPORT_TAB_LINK_CSS_SELECTOR).click
    end

    browser.within_frame('contents') do
      browser.within_frame('MenuList') do
        STDERR.puts('Navigating to event reports...')
        browser.click_link('Event Reports')
      end

      browser.within_frame('Results') do
        browser.click_link('Event Attendee Data')
        browser.click_button('BottomDownloadReport')
        STDERR.puts('Waiting on report download...')
        # Assigned to the outer local so the value survives the frame blocks.
        attendee_csv = extract_csv_from_full_response(fetch_csv_response_body)
      end
    end
  end

  attendee_csv
end
extract_csv_from_full_response(full_response) click to toggle source
# File lib/scrape456/download.rb, line 86
# Strips the HTML wrapper (<html>...<pre>...</pre></body></html>) from around
# the CSV payload when the whole response matches that shape.
#
# full_response:: the response body as a String
#
# Returns the text starting at "SignupMemberID," up to the closing </pre>
# when the wrapper is present; otherwise returns a copy of the input unchanged.
def extract_csv_from_full_response(full_response)
  wrapper = /\A<html.*<pre[^>]*>(SignupMemberID,.*)<\/pre><\/body><\/html>\z/m
  wrapped = wrapper.match(full_response)
  return full_response.dup unless wrapped

  wrapped[1]
end
extract_redirect_url() click to toggle source
# File lib/scrape456/download.rb, line 61
# Builds the absolute URL that the page's <body onload="..."> redirects to.
#
# The onload attribute is split on ';', the first statement mentioning
# pageLoadRedirect is picked, and the double-quoted text inside it is taken
# as the path, which is appended to the session's current host.
def extract_redirect_url
  statements = session.find('body')['onload'].split(';')
  redirect_call = statements.find { |statement| statement =~ /pageLoadRedirect/ }
  redirect_path = redirect_call.sub(/^.*"([^"]*)".*$/, '\1')
  session.current_host + redirect_path
end
fetch_csv_response_body() click to toggle source
# File lib/scrape456/download.rb, line 68
# Waits for the report page and returns the response body that holds the CSV,
# converted from ISO-8859-1 to UTF-8.
#
# The page body is expected either to show text starting a line with
# "SignupMemberID," or to match ONLOAD_REDIRECT_XPATH. In the redirect case
# the target URL is visited in the browser session and then downloaded
# directly; otherwise the current page body is used as-is.
#
# Relies on open-uri being loaded, which provides URI::HTTP#read.
def fetch_csv_response_body
  session.assert_selector('body')
  expect(session.find('body')).to(
    have_text(/^SignupMemberID,/).or(match_xpath(ONLOAD_REDIRECT_XPATH))
  )

  csv_response_body =
    if session.find('body').matches_xpath?(ONLOAD_REDIRECT_XPATH)
      csv_url = extract_redirect_url
      session.visit(csv_url)
      STDERR.puts "Report generated by 123Signup. Fetching CSV..."
      # URI#read (from open-uri) instead of Kernel#open: Kernel#open no longer
      # accepts URLs on Ruby 3.0+, and #read closes the underlying IO for us.
      URI.parse(csv_url).read
    else
      session.body
    end

  # The bytes are treated as ISO-8859-1 regardless of their tagged encoding,
  # then transcoded to UTF-8.
  csv_response_body.force_encoding(Encoding::ISO_8859_1).encode(Encoding::UTF_8)
end
log_in() click to toggle source
# File lib/scrape456/download.rb, line 50
# Opens LOGIN_URL, fills the 'username' and 'password' fields with the stored
# credentials and clicks the 'SignInButton' link.
def log_in
  browser = session
  browser.visit(LOGIN_URL)

  { 'username' => @username, 'password' => @password }.each do |field, value|
    browser.fill_in(field, with: value)
  end

  browser.click_link('SignInButton')
end
session() click to toggle source
# File lib/scrape456/download.rb, line 57
# Shorthand for the current Capybara session, used by the helper methods
# instead of spelling out Capybara.current_session each time.
def session
  Capybara.current_session
end
set_up_capybara() click to toggle source
# File lib/scrape456/download.rb, line 39
# Configures Capybara globally to drive a remote site through Poltergeist:
# registers the :poltergeist driver (PhantomJS started with
# --ssl-protocol=any), selects it as the current and JavaScript driver,
# disables Capybara's own app server, and sets the default maximum wait
# time to THIRTY_SECONDS.
def set_up_capybara
  driver_name = :poltergeist

  Capybara.register_driver(driver_name) do |app|
    Capybara::Poltergeist::Driver.new(app, phantomjs_options: ['--ssl-protocol=any'])
  end

  Capybara.current_driver = driver_name
  Capybara.javascript_driver = driver_name
  Capybara.run_server = false
  Capybara.default_max_wait_time = THIRTY_SECONDS
end