class TooHotToHandel::Scraper
Constants
- BASE_URL
Public Class Methods
scrape_classical_reviews()
click to toggle source
# File lib/too_hot_to_handel/scraper.rb, line 5
#
# Fetches the page at BASE_URL and creates one
# TooHotToHandel::ClassicalReview (title, blurb, url) for every review row
# matched by ".flex.view-row-carton--flex".
#
# Rows that have no linked heading ("h2 a" with an href) are skipped, since
# there is no review page to point at.
#
# @return [Object] the result of iterating the matched rows with `each`
def self.scrape_classical_reviews
  # URI.open is the explicit open-uri entry point; bare Kernel#open no longer
  # accepts URLs on Ruby 3.0+.
  doc = Nokogiri::HTML(URI.open(BASE_URL))

  doc.css(".flex.view-row-carton--flex").each do |review|
    link = review.at_css("h2 a")
    href = link && link["href"]
    # Previously a missing link raised NoMethodError on nil and aborted the
    # whole scrape; skip just this row instead.
    next unless href

    title = review.css("h2").text
    blurb = review.css(".field-item.even").text
    # hrefs are appended to the site host as-is (assumed site-relative —
    # TODO confirm against the live markup).
    TooHotToHandel::ClassicalReview.new(title, blurb, "https://www.gramophone.co.uk#{href}")
  end
end
scrape_review_content(classical_review)
click to toggle source
# File lib/too_hot_to_handel/scraper.rb, line 17
#
# Fetches the page at classical_review.url, joins the text of every
# ".article-layout p" node, and stores the result in classical_review.content.
#
# Everything before the first ")" is replaced with "(<title>", which drops the
# unwanted leading paragraph text that was being rendered. The pattern is
# anchored at the start and "." does not cross newlines, so when there is no
# ")" on the first line the text is stored unchanged.
#
# @param classical_review [#url, #title, #content=] review to populate
# @return [String] the content that was assigned
def self.scrape_review_content(classical_review)
  # URI.open is the explicit open-uri entry point; bare Kernel#open no longer
  # accepts URLs on Ruby 3.0+.
  doc = Nokogiri::HTML(URI.open(classical_review.url))
  body_text = doc.css(".article-layout p").text

  # Non-destructive `sub` rather than `gsub!`: the bang form returns nil when
  # nothing is replaced, which used to leave content nil. The block form keeps
  # backslashes in the title from being read as back-references.
  classical_review.content = body_text.sub(/\A.*?(?=\))/) { "(#{classical_review.title}" }
end