class RelatonNist::Scrapper

Constants

DOMAIN

Public Class Methods

parse_page(hit_data) click to toggle source

Parse page. @param hit_data [Hash] @return [Hash]

# File lib/relaton_nist/scrapper.rb, line 13
# Build a bibliographic item from a search hit.
#
# The attributes come from +from_json+; the titles come from +fetch_titles+.
# When the first document identifier does not begin (at a line start) with
# "SP ", "NISTIR " or "FIPS ", it is replaced by a "NIST"-typed identifier
# whose id is the upper-cased content of the first title.
#
# @param hit_data [Hash]
# @return [NistBibliographicItem]
def parse_page(hit_data)
  attrs = from_json(hit_data)
  titles = fetch_titles(hit_data)
  primary_id = attrs[:docid].first.id

  unless /^(SP|NISTIR|FIPS) /.match?(primary_id)
    fallback_id = titles.first[:content].upcase
    attrs[:docid][0] = RelatonBib::DocumentIdentifier.new(id: fallback_id, type: "NIST")
  end

  # Fixed attributes shared by every item produced here.
  attrs.merge!(
    fetched: Date.today.to_s,
    type: "standard",
    title: titles,
    doctype: "standard",
  )

  NistBibliographicItem.new(**attrs)
end

Private Class Methods

contributors_json(doc, role, lang = "en", script = "Latn") click to toggle source

@param doc [Array<Hash>] @param role [String] @return [Array<RelatonBib::ContributionInfo>]

# File lib/relaton_nist/scrapper.rb, line 163
# Convert JSON contributor entries into contribution objects.
#
# An entry with a "surname" becomes a person (affiliated with the entry's
# organization, if any); an entry with only an "affiliation" becomes an
# organization; an entry with neither is skipped.
#
# @param doc [Array<Hash>, nil] contributor entries; nil is treated as empty
# @param role [String] role type assigned to every contributor
# @param lang [String] language passed on to +full_name+
# @param script [String] script passed on to +full_name+
# @return [Array<RelatonBib::ContributionInfo>]
def contributors_json(doc, role, lang = "en", script = "Latn")
  # A missing contributor list must not raise NoMethodError on nil.
  return [] if doc.nil?

  doc.map do |contr|
    affil = contr["affiliation"]
    org = nil
    if affil
      acronym = affil["acronym"]
      abbrev = RelatonBib::LocalizedString.new(acronym) if acronym
      org = RelatonBib::Organization.new(name: affil["name"], abbreviation: abbrev)
    end

    entity =
      if contr["surname"]
        affiliation = org ? [RelatonBib::Affiliation.new(organization: org)] : []
        RelatonBib::Person.new(
          name: full_name(contr, lang, script), affiliation: affiliation,
        )
      else
        org
      end
    next unless entity

    RelatonBib::ContributionInfo.new entity: entity, role: [type: role]
  end.compact
end
doc_relation(type, ref, uri, lang = "en", script = "Latn") click to toggle source

@param type [String] @param ref [String] @param uri [String] @return [RelatonNist::DocumentRelation]

# File lib/relaton_nist/scrapper.rb, line 345
# Build a relation to another document that is described only by a
# plain-text formatted reference and a single "src" link.
#
# @param type [String] relation type
# @param ref [String] content of the formatted reference
# @param uri [String] content of the "src" link
# @param lang [String] language of the formatted reference
# @param script [String] script of the formatted reference
# @return [RelatonNist::DocumentRelation]
def doc_relation(type, ref, uri, lang = "en", script = "Latn")
  reference = RelatonBib::FormattedRef.new(
    content: ref, language: lang, script: script, format: "text/plain",
  )
  source_link = RelatonBib::TypedUri.new(type: "src", content: uri)
  related_item = RelatonBib::BibliographicItem.new(
    formattedref: reference, link: [source_link],
  )

  DocumentRelation.new(type: type, bibitem: related_item)
end
fetch_commentperiod_json(json) click to toggle source

@param json [Hash] @return [RelatonNist::CommentPeriod, NilClass]

# File lib/relaton_nist/scrapper.rb, line 421
# Build the comment period from the "comment-from" / "comment-to" entries.
#
# @param json [Hash]
# @return [RelatonNist::CommentPeriod, NilClass] nil when "comment-from" is
#   absent (or otherwise falsy)
def fetch_commentperiod_json(json)
  starts_on = json["comment-from"]
  CommentPeriod.new(from: starts_on, to: json["comment-to"]) if starts_on
end
fetch_contributors(doc) click to toggle source

rubocop:disable Metrics/AbcSize, Metrics/MethodLength @param doc [Hash] @return [Array<RelatonBib::ContributionInfo>]

# File lib/relaton_nist/scrapper.rb, line 138
# Collect the contributors of a document: the entries under "authors"
# (role "author") followed by the entries under "editors" (role "editor").
# The document's "language" and "script" values are passed on to
# +contributors_json+ for both lists.
#
# @param doc [Hash]
# @return [Array<RelatonBib::ContributionInfo>]
def fetch_contributors(doc)
  authors = contributors_json(
    doc["authors"], "author", doc["language"], doc["script"]
  )
  editors = contributors_json(
    doc["editors"], "editor", doc["language"], doc["script"]
  )

  authors + editors
end
fetch_dates(doc, release_date) click to toggle source

Fetch dates @param doc [Hash] @param release_date [Date] @return [Array<Hash>]

# File lib/relaton_nist/scrapper.rb, line 118
# Fetch dates.
#
# The result always starts with the "published" date (the string form of
# +release_date+), followed by "updated", "obsoleted" and "issued" entries
# for those of "updated-date", "obsoleted-date" and "issued-date" that
# RelatonBib.parse_date could parse. An entry whose date is missing or
# unparseable is omitted instead of being emitted with an empty +on+ value.
#
# @param doc [Hash]
# @param release_date [Date]
# @return [Array<Hash>]
def fetch_dates(doc, release_date)
  dates = [{ type: "published", on: release_date.to_s }]

  # Hash order defines the order of the resulting entries.
  optional = {
    "updated" => "updated-date",
    "obsoleted" => "obsoleted-date",
    "issued" => "issued-date",
  }
  optional.each do |type, key|
    parsed = RelatonBib.parse_date doc[key]
    dates << { type: type, on: parsed.to_s } if parsed
  end

  dates
end
fetch_docid(docid) click to toggle source

Fetch docid. @param docid [String] @return [Array<RelatonBib::DocumentIdentifier>]

# File lib/relaton_nist/scrapper.rb, line 55
# Fetch docid.
#
# A trailing " Addendum" (any single whitespace character before the word)
# is shortened to "-Add". The substitution is made on a copy, so the
# caller's string is left untouched and frozen strings are accepted.
#
# @param docid [String]
# @return [Array<RelatonBib::DocumentIdentifier>] a single "NIST" identifier
def fetch_docid(docid)
  item_ref = docid.sub(/\sAddendum$/, "-Add")
  [RelatonBib::DocumentIdentifier.new(id: item_ref, type: "NIST")]
end
fetch_edition(doc) click to toggle source

@param doc [Hash] @return [String, NilClass]

# File lib/relaton_nist/scrapper.rb, line 251
# Build the edition label from the document's "edition" entry.
#
# @param doc [Hash]
# @return [String, NilClass] "Revision <edition>", or nil when the entry is
#   absent (or otherwise falsy)
def fetch_edition(doc)
  edition = doc["edition"]
  "Revision #{edition}" if edition
end
fetch_keywords(doc) click to toggle source

@param doc [Hash] @return [Array<RelatonNist::Keyword>]

# File lib/relaton_nist/scrapper.rb, line 390
# Fetch the document's keywords.
#
# String entries are returned as they are; for any other entry the result
# of its +text+ method is used. A missing or nil "keywords" entry yields an
# empty array instead of raising NoMethodError.
#
# @param doc [Hash]
# @return [Array] keyword values (Strings for String entries)
def fetch_keywords(doc)
  Array(doc["keywords"]).map { |kw| kw.is_a?(String) ? kw : kw.text }
end
fetch_relations_json(doc) click to toggle source
relations + doc.xpath('//span[@id="pub-related-container"]/a').map do |r|
  doc_relation "updates", r.text, DOMAIN + r[:href]
end

end rubocop:enable Metrics/AbcSize

# File lib/relaton_nist/scrapper.rb, line 331
# Build document relations from the "supersedes" and "superseded-by"
# entries. Entries under "supersedes" become "supersedes" relations and come
# first; entries under "superseded-by" become "updates" relations.
#
# @param doc [Hash]
# @return [Array<RelatonNist::DocumentRelation>]
def fetch_relations_json(doc)
  superseded = doc["supersedes"].map do |rel|
    doc_relation("supersedes", rel["docidentifier"], rel["uri"])
  end
  successors = doc["superseded-by"].map do |rel|
    doc_relation("updates", rel["docidentifier"], rel["uri"])
  end

  superseded + successors
end
fetch_status(doc) click to toggle source

Fetch status. @param doc [Hash] @return [RelatonNist::DocumentStatus]

# File lib/relaton_nist/scrapper.rb, line 65
# Fetch status.
#
# Stage and substage are taken from the "status" and "substage" entries.
# The "iteration" entry is passed on in string form, with the value
# "initial" translated to "1"; a missing iteration becomes an empty string.
#
# @param doc [Hash]
# @return [RelatonNist::DocumentStatus]
def fetch_status(doc)
  raw_iteration = doc["iteration"]
  iteration = raw_iteration == "initial" ? 1 : raw_iteration

  RelatonNist::DocumentStatus.new(
    stage: doc["status"],
    substage: doc["substage"],
    iteration: iteration.to_s,
  )
end
fetch_titles(hit_data) click to toggle source

Fetch titles. @param hit_data [Hash] @return [Array<Hash>]

# File lib/relaton_nist/scrapper.rb, line 109
# Fetch titles.
#
# Wraps the hit's +:title+ value in a single plain-text title entry marked
# as English in Latin script.
#
# @param hit_data [Hash]
# @return [Array<Hash>]
def fetch_titles(hit_data)
  title = {
    content: hit_data[:title],
    language: "en",
    script: "Latn",
    format: "text/plain",
  }
  [title]
end
from_json(hit_data) click to toggle source
# File lib/relaton_nist/scrapper.rb, line 31
# Assemble bibliographic item attributes from the JSON stored under
# +hit_data[:json]+. The published date additionally uses
# +hit_data[:release_date]+. The place is always "Gaithersburg, MD".
#
# @param hit_data [Hash]
# @return [Hash] attributes keyed by Symbol
def from_json(hit_data)
  json = hit_data[:json]

  # Attributes are filled one by one, in a fixed order, so the helpers run
  # in the same sequence as the keys appear in the result.
  attrs = {}
  attrs[:link] = fetch_link(json)
  attrs[:docid] = fetch_docid(json["docidentifier"])
  attrs[:date] = fetch_dates(json, hit_data[:release_date])
  attrs[:contributor] = fetch_contributors(json)
  attrs[:edition] = fetch_edition(json)
  attrs[:language] = [json["language"]]
  attrs[:script] = [json["script"]]
  attrs[:docstatus] = fetch_status(json)
  attrs[:copyright] = fetch_copyright(json["published-date"])
  attrs[:relation] = fetch_relations_json(json)
  attrs[:place] = ["Gaithersburg, MD"]
  attrs[:keyword] = fetch_keywords(json)
  attrs[:commentperiod] = fetch_commentperiod_json(json)
  attrs
end
full_name(name, lang, script) click to toggle source

@param name [Hash] @param lang [String] @param script [String] @return [RelatonBib::FullName]

# File lib/relaton_nist/scrapper.rb, line 229
# Build a full name from a contributor entry. "surname" and "fullName" are
# wrapped as localized strings; "givenName", "suffix" and "title" are
# converted with +name_parts+ into the forename, addition and prefix lists.
#
# @param name [Hash]
# @param lang [String]
# @param script [String]
# @return [RelatonBib::FullName]
def full_name(name, lang, script)
  localize = ->(text) { RelatonBib::LocalizedString.new(text, lang, script) }
  parts_of = ->(key) { name_parts(name[key], lang, script) }

  RelatonBib::FullName.new(
    surname: localize.call(name["surname"]),
    forename: parts_of.call("givenName"),
    addition: parts_of.call("suffix"),
    prefix: parts_of.call("title"),
    completename: localize.call(name["fullName"]),
  )
end
name_parts(part, lang, script) click to toggle source

@param part [String, NilClass] @param lang [String] @param script [String] @return [Array<RelatonBib::LocalizedString>]

# File lib/relaton_nist/scrapper.rb, line 243
# Wrap an optional name part in a one-element list of localized strings.
#
# @param part [String, NilClass]
# @param lang [String]
# @param script [String]
# @return [Array<RelatonBib::LocalizedString>] empty when +part+ is nil
#   (or otherwise falsy)
def name_parts(part, lang, script)
  part ? [RelatonBib::LocalizedString.new(part, lang, script)] : []
end