class Medreg::CompanyImporter
Constants
- RECIPIENTS
- Search_failure
Public Class Methods
all_companies()
click to toggle source
# File lib/medreg/company_importer.rb, line 309 def CompanyImporter.all_companies @@all_companies end
new(glns_to_import = [])
click to toggle source
# File lib/medreg/company_importer.rb, line 60
# Prepares a new import run.
#
# glns_to_import:: optional list of GLNs to restrict the import to. The
#                  caller's list is not modified; entries whose +size+ is 0
#                  are left out of the copy kept by the importer.
#
# Side effects: resets the class-level log buffer (@@logInfo) and company
# cache (@@all_companies), removes Companies_YAML and reopens it for
# writing, zeroes the counters and builds the default agent.
def initialize(glns_to_import = [])
  # reject builds a fresh array, so a frozen argument is accepted as well.
  # +size+ (not +empty?+) is kept on purpose: it is the only message the
  # original sent to the entries.
  @glns_to_import = glns_to_import.reject { |item| item.size == 0 }
  @info_to_gln = {}
  @@logInfo = []
  # rm_f silently ignores a missing file, so no existence check is needed
  # (File.exists? is gone since Ruby 3.2).
  FileUtils.rm_f(Companies_YAML)
  # NOTE(review): this handle is never closed in the code visible here.
  @yaml_file = File.open(Companies_YAML, 'w+')
  @companies_prev_import = 0
  @companies_created = 0
  @companies_skipped = 0
  @companies_deleted = 0
  @archive = ARCHIVE_PATH
  @@all_companies = {}
  setup_default_agent
end
Public Instance Methods
add_item(info, item)
click to toggle source
# File lib/medreg/company_importer.rb, line 76
# Appends +item+, converted to a String with every comma replaced by a
# blank, to the row +info+. Returns +info+.
def add_item(info, item)
  cell = item.to_s.tr(',', ' ')
  info << cell
end
get_detail_to_glns(glns)
click to toggle source
# File lib/medreg/company_importer.rb, line 176
# Looks up every GLN in +glns+ on the BetriebeURL search form, parses the
# detail page of each hit and records the result in @@all_companies.
#
# glns:: list of GLNs to query.
#
# For each GLN the lookup runs inside ResilientLoop#try_run and is attempted
# up to three times when it times out (only once when Minitest is loaded).
# GLNs that r_loop.must_skip? reports are skipped. A GLN without a detail
# link, or whose detail page parse_details rejects, is counted in
# @companies_skipped.
#
# Raises Search_failure when a result page contains the "search took too
# long" text; that exception is not rescued here.
# The two closing log lines are written even when an exception escapes.
def get_detail_to_glns(glns)
  r_loop = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  failure = 'Die Personensuche dauerte zu lange'
  max_retries = 3
  under_test = defined?(Minitest)
  # second argument of try_run — presumably a timeout in seconds; confirm
  # against ResilientLoop
  run_limit = under_test ? 500 : 5
  Medreg.log "get_detail_to_glns for #{glns.size} glns. first 10 are #{glns[0..9]} state_id is #{r_loop.state_id.inspect}" if DebugImport
  glns.each do |gln|
    if r_loop.must_skip?(gln)
      Medreg.log "Skipping #{gln}. Waiting for #{r_loop.state_id.inspect}" if DebugImport
      next
    end
    nr_tries = 0
    success = false
    while nr_tries < max_retries && !success
      begin
        r_loop.try_run(gln, run_limit) do
          Medreg.log "Searching for company with GLN #{gln}. Created #{@companies_created}. At #{@companies_created+@companies_prev_import} of #{glns.size}.#{nr_tries > 0 ? ' nr_tries is ' + nr_tries.to_s : ''}"
          page_1 = @agent.get(BetriebeURL)
          raise Search_failure if page_1.content.match(failure)
          form_fields = [
            ['Betriebsname', ''],
            ['Plz', ''],
            ['Ort', ''],
            ['GlnBetrieb', gln.to_s],
            ['BetriebsCodeId', '0'],
            ['KantonsCodeId', '0'],
          ]
          res_2 = @agent.post(BetriebeURL, form_fields)
          detail_link = res_2.link(:href => RegExpBetriebDetail)
          if detail_link
            page_3 = detail_link.click
            raise Search_failure if page_3.content.match(failure)
            company = parse_details(page_3, gln)
            if company
              store_company(company)
              @@all_companies[gln] = company
            else
              # parse_details returns nil (and logs) when the detail page
              # belongs to another GLN; storing nil would raise.
              @companies_skipped += 1
            end
          else
            Medreg.log "could not find gln #{gln}"
            @companies_skipped += 1
          end
          success = true
        end
      rescue Timeout::Error => e
        # Timeout is a module that Timeout::Error does not include, so the
        # exception class itself has to be named for the retry to happen.
        nr_tries += max_retries if under_test # no retries while testing
        nr_tries += 1
        remaining = [max_retries - nr_tries, 0].max
        Medreg.log "rescue #{e} will retry #{remaining} times"
        sleep under_test ? 0.01 : 60
      end
      if (@companies_created + @companies_prev_import) % 100 == 99
        Medreg.log "Start saving after #{@companies_created} created #{@companies_prev_import} from previous import"
      end
    end
  end
  r_loop.finished
ensure
  Medreg.log "Start saving"
  Medreg.log "Finished"
end
get_latest_file()
click to toggle source
# File lib/medreg/company_importer.rb, line 233
# Returns the path of the local company spreadsheet (Companies_curr).
# When the file does not exist yet it is downloaded from BetriebeXLS_URL
# first and the download is recorded via save_for_log.
def get_latest_file
  target = Companies_curr
  # An existing file is reused as is; nothing is downloaded.
  return target if File.exist?(target)

  download = Mechanize.new.get(BetriebeXLS_URL).body
  # Binary mode: the spreadsheet must reach the disk byte for byte.
  File.open(target, 'wb') { |f| f.write download }
  save_for_log "saved #{download.bytesize} bytes as #{target}"
  target
end
parse_details(html, gln)
click to toggle source
# File lib/medreg/company_importer.rb, line 148
# Extracts the company data from a detail page.
#
# html:: page object answering +at(css)+ with nodes that answer +text+.
# gln::  the GLN that was searched for.
#
# Returns a Hash with the keys :ean13, :name, :plz, :location, :address,
# :ba_type, :narcotics and (added by update_address) :addresses, or nil
# (after logging) when the third line of the left column is not +gln+.
#
# Only the "colLeft" and "twoColSpan" blocks are read.
# NOTE(review): a page lacking the "PLZ \ Ort" or "Betriebstyp" label still
# raises NoMethodError, exactly as before.
def parse_details(html, gln)
  left = html.at('div[class="colLeft"]').text
  btm = html.at('div[class="twoColSpan"]').text
  infos = left.split(/\r\n\s*/)
  unless infos[2].eql?(gln.to_s)
    Medreg.log "Mismatch between searched gln #{gln} and details #{infos[2]}"
    return nil
  end

  idx_plz = infos.index("PLZ \\ Ort")
  idx_typ = infos.index('Betriebstyp')

  # Key order is kept as it was; it determines the order in the YAML dump.
  company = {}
  company[:ean13] = gln.to_s.clone
  company[:name] = infos[4]
  company[:plz] = infos[idx_plz+1]
  company[:location] = infos[idx_plz+2]
  # everything between line 6 and the "PLZ \ Ort" label is the street address
  company[:address] = infos[6..idx_plz-1].join(' ')
  company[:ba_type] = infos[idx_typ+1]
  company[:narcotics] = btm.split(/\r\n\s*/)[-1]
  update_address(company)
  Medreg.log company if $VERBOSE
  company
end
parse_xls(path)
click to toggle source
# File lib/medreg/company_importer.rb, line 289
# Reads the first worksheet of the spreadsheet at +path+ and fills
# @info_to_gln with one CompanyInfo per data row, keyed by the value of the
# GLN cell or, when that cell is missing, by the value of the name_1 cell.
#
# Rows having neither a GLN nor a name_1 cell are ignored; the first
# remaining row is skipped (presumably the header row — confirm against the
# spreadsheet layout).
#
# Returns the sorted key list, which is also stored in @glns_to_import.
def parse_xls(path)
  Medreg.log "parsing #{path}"
  workbook = RubyXL::Parser.parse(path)
  fields = [:gln, :name_1, :name_2, :plz, :canton_giving_permit, :country, :company_type, :drug_permit]
  rows = 0
  workbook[0].each do |row|
    next unless row && (row[COMPANY_COL[:gln]] || row[COMPANY_COL[:name_1]])
    rows += 1
    next if rows == 1

    info = CompanyInfo.new
    fields.each do |field|
      cell = row[COMPANY_COL[field]]
      # plain attribute assignment; no need to build and eval source text
      info.public_send("#{field}=", cell ? cell.value : nil)
    end
    gln_cell = row[COMPANY_COL[:gln]]
    key = gln_cell ? gln_cell.value : row[COMPANY_COL[:name_1]].value
    @info_to_gln[key] = info
  end
  @glns_to_import = @info_to_gln.keys.sort.uniq
end
report()
click to toggle source
# File lib/medreg/company_importer.rb, line 244
# Builds the plain-text summary of the run: a title followed by one line
# each for the created, previously imported and deleted company counters.
# Returns the summary as a String.
def report
  counters = [
    ["New companies: ", @companies_created],
    ["Companies from previous imports: ", @companies_prev_import],
    ["Deleted companies: ", @companies_deleted],
  ]
  counters.each_with_object("Companies update \n\n".dup) do |(label, count), text|
    text << label << count.to_s << "\n"
  end
end
save_for_log(msg)
click to toggle source
# File lib/medreg/company_importer.rb, line 55
# Sends +msg+ to Medreg.log and additionally appends it, prefixed with the
# current local time, to the class-level buffer @@logInfo.
# Returns that buffer.
def save_for_log(msg)
  Medreg.log(msg)
  stamp = Time.now.strftime('%Y-%m-%d %H:%M:%S')
  @@logInfo << "#{stamp}: #{msg}"
end
save_import_to_csv(filename)
click to toggle source
# File lib/medreg/company_importer.rb, line 75
# Writes every company in @@all_companies to the CSV file +filename+.
#
# The first row holds the column names. Each company produces one row per
# address (at least one row even without addresses): the seven company
# columns followed, when the address exists, by the six address columns.
# Array-valued address attributes are joined with newlines. Commas inside
# values are replaced by blanks (see add_item).
def save_import_to_csv(filename)
  # NOTE(review): this nested def (re)defines the public instance method
  # add_item every time save_import_to_csv runs. It is kept because other
  # code may rely on add_item being defined from here.
  def add_item(info, item)
    info << item.to_s.gsub(',',' ')
  end
  field_names = ["ean13",
                 "name",
                 "plz",
                 "location",
                 "address",
                 "ba_type",
                 "narcotics",
                 "address_additional_lines",
                 "address_canton",
                 "address_fax",
                 "address_fon",
                 "address_location",
                 "address_type",
                ]
  CSV.open(filename, "wb") do |csv|
    csv << field_names
    @@all_companies.each do |_gln, person|
      # a company stored without :addresses still gets its one row
      addresses = person[:addresses] || []
      maxlines = [addresses.size, 1].max
      0.upto(maxlines - 1) do |idx|
        info = []
        field_names[0..6].each { |name| add_item(info, person[name.to_sym]) }
        address = addresses[idx]
        if address
          field_names[7..-1].each do |name|
            value = address.public_send(name.sub('address_', ''))
            add_item(info, value.is_a?(Array) ? value.join("\n") : value)
          end
        end
        csv << info
      end
    end
  end
end
save_import_to_yaml(filename)
click to toggle source
# File lib/medreg/company_importer.rb, line 110
# Dumps @@all_companies as YAML into +filename+ (truncating or creating it)
# and records the number of saved companies via save_for_log.
def save_import_to_yaml(filename)
  File.open(filename, 'w+') do |out|
    out.write(@@all_companies.to_yaml)
  end
  save_for_log "Saved #{@@all_companies.size} companies in #{filename}"
end
setup_default_agent()
click to toggle source
# File lib/medreg/company_importer.rb, line 137
# Creates the Mechanize agent used for all requests, stores it in @agent
# and returns it. The agent presents itself with a Firefox/Iceweasel
# user-agent string, accepts all redirects (limit 55), follows meta
# refreshes (including self-refreshes) and ignores bad chunking.
def setup_default_agent
  @agent = Mechanize.new
  @agent.user_agent = 'Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Firefox/31.0 Iceweasel/31.1.0'
  @agent.redirect_ok = :all
  @agent.follow_meta_refresh_self = true
  @agent.redirection_limit = 55
  # The former extra assignment of the misspelled :everwhere was
  # overwritten by this one before it could take effect; only this value
  # was ever in force.
  @agent.follow_meta_refresh = true
  @agent.ignore_bad_chunking = true
  @agent
end
store_company(data)
click to toggle source
# File lib/medreg/company_importer.rb, line 265
# Counts one more created company and builds a Company from +data+.
#
# data:: Hash with the keys :ean13, :name, :ba_type, :narcotics and
#        :addresses.
#
# The free-text :ba_type is mapped onto one of the Medreg::BA_type
# constants; anything unrecognised becomes the String 'unknown'.
#
# NOTE(review): the Company built here is neither returned nor kept in any
# collection visible in this method — confirm whether persisting it is
# expected to happen elsewhere.
def store_company(data)
  @companies_created += 1
  company = Company.new
  ba_type = case data[:ba_type]
            when /kantonale Beh/i
              Medreg::BA_type::BA_cantonal_authority
            when /ffentliche Apotheke/i
              Medreg::BA_type::BA_public_pharmacy
            when /Spitalapotheke/i
              Medreg::BA_type::BA_hospital_pharmacy
            when /wissenschaftliches Institut/i
              Medreg::BA_type::BA_research_institute
            else
              'unknown'
            end
  company.ean13 = data[:ean13]
  company.name = data[:name]
  company.business_area = ba_type
  company.narcotics = data[:narcotics]
  company.addresses = data[:addresses]
  Medreg.log "store_company updated #{data[:ean13]} database. ba_type #{ba_type}." if $VERBOSE
end
update()
click to toggle source
# File lib/medreg/company_importer.rb, line 114
# Runs a complete import:
# 1. reloads the companies of an interrupted run from the state YAML file,
#    when that file exists and is larger than 10 bytes,
# 2. fetches and parses the company spreadsheet,
# 3. queries the details for the GLNs given to the constructor, or for all
#    GLNs of the spreadsheet when none were given,
# 4. writes the result to Companies_YAML and Companies_CSV.
#
# Returns [created, previously imported, deleted, skipped].
#
# Whatever happens, once at least one company was created the current
# state is also written next to the state file (YAML and CSV).
#
# NOTE(review): YAML.load_file restricts the loadable classes on newer
# Psych versions — confirm the state file still loads there.
def update
  requested_glns = @glns_to_import.clone
  resilient = ResilientLoop.new(File.basename(__FILE__, '.rb'))
  @state_yaml = resilient.state_file.sub('.state', '.yaml')
  if File.exist?(@state_yaml) && File.size(@state_yaml) > 10
    @@all_companies = YAML.load_file(@state_yaml)
    @companies_prev_import = @@all_companies.size
    puts "Got #{@companies_prev_import} items from previous import saved in #{@state_yaml}"
  end

  xls_path = get_latest_file
  save_for_log "parse_xls #{xls_path} specified GLN ids #{requested_glns.inspect}"
  parse_xls(xls_path)

  # parse_xls replaces @glns_to_import, so the choice is made only now
  targets = requested_glns.empty? ? @glns_to_import : requested_glns
  get_detail_to_glns(targets)

  save_import_to_yaml(Companies_YAML)
  save_import_to_csv(Companies_CSV)
  [@companies_created, @companies_prev_import, @companies_deleted, @companies_skipped]
ensure
  if @companies_created > 0
    save_import_to_yaml(@state_yaml)
    save_import_to_csv(@state_yaml.sub('.yaml','.csv'))
  end
end
update_address(data)
click to toggle source
# File lib/medreg/company_importer.rb, line 251
# Builds one Address2 from the company Hash +data+ and stores it as the
# only element of data[:addresses].
#
# The location is "<plz> <location>" (missing parts are left out); phone
# and fax are set, each wrapped in an Array, only when present in +data+.
# Returns the new one-element address list.
def update_address(data)
  entry = Address2.new
  entry.name = data[:name]
  entry.address = data[:address]
  # entry.additional_lines = [data[:address] ]
  place = [data[:plz], data[:location]].compact
  entry.location = place.join(' ')

  phone = data[:phone]
  entry.fon = [phone] if phone

  fax = data[:fax]
  entry.fax = [fax] if fax

  data[:addresses] = [entry]
end