From cd8a6a4ecebebf6963a13351c8b679f63dcef259 Mon Sep 17 00:00:00 2001 From: Thomas Hochstein Date: Sat, 29 Apr 2017 21:18:24 +0200 Subject: [PATCH] Add caching to DejureIntegrator filter. Signed-off-by: Thomas Hochstein --- lib/filters/dejure.rb | 122 +++++++++++++++++++++++++++++++----------- 1 file changed, 92 insertions(+), 30 deletions(-) diff --git a/lib/filters/dejure.rb b/lib/filters/dejure.rb index 940132c..6543ff6 100644 --- a/lib/filters/dejure.rb +++ b/lib/filters/dejure.rb @@ -33,15 +33,36 @@ # params[:buzer] -> fallback to buzer.de? (1) require 'net/http' +require 'digest' module Nanoc::Filters - class DeJureIntegrator < Nanoc::Filter + class DejureIntegrator < Nanoc::Filter identifier :dejure type :text + VERSION = '0.2' + CACHEDIR = 'tmp/dejure-org' + CACHEDAYS = 4 + + def run(input, params={}) + if !(/§|§|Art\.|\/[0-9][0-9](?![0-9\/])| [0-9][0-9]?[\/\.][0-9][0-9](?![0-9\.])|[0-9][0-9], / =~ input) + # nothing to replace + return input + end + # return output if it's already cached + if !(output = cache_read(input.strip)) + # purge cache if a purge is due + puts "DejureIntegrator cache purged!\n" if cache_purge + # call out to dejure.org + output = call_dejure(input.strip, set_params(params)) + end + # do an integrity check + return integrity_check(input,output) + end + def set_params (params) # set default params - params[:version] = '0.1' + params[:version] = VERSION if !@config[:base_url].nil? params[:Anbieterkennung] = @config[:base_url] else @@ -52,17 +73,7 @@ module Nanoc::Filters return params end - def run(content, params={}) - if !(/§|§|Art\.|\/[0-9][0-9](?![0-9\/])| [0-9][0-9]?[\/\.][0-9][0-9](?![0-9\.])|[0-9][0-9], / =~ content) - # nothing to replace - return content - else - params = set_params(params) - return DeJureIntegrator(content.strip, params) - end - end - - def DeJureIntegrator (text, params={}) + def call_dejure (input, params={}) prot = 'http://' host = 'rechtsnetz.dejure.org' path = '/dienste/vernetzung/vernetzen' @@ -70,35 +81,86 @@ module Nanoc::Filters http = Net::HTTP.new(uri.host, uri.port) request = Net::HTTP::Post.new(uri.request_uri) - request['User-Agent'] = params[:Anbieterkennung] + ' (DeJureIntegrator for nanoc ruby-' + params[:version] + ')' + request['User-Agent'] = params[:Anbieterkennung] + ' (DejureIntegrator for nanoc ruby-' + params[:version] + ')' request['Content-Type'] = 'application/x-www-form-urlencoded' formdata = params - formdata['Originaltext'] = text + formdata['Originaltext'] = input request.set_form_data(formdata) response = http.request(request) - if (response.code != '200') || response.body.nil? || (text.length > response.body.length) + if (response.code != '200') || response.body.nil? || (input.length > response.body.length) # HTTP error, empty body or response body smaller than original text - printf("DeJureIntegrator HTTP error: %s\n", response.code) - return text + printf("DejureIntegrator HTTP error: %s\n", response.code) + return input else - return IntegrityCheck(text,response.body.force_encoding('UTF-8')) + output = response.body.force_encoding('UTF-8').strip + # write cache + cache_write(input,output) + return output end end - def IntegrityCheck (input,output) - # compare input and output text after removing all added links - texts should match! - regexp = / cache_age(cache_days) + return File.read(cache_file) + else + return false + end + end + + def cache_purge (cache_days=CACHEDAYS,cache_dir=CACHEDIR) + # cache_dir is not a directory? + return false if !File.directory?(cache_dir) + lastpurge = File.read(cache_dir + '/lastpurge') if File.exist?(cache_dir + '/lastpurge') + # already purged in the last cache_days days? + return false if lastpurge && lastpurge.to_i > cache_age(cache_days) + # delete all files in cache_dir older than cache_days + Pathname.new(cache_dir).children.each do |f| + f.unlink if File.mtime(f).to_i < cache_age(cache_days) + end + # save the time of the purge + File.open(cache_dir + '/lastpurge', 'w') do |f| + f.write(Time.now.to_i) + end + return true + end + + def integrity_check (input,output) + # compare input and output text after removing all added links - texts should match! + regexp = /