-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patharchive_old_data.rb
67 lines (49 loc) · 1.53 KB
/
archive_old_data.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env ruby
# frozen_string_literal: true
require 'rubygems'
require 'bundler/setup'
require 'elasticsearch'
require 'digest'
require 'date'
require 'securerandom'
require 'set'
require_relative './localconfig'
# Number of months a document may go without being seen before it is archived.
months = 3
# Work in UTC: the cutoff string below carries a literal 'Z' suffix, so the
# timestamp it is built from must really be UTC rather than local time.
now = DateTime.now.new_offset(0)
# DateTime#<< steps back by whole calendar months.
limit = now << months
limit_s = limit.strftime('%Y-%m-%dT%H:%M:%S.%LZ')
# Credentials are read from @config (presumably defined by localconfig).
client = Elasticsearch::Client.new log: false,
                                   user: @config[:elastic_username],
                                   password: @config[:elastic_password]
# IDs of documents already handled during this run; the archive loop skips any
# hit whose ID is in here. A Set keeps that membership test constant-time.
@seen_ids = Set.new
# Generate a fresh random document ID.
#
# @return [String] 64 lowercase hexadecimal characters (256 random bits)
def random_id
  SecureRandom.hex(32)
end
# Move every document in 'tlshandshakes' whose meta.last_seen is at or before
# the cutoff into a per-year 'tlshandshakes_archive_<year>' index, working in
# rounds of up to 500 hits until a search comes back empty.
loop do
  data = client.search index: 'tlshandshakes', body: { size: 500, query: {
    bool: { filter: [{ range: {
      "meta.last_seen": {
        format: 'strict_date_optional_time',
        gte: '1970-01-01T00:00:00.000Z',
        lte: limit_s
      }
    } }] }
  } }
  hits = data['hits']['hits']
  break if hits.empty?

  delete_bulk = []
  # Bulk index actions grouped by the year of each document's last_seen.
  insert_bulk = Hash.new { |bulk, year| bulk[year] = [] }
  hits.each do |hit|
    id = hit['_id']
    next if @seen_ids.include? id

    @seen_ids << id
    body = hit['_source']
    year = DateTime.parse(body['meta']['last_seen']).year
    delete_bulk << { delete: { _id: id } }
    insert_bulk[year] << { index: { _id: random_id, data: body } }
  end

  if delete_bulk.empty?
    # Every hit was already handled, yet the search still returns it. Stop
    # rather than query the server in a tight loop forever.
    warn 'search returned only documents that were already archived; stopping'
    break
  end

  # Write the archive copies first and verify them, so a failed archive write
  # can never lead to the originals being deleted.
  insert_bulk.each do |year, actions|
    result = client.bulk index: "tlshandshakes_archive_#{year}", body: actions
    raise "bulk archive into tlshandshakes_archive_#{year} reported errors" if result['errors']
  end

  # refresh: true asks Elasticsearch to make the deletes visible to search
  # immediately, so the next round does not fetch the same documents again.
  result = client.bulk index: 'tlshandshakes', body: delete_bulk, refresh: true
  raise 'bulk delete from tlshandshakes reported errors' if result['errors']
end