forked from robincamille/ezproxy-analysis
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathezp-analysis.py
executable file
·128 lines (98 loc) · 5.15 KB
/
ezp-analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# Robin Camille Davis
# created 2014-03-28 // revised 2018-05-16
## Script runs over all EZproxy-generated logs in a given directory. Best used with SPU logs.
## Must be edited with your organization's internal IP ranges. See two commented locations.
## Familiarize yourself with the data in the log.
## See http://emerging.commons.gc.cuny.edu/2014/04/analyzing-ezproxy-logs-python/
## Must be called on command line with this structure:
## python ezp-analysis.py [directory of SPU logs to analyze] [desired output filename.csv]
## Outputs a file with these columns:
## Filename of log
## total connections
## # on-campus connections
## % on-campus connections of total
## # off-campus connections
## % off-campus connections of total
## # library connections
## % library of on-campus connections
## % library of total connections
## # student sessions off-campus
## % student sessions of total off-campus
## # fac/staff sessions off-campus
## % fac/staff sessions of total off-campus
import re, sys, glob, os
# Patterns are compiled once here rather than on every log line.
_ON_CAMPUS_RE = re.compile(r'10\.\d+?\s-')  # Edit this IP range
_LIBRARY_RE = re.compile(r'L10\.1\s-')  # Edit this IP range
_SESSION_RE = re.compile(r'.* - ([0-9A-Z].*?)\s')
_SESSION_TAIL_RE = re.compile(r'- .*')
_STUDENT_RE = re.compile(r'(Default\+OPAC\+Student)')
_STAFF_RE = re.compile(r'(Default\+OPAC\+Staff)')

# Header row of the output CSV; one label per column written by main().
_CSV_HEADER = (
    'filename,total connections, # on-campus connections, % on-campus connections of total, '
    '# off-campus connections, % off-campus connections of total, # library connections, '
    '% library of on-campus connections, % library of total connections, # student sessions off-campus, '
    '% student sessions of total off-campus, # fac/staff sessions off-campus, % fac/staff sessions of '
    'total off-campus'
)

_USAGE = ('Usage: python ezp-analysis.py [directory of SPU logs to analyze] '
          '[desired output filename.csv]')


def _percent(part, whole):
    """Return part/whole as a percentage, or 'n/a' when whole is zero."""
    if whole == 0:
        return 'n/a'
    return (float(part) / whole) * 100


def _extract_session(line):
    """Return the session text found in a log line, or None if there is none.

    The text is everything after the first '- ' inside the span matched by
    the session pattern (this includes the whitespace character that ends
    the match).
    """
    matched = _SESSION_RE.search(line)
    if not matched:
        return None
    return _SESSION_TAIL_RE.search(matched.group()).group()[2:]


def _analyze_log(lines):
    """Count connections and sessions over an iterable of log lines.

    Returns a tuple
    (oncampus, offcampus, libraryconnections, studcount, faccount).
    """
    studcount = 0
    faccount = 0
    oncampus = 0
    offcampus = 0
    libraryconnections = 0
    sessions = set()  # set gives O(1) "seen before?" checks

    for raw in lines:
        line = raw.strip()

        # Every line is either an on-campus connection (matches the campus
        # IP pattern) or is counted as off-campus.
        if _ON_CAMPUS_RE.search(line):
            oncampus += 1
        else:
            offcampus += 1

        # Library connections are those marked L10.1.
        if _LIBRARY_RE.search(line):
            libraryconnections += 1

        # Original author's notes: session IDs are ONLY assigned to
        # off-campus connections, and there may be many connections per
        # session, so each session is classified only the first time it
        # is seen. On-campus sessions aren't tagged as student/faculty.
        session = _extract_session(line)
        if session is None or session in sessions:
            continue
        sessions.add(session)
        if _STUDENT_RE.search(line):
            studcount += 1  # off-campus student session
        if _STAFF_RE.search(line):
            faccount += 1  # off-campus faculty/staff session

    return oncampus, offcampus, libraryconnections, studcount, faccount


def main():
    """EZproxy log analysis: count up student and faculty/staff sessions.

    Reads the log directory from sys.argv[1] and the output CSV path from
    sys.argv[2]. Every '*.log' file in the directory is analyzed and one
    row per file is written to the CSV, after a header row. Percentages
    whose denominator is zero (for example, an empty log file) are written
    as 'n/a'.

    Exits with a usage message if either argument is missing.
    """
    if len(sys.argv) < 3:
        sys.exit(_USAGE)

    print('EZproxy analysis beginning... This may take a few minutes.\n')
    dirname = sys.argv[1]  # must be a directory
    output = sys.argv[2]  # must be CSV

    # 'with' guarantees the CSV is flushed and closed, even on error.
    with open(output, "w") as outfile:
        outfile.write(_CSV_HEADER)

        # Sorted so that the row order is the same on every run.
        for filename in sorted(glob.glob(os.path.join(dirname, '*.log'))):
            print('Now analyzing', filename)

            # Stream the file line by line; undecodable bytes are replaced
            # instead of aborting the whole run.
            with open(filename, errors='replace') as logfile:
                (oncampus, offcampus, libraryconnections,
                 studcount, faccount) = _analyze_log(logfile)

            total = offcampus + oncampus  # all connections
            totalcountoffcamp = studcount + faccount  # all off-campus sessions

            row = [
                filename,
                total,
                oncampus,
                _percent(oncampus, total),  # on-campus / total connections
                offcampus,
                _percent(offcampus, total),  # off-campus / total connections
                libraryconnections,
                _percent(libraryconnections, oncampus),  # library / on-campus
                _percent(libraryconnections, total),  # library / total
                studcount,
                _percent(studcount, totalcountoffcamp),  # students / off-campus sessions
                faccount,
                _percent(faccount, totalcountoffcamp),  # fac-staff / off-campus sessions
            ]
            outfile.write('\n')
            outfile.write(','.join(str(value) for value in row))

    print('\nAll done!\n\nOutput:', output)
# Run the analysis only when executed as a script, not when imported.
if __name__ == '__main__':
    main()