-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathanalyzer.go
62 lines (54 loc) · 1.4 KB
/
analyzer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
package main
import (
"fmt"
"strings"
"github.com/dghubble/go-twitter/twitter"
)
// matchAndGetQuery returns the first trend in trends whose name shares at
// least one token with t.Text.
//
// The text is tokenized once with getTextTokens; each trend name is tokenized
// with getTopicTokens and the two sets are compared with match. On the first
// hit the trend's Query, Name and TweetVolume are copied into a new
// trendingTopicMatch. When no trend matches, the result is nil and the
// returned error includes the text.
func matchAndGetQuery(t *TextData, trends []twitter.Trend) (*trendingTopicMatch, error) {
	textTokens := getTextTokens(t.Text)
	for _, trend := range trends {
		if _, ok := match(textTokens, getTopicTokens(trend.Name)); !ok {
			continue
		}
		return &trendingTopicMatch{
			query:  trend.Query,
			name:   trend.Name,
			volume: trend.TweetVolume,
		}, nil
	}
	// The text is passed as an argument, never as the format string itself:
	// a '%' inside t.Text would otherwise be parsed as a formatting verb and
	// corrupt the message.
	return nil, fmt.Errorf("did not find a match for %v", t.Text)
}
// getTopicTokens splits a trend name into a set of lower-case tokens.
//
// A single leading '#' is dropped, then matchFirstCap and matchAllCap (both
// defined elsewhere in the package) rewrite each of their matches as
// "${1}_${2}" — presumably inserting '_' at CamelCase word boundaries; confirm
// against their definitions. The result is lower-cased and split on '_'. Each
// piece is whitespace-trimmed and stored as a key with value true; empty
// pieces are skipped, so an empty name yields an empty set.
func getTopicTokens(str string) (tokens map[string]bool) {
	tokens = make(map[string]bool)

	// TrimPrefix is safe on the empty string (indexing str[0] would panic),
	// and the stripped value is what must be fed to the regexps so the '#'
	// does not leak into the tokens.
	ss := strings.TrimPrefix(str, "#")
	ss = matchFirstCap.ReplaceAllString(ss, "${1}_${2}")
	ss = matchAllCap.ReplaceAllString(ss, "${1}_${2}")
	ss = strings.ToLower(ss)

	for _, token := range strings.Split(ss, "_") {
		token = strings.TrimSpace(token)
		if token == "" {
			// Leading, trailing or doubled '_' produce empty pieces that can
			// never be a meaningful match.
			continue
		}
		tokens[token] = true
	}
	return tokens
}
// getTextTokens splits free text into a set of lower-case tokens.
//
// The text is split on runs of whitespace (spaces, tabs, newlines). Tokens of
// two bytes or fewer are discarded; the remaining ones are lower-cased and
// stored as keys with value true. Lower-casing keeps the set comparable with
// the output of getTopicTokens, which is also lower-cased.
func getTextTokens(str string) (tokens map[string]bool) {
	tokens = make(map[string]bool)
	// TODO: use proper tokenization instead of splitting on whitespace.
	for _, token := range strings.Fields(str) {
		if len(token) > 2 {
			tokens[strings.ToLower(token)] = true
		}
	}
	return tokens
}
// match looks for a key of one that maps to true in other.
//
// It returns the first such key encountered together with true. Map iteration
// order is unspecified, so when several keys qualify any of them may be
// returned. The values stored in one are not consulted. If no key qualifies,
// it returns "" and false.
func match(one, other map[string]bool) (string, bool) {
	for candidate := range one {
		if !other[candidate] {
			// Absent from other, or present but explicitly false.
			continue
		}
		return candidate, true
	}
	return "", false
}