This repository has been archived by the owner on Mar 13, 2018. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSocialMediaScraper.rb
171 lines (155 loc) · 5.17 KB
/
SocialMediaScraper.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
require 'rubygems'
require 'net/http'
require 'uri'
require 'date'
require 'json'
require 'oauth'
require 'csv'
#require './secrets.rb' if File.exists?('secrets.rb')
require './env.rb' if File.exists?('env.rb')
# Collects public social-media counters (Facebook page metrics and Twitter
# account metrics) for a list of organizations and renders them as rows.
#
# Every organization is a Hash with the string keys "organization",
# "facebook_screen_name" and "twitter_screen_name".
class Scraper
  # Organizations scraped when the constructor is called without arguments.
  DEFAULT_ORGANIZATIONS_JSON = <<-'JSON'.freeze
    [
      { "organization": "NYCLU",
        "facebook_screen_name": "NewYorkCivilLibertiesUnion",
        "twitter_screen_name": "NYCLU" },
      { "organization": "Center for Constitutional Rights",
        "facebook_screen_name": "CenterforConstitutionalRights",
        "twitter_screen_name": "theCCR" },
      { "organization": "The Brennan Center",
        "facebook_screen_name": "BrennanCenter",
        "twitter_screen_name": "BrennanCenter" },
      { "organization": "NAACP_LDF",
        "facebook_screen_name": "naacpldf",
        "twitter_screen_name": "NAACP_LDF" },
      { "organization": "ACLU",
        "facebook_screen_name": "aclu.nationwide",
        "twitter_screen_name": "ACLU" },
      { "organization": "ACLU_SoCal",
        "facebook_screen_name": "ACLU.SoCal",
        "twitter_screen_name": "ACLU_SoCal" },
      { "organization": "ACLU_WA",
        "facebook_screen_name": "acluwa",
        "twitter_screen_name": "ACLU_WA" },
      { "organization": "ACLUUtah",
        "facebook_screen_name": "aclu.utah",
        "twitter_screen_name": "acluutah" },
      { "organization": "National Action",
        "facebook_screen_name": "nationalactionnetwork",
        "twitter_screen_name": "nationalaction" },
      { "organization": "SPL Center",
        "facebook_screen_name": "SPLCenter",
        "twitter_screen_name": "splcenter" },
      { "organization": "Lambda Legal",
        "facebook_screen_name": "lambdalegal",
        "twitter_screen_name": "LambdaLegal" },
      { "organization": "Latino Justice",
        "facebook_screen_name": "latinojustice",
        "twitter_screen_name": "latinojustice" },
      { "organization": "CivilRights.org",
        "facebook_screen_name": "civilandhumanrights",
        "twitter_screen_name": "civilrightsorg" }
    ]
  JSON

  # Directory #print writes into when no directory is given.
  OUTPUT_DIRECTORY = "../scrapes".freeze

  # @param args [Array] optional; the first element is either a JSON string
  #   describing an array of organizations or an already-parsed Array of
  #   organization hashes. With no arguments the built-in list is used.
  # @raise [JSON::ParserError] when a JSON string cannot be parsed
  def initialize(*args)
    source = args.empty? ? DEFAULT_ORGANIZATIONS_JSON : args[0]
    # Anything that is not already an Array goes through JSON.parse, so
    # invalid input keeps failing at construction time as it did before.
    @organizations = source.is_a?(Array) ? source : JSON.parse(source)
  end

  # Builds an OAuth access token for the Twitter API from a user token pair.
  # The consumer key/secret are read from ENV['TWITTER_API_KEY'] and
  # ENV['TWITTER_API_SECRET'].
  #
  # @param oauth_token [String]
  # @param oauth_token_secret [String]
  # @return [OAuth::AccessToken]
  def prepare_access_token(oauth_token, oauth_token_secret)
    consumer = OAuth::Consumer.new(
      ENV['TWITTER_API_KEY'],
      ENV['TWITTER_API_SECRET'],
      { :site => "https://api.twitter.com", :scheme => :header }
    )
    token_hash = { :oauth_token => oauth_token,
                   :oauth_token_secret => oauth_token_secret }
    # The token wraps credentials, so it is returned without being echoed
    # to stdout.
    OAuth::AccessToken.from_hash(consumer, token_hash)
  end

  # Fetches page counters for one Facebook page from the Graph API.
  #
  # @param facebook_screen_name [String] page name used as the URL path
  # @return [Hash] :facebook_checkins, :facebook_likes,
  #   :facebook_talking_about_count, :facebook_were_here_count; a value is
  #   nil when the response does not contain the corresponding field
  # @raise [JSON::ParserError] when the response body is not JSON
  def facebook_scrape(facebook_screen_name)
    puts "Facebook scrape!"
    # HTTPS is used because Net::HTTP.get does not follow redirects.
    # NOTE(review): current Graph API versions presumably also require an
    # access token -- confirm before relying on these fields.
    uri = URI.parse("https://graph.facebook.com/#{facebook_screen_name}")
    response = JSON.parse(Net::HTTP.get(uri))
    {
      facebook_checkins: response["checkins"],
      facebook_likes: response["likes"],
      facebook_talking_about_count: response["talking_about_count"],
      facebook_were_here_count: response["were_here_count"]
    }
  end

  # Fetches account counters for one Twitter user via users/show.json,
  # authenticating with ENV['TWITTER_ACCESS_TOKEN'] and
  # ENV['TWITTER_ACCESS_TOKEN_SECRET'].
  #
  # @param twitter_screen_name [String]
  # @return [Hash] :twitter_followers_count, :twitter_friends_count,
  #   :twitter_listed_count, :twitter_favourites_count,
  #   :twitter_statuses_count; a value is nil when the field is absent
  def twitter_scrape(twitter_screen_name)
    puts "Scraping twitter!"
    access_token = prepare_access_token(ENV['TWITTER_ACCESS_TOKEN'],
                                        ENV['TWITTER_ACCESS_TOKEN_SECRET'])
    # Encode the name so characters such as '&' or spaces cannot alter the
    # query string.
    screen_name = URI.encode_www_form_component(twitter_screen_name.to_s)
    reply = access_token.request(
      :get,
      "https://api.twitter.com/1.1/users/show.json?screen_name=#{screen_name}"
    )
    response = JSON.parse(reply.body)
    {
      twitter_followers_count: response["followers_count"],
      twitter_friends_count: response["friends_count"],
      twitter_listed_count: response["listed_count"],
      twitter_favourites_count: response["favourites_count"],
      twitter_statuses_count: response["statuses_count"]
    }
  end

  # Writes the rows to "<directory>/SMS-<month>-<day>-<year>.csv" (today's
  # date), using the keys of the first row as the header line.
  #
  # Note: inside this class the method replaces Kernel#print.
  #
  # @param results [Array<Hash>] rows as produced by #scrape
  # @param directory [String] existing directory to write into
  # @return [Array<Hash>] the rows that were passed in; nothing is written
  #   when the list is empty because there is no header to derive
  def print(results, directory = OUTPUT_DIRECTORY)
    return results if results.empty?

    path = File.join(directory, "SMS-#{date_stamp(Date.today)}.csv")
    CSV.open(path, "w") do |csv|
      csv << results.first.keys
      results.each { |row| csv << row.values }
    end
    results
  end

  # Renders rows as text: one header line taken from the first row's keys,
  # then one line per row. Fields are separated by ", " and every line ends
  # with " \n". Fields are not quoted or escaped, so a value that contains a
  # comma is not distinguishable from two fields.
  #
  # @param scrapes [Array<Hash>]
  # @return [String] the rendered text; "" for an empty list
  def self.as_csv(scrapes)
    return "" if scrapes.empty?

    lines = [scrapes.first.keys] + scrapes.map { |row| row.values }
    lines.map { |fields| csv_line(fields) }.join
  end

  # Joins one line's fields; an empty field list contributes no text at all.
  def self.csv_line(fields)
    return "" if fields.empty?

    fields.map { |field| field.to_s }.join(", ") + " \n"
  end
  private_class_method :csv_line

  # Scrapes Facebook and Twitter for every configured organization and
  # echoes the collected rows to stdout.
  #
  # @return [Array<Hash>] one row per organization holding :date,
  #   :organization and the keys returned by #facebook_scrape and
  #   #twitter_scrape
  def scrape
    stamp = date_stamp(Date.today)
    results = @organizations.map do |organization|
      output = { date: stamp, organization: organization["organization"] }
      output.merge! facebook_scrape(organization["facebook_screen_name"])
      output.merge! twitter_scrape(organization["twitter_screen_name"])
    end
    puts results
    puts "and now to csv"
    #print(results)
    results
  end

  private

  # Formats a date as "<month>-<day>-<year>" without zero padding.
  def date_stamp(date)
    "#{date.month}-#{date.day}-#{date.year}"
  end
end# of class
# scraper = Scraper.new
# scraper.scrape