How to store only the text of a tweet using Tweepy
up vote
1
down vote
favorite
I'm watching this series https://www.youtube.com/watch?v=wlnx-7cm4Gg&list=PL5tcWHG-UPH2zBfOz40HSzcGUPAVOOnu1 about mining tweets with Tweepy (Python). The author stores each tweet with all of its fields (such as created_at, id, id_str, text) and then uses pandas DataFrames to keep only the text. Is this approach efficient? How can I store only the "text" field in the JSON file instead of all the other details?
The code:
# Placeholder Twitter API credentials used when building the OAuth handler.
# NOTE(review): fill in real values locally and avoid committing secrets.
ACCESS_TOKEN = "xxxxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
import tweepy
import numpy as np
import pandas as pd
# import twitter_credentials
class TwitterAuthenticator():
    """
    Builds the OAuth handler from the module-level credential constants.
    """

    def authenticate_twitter_app(self):
        """Return a tweepy OAuthHandler with consumer and access tokens set."""
        handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return handler
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag):
        """Start a filtered stream whose messages go to a TwitterListener.

        fetched_tweets_filename is handed to the TwitterListener constructor;
        hash_tag is passed unchanged as the ``track`` argument of the filter.
        """
        # Handles Twitter authentication and the connection to the
        # Twitter Streaming API.
        sink = TwitterListener(fetched_tweets_filename)
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        tweepy.Stream(credentials, sink).filter(track=hash_tag)
class TwitterListener(tweepy.StreamListener):
    """
    Stream listener that prints each received message and appends it to a file.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the path of the file that received messages are appended to."""
        # Run the tweepy base-class initialiser too; the explicit two-argument
        # super() form works on both Python 2 and Python 3.
        super(TwitterListener, self).__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print the raw message and append it to the output file.

        Returns True in every case, including when printing or writing fails.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
        except Exception as e:
            # Best effort: report the failure and carry on. Exception (not
            # BaseException) is caught so that KeyboardInterrupt and
            # SystemExit still propagate and can stop the program.
            print("Error on_data %s" % str(e))
        return True

    def on_status(self, status):
        """Print a status object."""
        # NOTE(review): with on_data overridden above, tweepy's default
        # dispatch to on_status presumably never runs - confirm.
        print(status)

    def on_error(self, status):
        """Print an error status code; return False when it is 420."""
        if status == 420:
            # Rate limit reached: return False from on_error to signal that
            # the stream should not continue.
            return False
        print(status)
# public_tweets = api.home_timeline()
# for tweet in public_tweets:
# print tweet.text
if __name__ == '__main__':
    # Output file first, then the keyword list passed on as the track filter.
    fetched_tweets_filename = "tweets.json"
    hash_tag = ["python"]

    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(
        fetched_tweets_filename=fetched_tweets_filename,
        hash_tag=hash_tag,
    )
# print stream.text
The tweet stored in the json file:
"created_at":"Sun Nov 04 18:43:59 +0000 2018","id":1059154305498972160,"id_str":"1059154305498972160","text":"RT @hmason: When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn hu2026","source":"u003ca href="http://twitter.com/download/android" rel="nofollow"u003eTwitter for Androidu003c/au003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":14858491,"id_str":"14858491","name":"Alexandra Lemus","screen_name":"nankyoku","location":"Mu00e9xico","url":null,"description":"Transitioning into the Permanent Beta state...","translator_type":"none","protected":false,"verified":false,"followers_count":173,"friends_count":585,"listed_count":18,"favourites_count":658,"statuses_count":572,"created_at":"Wed May 21 16:35:49 +0000 2008","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"EDECE9","profile_background_image_url":"http://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_tile":false,"profile_link_color":"088253","profile_sidebar_border_color":"D3D2CF","profile_sidebar_fill_color":"E3E2DE","profile_text_color":"634047","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_image_url_https":"https://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_banner_url":"https://pbs.twimg.com/profile_banners/14858491/1381524599","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":"
created_at":"Sat Nov 03 17:36:24 +0000 2018","id":1058774912201035776,"id_str":"1058774912201035776","text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourselu2026 https://t.co/9F7SmlGfyf","source":"u003ca href="http://twitter.com" rel="nofollow"u003eTwitter Web Clientu003c/au003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":765548,"id_str":"765548","name":"Hilary Mason","screen_name":"hmason","location":"NYC","url":"http://www.hilarymason.com","description":"GM for Machine Learning at @Cloudera. Founder at @FastForwardLabs. Data Scientist in Residence at @accel. I u2665 data and cheeseburgers.","translator_type":"none","protected":false,"verified":true,"followers_count":111311,"friends_count":1539,"listed_count":5276,"favourites_count":12049,"statuses_count":17602,"created_at":"Sun Feb 11 21:22:24 +0000 
2007","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"282F8A","profile_sidebar_border_color":"87BC44","profile_sidebar_fill_color":"AB892B","profile_text_color":"000000","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/765548/1353033581","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":"full_text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn how it works, and then use a library to benefit from robust code.nnHere's one article showing this with neural networks in Python: 
https://t.co/3ehO86NFKI","display_text_range":[0,280],"entities":"hashtags":,"urls":["url":"https://t.co/3ehO86NFKI","expanded_url":"https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6","display_url":"towardsdatascience.com/how-to-build-yu2026","indices":[257,280]],"user_mentions":,"symbols":,"quote_count":14,"reply_count":8,"retweet_count":290,"favorite_count":1019,"entities":"hashtags":,"urls":["url":"https://t.co/9F7SmlGfyf","expanded_url":"https://twitter.com/i/web/status/1058774912201035776","display_url":"twitter.com/i/web/status/1u2026","indices":[117,140]],"user_mentions":,"symbols":,"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":"hashtags":,"urls":,"user_mentions":["screen_name":"hmason","name":"Hilary Mason","id":765548,"id_str":"765548","indices":[3,10]],"symbols":,"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1541357039223"
If the question is not clear, please leave a comment and I will edit the question.
python tweepy
add a comment |
up vote
1
down vote
favorite
I'm watching this series https://www.youtube.com/watch?v=wlnx-7cm4Gg&list=PL5tcWHG-UPH2zBfOz40HSzcGUPAVOOnu1 about mining tweets with Tweepy (Python). The author stores each tweet with all of its fields (such as created_at, id, id_str, text) and then uses pandas DataFrames to keep only the text. Is this approach efficient? How can I store only the "text" field in the JSON file instead of all the other details?
The code:
# Placeholder Twitter API credentials used when building the OAuth handler.
# NOTE(review): fill in real values locally and avoid committing secrets.
ACCESS_TOKEN = "xxxxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
import tweepy
import numpy as np
import pandas as pd
# import twitter_credentials
class TwitterAuthenticator():
    """
    Builds the OAuth handler from the module-level credential constants.
    """

    def authenticate_twitter_app(self):
        """Return a tweepy OAuthHandler with consumer and access tokens set."""
        handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return handler
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag):
        """Start a filtered stream whose messages go to a TwitterListener.

        fetched_tweets_filename is handed to the TwitterListener constructor;
        hash_tag is passed unchanged as the ``track`` argument of the filter.
        """
        # Handles Twitter authentication and the connection to the
        # Twitter Streaming API.
        sink = TwitterListener(fetched_tweets_filename)
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        tweepy.Stream(credentials, sink).filter(track=hash_tag)
class TwitterListener(tweepy.StreamListener):
    """
    Stream listener that prints each received message and appends it to a file.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the path of the file that received messages are appended to."""
        # Run the tweepy base-class initialiser too; the explicit two-argument
        # super() form works on both Python 2 and Python 3.
        super(TwitterListener, self).__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print the raw message and append it to the output file.

        Returns True in every case, including when printing or writing fails.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
        except Exception as e:
            # Best effort: report the failure and carry on. Exception (not
            # BaseException) is caught so that KeyboardInterrupt and
            # SystemExit still propagate and can stop the program.
            print("Error on_data %s" % str(e))
        return True

    def on_status(self, status):
        """Print a status object."""
        # NOTE(review): with on_data overridden above, tweepy's default
        # dispatch to on_status presumably never runs - confirm.
        print(status)

    def on_error(self, status):
        """Print an error status code; return False when it is 420."""
        if status == 420:
            # Rate limit reached: return False from on_error to signal that
            # the stream should not continue.
            return False
        print(status)
# public_tweets = api.home_timeline()
# for tweet in public_tweets:
# print tweet.text
if __name__ == '__main__':
    # Output file first, then the keyword list passed on as the track filter.
    fetched_tweets_filename = "tweets.json"
    hash_tag = ["python"]

    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(
        fetched_tweets_filename=fetched_tweets_filename,
        hash_tag=hash_tag,
    )
# print stream.text
The tweet stored in the json file:
"created_at":"Sun Nov 04 18:43:59 +0000 2018","id":1059154305498972160,"id_str":"1059154305498972160","text":"RT @hmason: When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn hu2026","source":"u003ca href="http://twitter.com/download/android" rel="nofollow"u003eTwitter for Androidu003c/au003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":14858491,"id_str":"14858491","name":"Alexandra Lemus","screen_name":"nankyoku","location":"Mu00e9xico","url":null,"description":"Transitioning into the Permanent Beta state...","translator_type":"none","protected":false,"verified":false,"followers_count":173,"friends_count":585,"listed_count":18,"favourites_count":658,"statuses_count":572,"created_at":"Wed May 21 16:35:49 +0000 2008","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"EDECE9","profile_background_image_url":"http://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_tile":false,"profile_link_color":"088253","profile_sidebar_border_color":"D3D2CF","profile_sidebar_fill_color":"E3E2DE","profile_text_color":"634047","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_image_url_https":"https://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_banner_url":"https://pbs.twimg.com/profile_banners/14858491/1381524599","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":"
created_at":"Sat Nov 03 17:36:24 +0000 2018","id":1058774912201035776,"id_str":"1058774912201035776","text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourselu2026 https://t.co/9F7SmlGfyf","source":"u003ca href="http://twitter.com" rel="nofollow"u003eTwitter Web Clientu003c/au003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":765548,"id_str":"765548","name":"Hilary Mason","screen_name":"hmason","location":"NYC","url":"http://www.hilarymason.com","description":"GM for Machine Learning at @Cloudera. Founder at @FastForwardLabs. Data Scientist in Residence at @accel. I u2665 data and cheeseburgers.","translator_type":"none","protected":false,"verified":true,"followers_count":111311,"friends_count":1539,"listed_count":5276,"favourites_count":12049,"statuses_count":17602,"created_at":"Sun Feb 11 21:22:24 +0000 
2007","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"282F8A","profile_sidebar_border_color":"87BC44","profile_sidebar_fill_color":"AB892B","profile_text_color":"000000","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/765548/1353033581","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":"full_text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn how it works, and then use a library to benefit from robust code.nnHere's one article showing this with neural networks in Python: 
https://t.co/3ehO86NFKI","display_text_range":[0,280],"entities":"hashtags":,"urls":["url":"https://t.co/3ehO86NFKI","expanded_url":"https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6","display_url":"towardsdatascience.com/how-to-build-yu2026","indices":[257,280]],"user_mentions":,"symbols":,"quote_count":14,"reply_count":8,"retweet_count":290,"favorite_count":1019,"entities":"hashtags":,"urls":["url":"https://t.co/9F7SmlGfyf","expanded_url":"https://twitter.com/i/web/status/1058774912201035776","display_url":"twitter.com/i/web/status/1u2026","indices":[117,140]],"user_mentions":,"symbols":,"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":"hashtags":,"urls":,"user_mentions":["screen_name":"hmason","name":"Hilary Mason","id":765548,"id_str":"765548","indices":[3,10]],"symbols":,"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1541357039223"
If the question is not clear, please leave a comment and I will edit the question.
python tweepy
I tried to use data["text"] but I get this error message: error on_data string indices must be integers
– sam jack
Nov 10 at 17:27
I also tried tf.write(data.text) and I got this error message: Error on_data 'unicode' object has no attribute 'text'
– sam jack
Nov 10 at 17:35
add a comment |
up vote
1
down vote
favorite
up vote
1
down vote
favorite
I'm watching this series https://www.youtube.com/watch?v=wlnx-7cm4Gg&list=PL5tcWHG-UPH2zBfOz40HSzcGUPAVOOnu1 about mining tweets with Tweepy (Python). The author stores each tweet with all of its fields (such as created_at, id, id_str, text) and then uses pandas DataFrames to keep only the text. Is this approach efficient? How can I store only the "text" field in the JSON file instead of all the other details?
The code:
# Placeholder Twitter API credentials used when building the OAuth handler.
# NOTE(review): fill in real values locally and avoid committing secrets.
ACCESS_TOKEN = "xxxxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
import tweepy
import numpy as np
import pandas as pd
# import twitter_credentials
class TwitterAuthenticator():
    """
    Builds the OAuth handler from the module-level credential constants.
    """

    def authenticate_twitter_app(self):
        """Return a tweepy OAuthHandler with consumer and access tokens set."""
        handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return handler
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag):
        """Start a filtered stream whose messages go to a TwitterListener.

        fetched_tweets_filename is handed to the TwitterListener constructor;
        hash_tag is passed unchanged as the ``track`` argument of the filter.
        """
        # Handles Twitter authentication and the connection to the
        # Twitter Streaming API.
        sink = TwitterListener(fetched_tweets_filename)
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        tweepy.Stream(credentials, sink).filter(track=hash_tag)
class TwitterListener(tweepy.StreamListener):
    """
    Stream listener that prints each received message and appends it to a file.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the path of the file that received messages are appended to."""
        # Run the tweepy base-class initialiser too; the explicit two-argument
        # super() form works on both Python 2 and Python 3.
        super(TwitterListener, self).__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print the raw message and append it to the output file.

        Returns True in every case, including when printing or writing fails.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
        except Exception as e:
            # Best effort: report the failure and carry on. Exception (not
            # BaseException) is caught so that KeyboardInterrupt and
            # SystemExit still propagate and can stop the program.
            print("Error on_data %s" % str(e))
        return True

    def on_status(self, status):
        """Print a status object."""
        # NOTE(review): with on_data overridden above, tweepy's default
        # dispatch to on_status presumably never runs - confirm.
        print(status)

    def on_error(self, status):
        """Print an error status code; return False when it is 420."""
        if status == 420:
            # Rate limit reached: return False from on_error to signal that
            # the stream should not continue.
            return False
        print(status)
# public_tweets = api.home_timeline()
# for tweet in public_tweets:
# print tweet.text
if __name__ == '__main__':
    # Output file first, then the keyword list passed on as the track filter.
    fetched_tweets_filename = "tweets.json"
    hash_tag = ["python"]

    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(
        fetched_tweets_filename=fetched_tweets_filename,
        hash_tag=hash_tag,
    )
# print stream.text
The tweet stored in the json file:
"created_at":"Sun Nov 04 18:43:59 +0000 2018","id":1059154305498972160,"id_str":"1059154305498972160","text":"RT @hmason: When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn hu2026","source":"u003ca href="http://twitter.com/download/android" rel="nofollow"u003eTwitter for Androidu003c/au003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":14858491,"id_str":"14858491","name":"Alexandra Lemus","screen_name":"nankyoku","location":"Mu00e9xico","url":null,"description":"Transitioning into the Permanent Beta state...","translator_type":"none","protected":false,"verified":false,"followers_count":173,"friends_count":585,"listed_count":18,"favourites_count":658,"statuses_count":572,"created_at":"Wed May 21 16:35:49 +0000 2008","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"EDECE9","profile_background_image_url":"http://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_tile":false,"profile_link_color":"088253","profile_sidebar_border_color":"D3D2CF","profile_sidebar_fill_color":"E3E2DE","profile_text_color":"634047","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_image_url_https":"https://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_banner_url":"https://pbs.twimg.com/profile_banners/14858491/1381524599","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":"
created_at":"Sat Nov 03 17:36:24 +0000 2018","id":1058774912201035776,"id_str":"1058774912201035776","text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourselu2026 https://t.co/9F7SmlGfyf","source":"u003ca href="http://twitter.com" rel="nofollow"u003eTwitter Web Clientu003c/au003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":765548,"id_str":"765548","name":"Hilary Mason","screen_name":"hmason","location":"NYC","url":"http://www.hilarymason.com","description":"GM for Machine Learning at @Cloudera. Founder at @FastForwardLabs. Data Scientist in Residence at @accel. I u2665 data and cheeseburgers.","translator_type":"none","protected":false,"verified":true,"followers_count":111311,"friends_count":1539,"listed_count":5276,"favourites_count":12049,"statuses_count":17602,"created_at":"Sun Feb 11 21:22:24 +0000 
2007","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"282F8A","profile_sidebar_border_color":"87BC44","profile_sidebar_fill_color":"AB892B","profile_text_color":"000000","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/765548/1353033581","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":"full_text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn how it works, and then use a library to benefit from robust code.nnHere's one article showing this with neural networks in Python: 
https://t.co/3ehO86NFKI","display_text_range":[0,280],"entities":"hashtags":,"urls":["url":"https://t.co/3ehO86NFKI","expanded_url":"https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6","display_url":"towardsdatascience.com/how-to-build-yu2026","indices":[257,280]],"user_mentions":,"symbols":,"quote_count":14,"reply_count":8,"retweet_count":290,"favorite_count":1019,"entities":"hashtags":,"urls":["url":"https://t.co/9F7SmlGfyf","expanded_url":"https://twitter.com/i/web/status/1058774912201035776","display_url":"twitter.com/i/web/status/1u2026","indices":[117,140]],"user_mentions":,"symbols":,"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":"hashtags":,"urls":,"user_mentions":["screen_name":"hmason","name":"Hilary Mason","id":765548,"id_str":"765548","indices":[3,10]],"symbols":,"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1541357039223"
If the question is not clear, please leave a comment and I will edit the question.
python tweepy
I'm watching this series https://www.youtube.com/watch?v=wlnx-7cm4Gg&list=PL5tcWHG-UPH2zBfOz40HSzcGUPAVOOnu1 about mining tweets with Tweepy (Python). The author stores each tweet with all of its fields (such as created_at, id, id_str, text) and then uses pandas DataFrames to keep only the text. Is this approach efficient? How can I store only the "text" field in the JSON file instead of all the other details?
The code:
# Placeholder Twitter API credentials used when building the OAuth handler.
# NOTE(review): fill in real values locally and avoid committing secrets.
ACCESS_TOKEN = "xxxxxxxxxxxxxxxxxxxxx"
ACCESS_TOKEN_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_KEY = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
CONSUMER_SECRET = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"
import tweepy
import numpy as np
import pandas as pd
# import twitter_credentials
class TwitterAuthenticator():
    """
    Builds the OAuth handler from the module-level credential constants.
    """

    def authenticate_twitter_app(self):
        """Return a tweepy OAuthHandler with consumer and access tokens set."""
        handler = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET)
        handler.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
        return handler
class TwitterStreamer():
    """
    Class for streaming and processing live tweets.
    """

    def __init__(self):
        self.twitter_authenticator = TwitterAuthenticator()

    def stream_tweets(self, fetched_tweets_filename, hash_tag):
        """Start a filtered stream whose messages go to a TwitterListener.

        fetched_tweets_filename is handed to the TwitterListener constructor;
        hash_tag is passed unchanged as the ``track`` argument of the filter.
        """
        # Handles Twitter authentication and the connection to the
        # Twitter Streaming API.
        sink = TwitterListener(fetched_tweets_filename)
        credentials = self.twitter_authenticator.authenticate_twitter_app()
        tweepy.Stream(credentials, sink).filter(track=hash_tag)
class TwitterListener(tweepy.StreamListener):
    """
    Stream listener that prints each received message and appends it to a file.
    """

    def __init__(self, fetched_tweets_filename):
        """Remember the path of the file that received messages are appended to."""
        # Run the tweepy base-class initialiser too; the explicit two-argument
        # super() form works on both Python 2 and Python 3.
        super(TwitterListener, self).__init__()
        self.fetched_tweets_filename = fetched_tweets_filename

    def on_data(self, data):
        """Print the raw message and append it to the output file.

        Returns True in every case, including when printing or writing fails.
        """
        try:
            print(data)
            with open(self.fetched_tweets_filename, 'a') as tf:
                tf.write(data)
        except Exception as e:
            # Best effort: report the failure and carry on. Exception (not
            # BaseException) is caught so that KeyboardInterrupt and
            # SystemExit still propagate and can stop the program.
            print("Error on_data %s" % str(e))
        return True

    def on_status(self, status):
        """Print a status object."""
        # NOTE(review): with on_data overridden above, tweepy's default
        # dispatch to on_status presumably never runs - confirm.
        print(status)

    def on_error(self, status):
        """Print an error status code; return False when it is 420."""
        if status == 420:
            # Rate limit reached: return False from on_error to signal that
            # the stream should not continue.
            return False
        print(status)
# public_tweets = api.home_timeline()
# for tweet in public_tweets:
# print tweet.text
if __name__ == '__main__':
    # Stream tweets matching the tracked keywords and append them to the
    # output file.
    hash_tag = ["python"]
    fetched_tweets_filename = "tweets.json"
    twitter_streamer = TwitterStreamer()
    twitter_streamer.stream_tweets(fetched_tweets_filename, hash_tag)
# print stream.text
The tweet stored in the json file:
"created_at":"Sun Nov 04 18:43:59 +0000 2018","id":1059154305498972160,"id_str":"1059154305498972160","text":"RT @hmason: When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn hu2026","source":"u003ca href="http://twitter.com/download/android" rel="nofollow"u003eTwitter for Androidu003c/au003e","truncated":false,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":14858491,"id_str":"14858491","name":"Alexandra Lemus","screen_name":"nankyoku","location":"Mu00e9xico","url":null,"description":"Transitioning into the Permanent Beta state...","translator_type":"none","protected":false,"verified":false,"followers_count":173,"friends_count":585,"listed_count":18,"favourites_count":658,"statuses_count":572,"created_at":"Wed May 21 16:35:49 +0000 2008","utc_offset":null,"time_zone":null,"geo_enabled":true,"lang":"es","contributors_enabled":false,"is_translator":false,"profile_background_color":"EDECE9","profile_background_image_url":"http://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme3/bg.gif","profile_background_tile":false,"profile_link_color":"088253","profile_sidebar_border_color":"D3D2CF","profile_sidebar_fill_color":"E3E2DE","profile_text_color":"634047","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_image_url_https":"https://pbs.twimg.com/profile_images/378800000575875952/f00390453684dd243d7ca95c69a05f74_normal.jpeg","profile_banner_url":"https://pbs.twimg.com/profile_banners/14858491/1381524599","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"retweeted_status":"
created_at":"Sat Nov 03 17:36:24 +0000 2018","id":1058774912201035776,"id_str":"1058774912201035776","text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourselu2026 https://t.co/9F7SmlGfyf","source":"u003ca href="http://twitter.com" rel="nofollow"u003eTwitter Web Clientu003c/au003e","truncated":true,"in_reply_to_status_id":null,"in_reply_to_status_id_str":null,"in_reply_to_user_id":null,"in_reply_to_user_id_str":null,"in_reply_to_screen_name":null,"user":"id":765548,"id_str":"765548","name":"Hilary Mason","screen_name":"hmason","location":"NYC","url":"http://www.hilarymason.com","description":"GM for Machine Learning at @Cloudera. Founder at @FastForwardLabs. Data Scientist in Residence at @accel. I u2665 data and cheeseburgers.","translator_type":"none","protected":false,"verified":true,"followers_count":111311,"friends_count":1539,"listed_count":5276,"favourites_count":12049,"statuses_count":17602,"created_at":"Sun Feb 11 21:22:24 +0000 
2007","utc_offset":null,"time_zone":null,"geo_enabled":false,"lang":"en","contributors_enabled":false,"is_translator":false,"profile_background_color":"000000","profile_background_image_url":"http://abs.twimg.com/images/themes/theme1/bg.png","profile_background_image_url_https":"https://abs.twimg.com/images/themes/theme1/bg.png","profile_background_tile":false,"profile_link_color":"282F8A","profile_sidebar_border_color":"87BC44","profile_sidebar_fill_color":"AB892B","profile_text_color":"000000","profile_use_background_image":true,"profile_image_url":"http://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_image_url_https":"https://pbs.twimg.com/profile_images/948689418709323777/sTBM3vG0_normal.jpg","profile_banner_url":"https://pbs.twimg.com/profile_banners/765548/1353033581","default_profile":false,"default_profile_image":false,"following":null,"follow_request_sent":null,"notifications":null,"geo":null,"coordinates":null,"place":null,"contributors":null,"is_quote_status":false,"extended_tweet":"full_text":"When you want to use a new algorithm that you don't deeply understand, the best approach is to implement it yourself to learn how it works, and then use a library to benefit from robust code.nnHere's one article showing this with neural networks in Python: 
https://t.co/3ehO86NFKI","display_text_range":[0,280],"entities":"hashtags":,"urls":["url":"https://t.co/3ehO86NFKI","expanded_url":"https://towardsdatascience.com/how-to-build-your-own-neural-network-from-scratch-in-python-68998a08e4f6","display_url":"towardsdatascience.com/how-to-build-yu2026","indices":[257,280]],"user_mentions":,"symbols":,"quote_count":14,"reply_count":8,"retweet_count":290,"favorite_count":1019,"entities":"hashtags":,"urls":["url":"https://t.co/9F7SmlGfyf","expanded_url":"https://twitter.com/i/web/status/1058774912201035776","display_url":"twitter.com/i/web/status/1u2026","indices":[117,140]],"user_mentions":,"symbols":,"favorited":false,"retweeted":false,"possibly_sensitive":false,"filter_level":"low","lang":"en","is_quote_status":false,"quote_count":0,"reply_count":0,"retweet_count":0,"favorite_count":0,"entities":"hashtags":,"urls":,"user_mentions":["screen_name":"hmason","name":"Hilary Mason","id":765548,"id_str":"765548","indices":[3,10]],"symbols":,"favorited":false,"retweeted":false,"filter_level":"low","lang":"en","timestamp_ms":"1541357039223"
If the question is not clear, please leave a comment and I will try to edit the question.
python tweepy
python tweepy
edited Nov 10 at 17:31
asked Nov 10 at 17:23
sam jack
106
106
I tried to use data["text"] but I get this error message: error on_data string indices must be integers
– sam jack
Nov 10 at 17:27
I also tried tf.write(data.text) and I got this error message: Error on_data 'unicode' object has no attribute 'text'
– sam jack
Nov 10 at 17:35
add a comment |
I tried to use data["text"] but I get this error message: error on_data string indices must be integers
– sam jack
Nov 10 at 17:27
I also tried tf.write(data.text) and I got this error message: Error on_data 'unicode' object has no attribute 'text'
– sam jack
Nov 10 at 17:35
I tried to use
data["text"] but I get this error message error on_data string indices must be integers– sam jack
Nov 10 at 17:27
I tried to use
data["text"] but I get this error message error on_data string indices must be integers– sam jack
Nov 10 at 17:27
I also tried
tf.write(data.text) and I got this error message Error on_data 'unicode' object has no attribute 'text' – sam jack
Nov 10 at 17:35
I also tried
tf.write(data.text) and I got this error message Error on_data 'unicode' object has no attribute 'text' – sam jack
Nov 10 at 17:35
add a comment |
2 Answers
2
active
oldest
votes
up vote
1
down vote
accepted
If you want only the "text" field to be saved in the json file, you can tweak the definition of the TwitterListener.on_data method:
import json
def on_data(self, data):
    """Append only the tweet's text to the output file, one JSON object per line.

    data: raw JSON string received from the stream.

    Payloads that have no "text" key are skipped. Always returns True;
    failures (e.g. invalid JSON, I/O errors) are reported on stdout instead
    of being raised.
    """
    try:
        print(data)
        tweet = json.loads(data)
        # Skip payloads without a "text" key rather than raising KeyError.
        if isinstance(tweet, dict) and 'text' in tweet:
            record = {'text': tweet['text']}
            with open(self.fetched_tweets_filename, 'a') as tf:
                # Newline-terminate each record so the file can be read back
                # line by line with json.loads.
                tf.write(json.dumps(record) + '\n')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print("Error on_data %s" % str(e))
    return True
Fair warning, I don't have tweepy installed/set up, so I was only able to test a version of the above code using the json file you posted above. Let me know if you run into any bugs and I'll see what I can do.
I get this error message: Error on_data global name 'json' is not defined when I run your version of on_data
– sam jack
Nov 10 at 17:54
You'll need to add import json to the top of the file along with the rest of the import statements.
– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now: Error on_data local variable 'texts' referenced before assignment
– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
add a comment |
up vote
1
down vote
It looks like what you're getting from the API and storing in your variable "data" is unicode text in a json format. You are just writing that text directly to a file. Using the API call you do, you're always going to get all of the data so it isn't that inefficient. If you just wanted to get/write the text of the tweet, try using a json load and then processing from there.
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
add a comment |
2 Answers
2
active
oldest
votes
2 Answers
2
active
oldest
votes
active
oldest
votes
active
oldest
votes
up vote
1
down vote
accepted
If you want only the "text" field to be saved in the json file, you can tweak the definition of the TwitterListener.on_data method:
import json
def on_data(self, data):
    """Append only the tweet's text to the output file, one JSON object per line.

    data: raw JSON string received from the stream.

    Payloads that have no "text" key are skipped. Always returns True;
    failures (e.g. invalid JSON, I/O errors) are reported on stdout instead
    of being raised.
    """
    try:
        print(data)
        tweet = json.loads(data)
        # Skip payloads without a "text" key rather than raising KeyError.
        if isinstance(tweet, dict) and 'text' in tweet:
            record = {'text': tweet['text']}
            with open(self.fetched_tweets_filename, 'a') as tf:
                # Newline-terminate each record so the file can be read back
                # line by line with json.loads.
                tf.write(json.dumps(record) + '\n')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print("Error on_data %s" % str(e))
    return True
Fair warning, I don't have tweepy installed/set up, so I was only able to test a version of the above code using the json file you posted above. Let me know if you run into any bugs and I'll see what I can do.
I get this error message: Error on_data global name 'json' is not defined when I run your version of on_data
– sam jack
Nov 10 at 17:54
You'll need to add import json to the top of the file along with the rest of the import statements.
– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now: Error on_data local variable 'texts' referenced before assignment
– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
add a comment |
up vote
1
down vote
accepted
If you want only the "text" field to be saved in the json file, you can tweak the definition of the TwitterListener.on_data method:
import json
def on_data(self, data):
    """Append only the tweet's text to the output file, one JSON object per line.

    data: raw JSON string received from the stream.

    Payloads that have no "text" key are skipped. Always returns True;
    failures (e.g. invalid JSON, I/O errors) are reported on stdout instead
    of being raised.
    """
    try:
        print(data)
        tweet = json.loads(data)
        # Skip payloads without a "text" key rather than raising KeyError.
        if isinstance(tweet, dict) and 'text' in tweet:
            record = {'text': tweet['text']}
            with open(self.fetched_tweets_filename, 'a') as tf:
                # Newline-terminate each record so the file can be read back
                # line by line with json.loads.
                tf.write(json.dumps(record) + '\n')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print("Error on_data %s" % str(e))
    return True
Fair warning, I don't have tweepy installed/set up, so I was only able to test a version of the above code using the json file you posted above. Let me know if you run into any bugs and I'll see what I can do.
I get this error message: Error on_data global name 'json' is not defined when I run your version of on_data
– sam jack
Nov 10 at 17:54
You'll need to add import json to the top of the file along with the rest of the import statements.
– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now: Error on_data local variable 'texts' referenced before assignment
– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
add a comment |
up vote
1
down vote
accepted
up vote
1
down vote
accepted
If you want only the "text" field to be saved in the json file, you can tweak the definition of the TwitterListener.on_data method:
import json
def on_data(self, data):
    """Append only the tweet's text to the output file, one JSON object per line.

    data: raw JSON string received from the stream.

    Payloads that have no "text" key are skipped. Always returns True;
    failures (e.g. invalid JSON, I/O errors) are reported on stdout instead
    of being raised.
    """
    try:
        print(data)
        tweet = json.loads(data)
        # Skip payloads without a "text" key rather than raising KeyError.
        if isinstance(tweet, dict) and 'text' in tweet:
            record = {'text': tweet['text']}
            with open(self.fetched_tweets_filename, 'a') as tf:
                # Newline-terminate each record so the file can be read back
                # line by line with json.loads.
                tf.write(json.dumps(record) + '\n')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print("Error on_data %s" % str(e))
    return True
Fair warning, I don't have tweepy installed/set up, so I was only able to test a version of the above code using the json file you posted above. Let me know if you run into any bugs and I'll see what I can do.
If you want only the "text" field to be saved in the json file, you can tweak the definition of the TwitterListener.on_data method:
import json
def on_data(self, data):
    """Append only the tweet's text to the output file, one JSON object per line.

    data: raw JSON string received from the stream.

    Payloads that have no "text" key are skipped. Always returns True;
    failures (e.g. invalid JSON, I/O errors) are reported on stdout instead
    of being raised.
    """
    try:
        print(data)
        tweet = json.loads(data)
        # Skip payloads without a "text" key rather than raising KeyError.
        if isinstance(tweet, dict) and 'text' in tweet:
            record = {'text': tweet['text']}
            with open(self.fetched_tweets_filename, 'a') as tf:
                # Newline-terminate each record so the file can be read back
                # line by line with json.loads.
                tf.write(json.dumps(record) + '\n')
    except Exception as e:
        # Exception (not BaseException) so Ctrl-C / SystemExit still propagate.
        print("Error on_data %s" % str(e))
    return True
Fair warning, I don't have tweepy installed/set up, so I was only able to test a version of the above code using the json file you posted above. Let me know if you run into any bugs and I'll see what I can do.
edited Nov 10 at 18:21
answered Nov 10 at 17:44
tel
2,5441326
2,5441326
I get this error message: Error on_data global name 'json' is not defined when I run your version of on_data
– sam jack
Nov 10 at 17:54
You'll need to add import json to the top of the file along with the rest of the import statements.
– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now: Error on_data local variable 'texts' referenced before assignment
– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
add a comment |
I get this error message: Error on_data global name 'json' is not defined when I run your version of on_data
– sam jack
Nov 10 at 17:54
You'll need to add import json to the top of the file along with the rest of the import statements.
– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now: Error on_data local variable 'texts' referenced before assignment
– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
I get this error message
Error on_data global name 'json' is not defined when I run your version of on_data– sam jack
Nov 10 at 17:54
I get this error message
Error on_data global name 'json' is not defined when I run your version of on_data– sam jack
Nov 10 at 17:54
You'll need to add
import json to the top of the file along with the rest of the import statements.– tel
Nov 10 at 17:56
You'll need to add
import json to the top of the file along with the rest of the import statements.– tel
Nov 10 at 17:56
Thanks. I added it and that error message is gone. I'm getting this error message now
Error on_data local variable 'texts' referenced before assignment– sam jack
Nov 10 at 17:57
Thanks. I added it and that error message is gone. I'm getting this error message now
Error on_data local variable 'texts' referenced before assignment– sam jack
Nov 10 at 17:57
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
@samjack There were a couple of bugs that should be fixed now. Unfortunately, I don't have tweepy so I can't test out the whole thing. Let me know if there's any other trouble.
– tel
Nov 10 at 18:23
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
Thanks sooo much ! it works like a charm
– sam jack
Nov 10 at 18:28
add a comment |
up vote
1
down vote
It looks like what you're getting from the API and storing in your variable "data" is unicode text in a json format. You are just writing that text directly to a file. Using the API call you do, you're always going to get all of the data so it isn't that inefficient. If you just wanted to get/write the text of the tweet, try using a json load and then processing from there.
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
add a comment |
up vote
1
down vote
It looks like what you're getting from the API and storing in your variable "data" is unicode text in a json format. You are just writing that text directly to a file. Using the API call you do, you're always going to get all of the data so it isn't that inefficient. If you just wanted to get/write the text of the tweet, try using a json load and then processing from there.
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
add a comment |
up vote
1
down vote
up vote
1
down vote
It looks like what you're getting from the API and storing in your variable "data" is unicode text in a json format. You are just writing that text directly to a file. Using the API call you do, you're always going to get all of the data so it isn't that inefficient. If you just wanted to get/write the text of the tweet, try using a json load and then processing from there.
It looks like what you're getting from the API and storing in your variable "data" is unicode text in a json format. You are just writing that text directly to a file. Using the API call you do, you're always going to get all of the data so it isn't that inefficient. If you just wanted to get/write the text of the tweet, try using a json load and then processing from there.
answered Nov 10 at 17:39
mini totent
320211
320211
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
add a comment |
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
I see ... Thanks so much
– sam jack
Nov 10 at 17:43
add a comment |
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
StackExchange.ready(
function ()
StackExchange.openid.initPostLogin('.new-post-login', 'https%3a%2f%2fstackoverflow.com%2fquestions%2f53241521%2fhow-to-store-only-the-text-of-tweet-using-tweepy%23new-answer', 'question_page');
);
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Sign up or log in
StackExchange.ready(function ()
StackExchange.helpers.onClickDraftSave('#login-link');
);
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Sign up using Google
Sign up using Facebook
Sign up using Email and Password
Post as a guest
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
Required, but never shown
I tried to use
data["text"] but I get this error message: error on_data string indices must be integers – sam jack
Nov 10 at 17:27
I also tried
tf.write(data.text) and I got this error message: Error on_data 'unicode' object has no attribute 'text' – sam jack
Nov 10 at 17:35