In [3]:
# Importing the relevant libraries.
import tweepy
import json
import pandas as pd
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib as mpl
import csv
import matplotlib.pyplot as plt

import operator
from textblob import TextBlob
from textblob import Word
from textblob.sentiments import NaiveBayesAnalyzer
import imageio
In [37]:
#Authentication

consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''

auth = tweepy.OAuthHandler(consumer_key, consumer_secret) #Interacting with twitter's API
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API (auth) #creating the API object

# Search word/hashtag value 
HashValue = "ODSCEurope"

# search start date value. the search will start from this date to the current date.
StartDate = "2019-06-25"

#Extracting Tweets
results = []

for tweet in tweepy.Cursor(api.search,q=HashValue,lang="en",since=StartDate).items(2000):
    results.append(tweet)
    
print (type(results))
print (len(results))
<class 'list'>
85
In [38]:
#Store tweets data in a dataframe

def tweets_df(results):
    id_list = [tweet.id for tweet  in results]
    data_set = pd.DataFrame(id_list, columns = ["id"])
    
    data_set["text"] = [tweet.text for tweet in results]
    data_set["created_at"] = [tweet.created_at for tweet in results]
    data_set["retweet_count"] = [tweet.retweet_count for tweet in results]
    data_set["user_screen_name"] = [tweet.author.screen_name for tweet in results]
    data_set["user_followers_count"] = [tweet.author.followers_count for tweet in results]
    data_set["user_location"] = [tweet.author.location for tweet in results]
    data_set["Hashtags"] = [tweet.entities.get('hashtags') for tweet in results]
    
    return data_set
data_set = tweets_df(results)
In [39]:
# Remove tweets with duplicate text

text = data_set["text"]

for i in range(0,len(text)):
    txt = ' '.join(word for word in text[i] .split() if not word.startswith('https:'))
    data_set.set_value(i, 'text2', txt)
    
data_set.drop_duplicates('text2', inplace=True)
data_set.reset_index(drop = True, inplace=True)
data_set.drop('text', axis = 1, inplace = True)
data_set.rename(columns={'text2': 'text'}, inplace=True)
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:7: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
In [40]:
data_set.head()
Out[40]:
id created_at retweet_count user_screen_name user_followers_count user_location Hashtags text
0 1159279337046233088 2019-08-08 01:45:06 11 DamiSammarro 1929 Buenos Aires - Argentina [{'text': 'AI', 'indices': [81, 84]}, {'text':... RT @KirkDBorne: The trend is clear. With some ...
1 1159207366170152960 2019-08-07 20:59:07 0 stanford__ai 441 Cambridge, MA [{'text': 'ODSCEurope', 'indices': [7, 18]}, {... Attend #ODSCEurope, the largest applied data s...
2 1159193190865522690 2019-08-07 20:02:48 1 JustBeMentalist 1216 Shropshire [{'text': 'ODSCEurope', 'indices': [17, 28]}, ... RT @odsc: Attend #ODSCEurope, the largest appl...
3 1159182135107969025 2019-08-07 19:18:52 3 CorrieG75686244 112 Zuid-Holland, Nederland [{'text': 'ML', 'indices': [38, 41]}, {'text':... RT @odsc: Stay at the cutting-edge of #ML and ...
4 1159147098123771905 2019-08-07 16:59:38 0 stanford__ai 441 Cambridge, MA [{'text': 'datascience', 'indices': [41, 53]},... Gain insights on how the intersection of #data...
In [41]:
## Sentiment Analysis
text = data_set["text"]

for i in range(0,len(text)):
    textB = TextBlob(text[i])
    sentiment = textB.sentiment.polarity
    data_set.set_value(i, 'Sentiment',sentiment)
    if sentiment <0.00:
        SentimentClass = 'Negative'
        data_set.set_value(i, 'SentimentClass', SentimentClass )
    elif sentiment >0.00:
        SentimentClass = 'Positive'
        data_set.set_value(i, 'SentimentClass', SentimentClass )
    else:
        SentimentClass = 'Neutral'
        data_set.set_value(i, 'SentimentClass', SentimentClass )
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:7: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:13: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:16: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
In [42]:
data_set.head()
Out[42]:
id created_at retweet_count user_screen_name user_followers_count user_location Hashtags text Sentiment SentimentClass
0 1159279337046233088 2019-08-08 01:45:06 11 DamiSammarro 1929 Buenos Aires - Argentina [{'text': 'AI', 'indices': [81, 84]}, {'text':... RT @KirkDBorne: The trend is clear. With some ... 0.550000 Positive
1 1159207366170152960 2019-08-07 20:59:07 0 stanford__ai 441 Cambridge, MA [{'text': 'ODSCEurope', 'indices': [7, 18]}, {... Attend #ODSCEurope, the largest applied data s... 0.500000 Positive
2 1159193190865522690 2019-08-07 20:02:48 1 JustBeMentalist 1216 Shropshire [{'text': 'ODSCEurope', 'indices': [17, 28]}, ... RT @odsc: Attend #ODSCEurope, the largest appl... 0.500000 Positive
3 1159182135107969025 2019-08-07 19:18:52 3 CorrieG75686244 112 Zuid-Holland, Nederland [{'text': 'ML', 'indices': [38, 41]}, {'text':... RT @odsc: Stay at the cutting-edge of #ML and ... 0.318182 Positive
4 1159147098123771905 2019-08-07 16:59:38 0 stanford__ai 441 Cambridge, MA [{'text': 'datascience', 'indices': [41, 53]},... Gain insights on how the intersection of #data... 0.000000 Neutral
In [59]:
# Create dataframe containing polarity values and text
new_data_df=pd.DataFrame(data_set,columns=['Sentiment','text'])
new_data_df=new_data_df[new_data_df.Sentiment !=0]
new_data_df.head()
Out[59]:
Sentiment text
0 0.550000 RT @KirkDBorne: The trend is clear. With some ...
1 0.500000 Attend #ODSCEurope, the largest applied data s...
2 0.500000 RT @odsc: Attend #ODSCEurope, the largest appl...
3 0.318182 RT @odsc: Stay at the cutting-edge of #ML and ...
8 0.550000 RT KirkDBorne: The trend is clear. With some o...
In [60]:
fig, ax = plt.subplots(figsize=(8, 6))

new_data_df.hist(bins=[-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1],
        ax=ax, color="purple")

plt.title("Sentiments from Tweets on the ODSCEurope")
plt.show()
In [44]:
data_set.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope.csv")
In [45]:
Htag_df = pd.DataFrame()
j = 0

for tweet in range(0,len(results)):
    hashtag = results[tweet].entities.get('hashtags')
    for i in range(0,len(hashtag)):
        Htag = hashtag[i]['text'] 
        Htag_df.set_value(j, 'Hashtag',Htag)
        j = j+1
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:8: FutureWarning: set_value is deprecated and will be removed in a future release. Please use .at[] or .iat[] accessors instead
In [47]:
trialeurope_Htag_wordcloud = Htag_df.groupby('Hashtag').size()
trialeurope_Htag_wordcloud.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope_Htag_wordcloud.csv")
C:\Users\iam00\Anaconda3\lib\site-packages\ipykernel\__main__.py:2: FutureWarning: The signature of `Series.to_csv` was aligned to that of `DataFrame.to_csv`, and argument 'header' will change its default value from False to True: please pass an explicit value to suppress this warning.
  from ipykernel import kernelapp as app
In [48]:
# Join all the text from the 1000 tweets
Hashtag_Combined = " ".join(Htag_df['Hashtag'].values.astype(str))
no_europe = " ".join([word for word in Hashtag_Combined.split()
                               
                                
                 ])
                                                                
Tweet_mask = imageio.imread("C:\\Users\\iam00\\Desktop\\Bigpodium\\twitter_mask.png")
In [50]:
#Create a Word Cloud
wc = WordCloud(background_color="white", stopwords=STOPWORDS, mask = Tweet_mask)
wc.generate(no_europe)
plt.imshow(wc)
plt.axis("off")
plt.savefig('C:\\Users\\iam00\\Desktop\\Bigpodium\\europe_Hashtag.png', dpi=300)
plt.show()