# Importing the relevant libraries.
import tweepy
import json
import pandas as pd
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib as mpl
import csv
import matplotlib.pyplot as plt
import operator
from textblob import TextBlob
from textblob import Word
from textblob.sentiments import NaiveBayesAnalyzer
import imageio
#Authentication
# Twitter API credentials — intentionally blank; fill in before running.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret) #Interacting with twitter's API
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API (auth) #creating the API object
# Search word/hashtag value
HashValue = "ODSCEurope"
# search start date value. the search will start from this date to the current date.
StartDate = "2019-06-25"
#Extracting Tweets
# NOTE(review): written against tweepy v3 — in tweepy v4 `api.search` was renamed
# `api.search_tweets` and the `since` kwarg was removed; confirm installed version.
# Cursor paginates the search endpoint; .items(2000) caps the harvest at 2000 tweets.
results = []
for tweet in tweepy.Cursor(api.search, q=HashValue, lang="en", since=StartDate).items(2000):
    results.append(tweet)
print(type(results))
print(len(results))
#Store tweets data in a dataframe
def tweets_df(results):
    """Build a DataFrame of tweet metadata from a list of tweepy Status objects.

    Parameters
    ----------
    results : list
        Objects exposing ``.id``, ``.text``, ``.created_at``, ``.retweet_count``,
        ``.author`` (with ``.screen_name``, ``.followers_count``, ``.location``)
        and ``.entities`` (a dict that may contain a ``'hashtags'`` key).

    Returns
    -------
    pandas.DataFrame
        One row per tweet, columns in the original fixed order.
    """
    # Construct all columns in one shot instead of mutating the frame
    # column-by-column — one pass per field, identical resulting layout.
    return pd.DataFrame({
        "id": [t.id for t in results],
        "text": [t.text for t in results],
        "created_at": [t.created_at for t in results],
        "retweet_count": [t.retweet_count for t in results],
        # NOTE(review): `.author` is the legacy alias of `.user` in tweepy.
        "user_screen_name": [t.author.screen_name for t in results],
        "user_followers_count": [t.author.followers_count for t in results],
        "user_location": [t.author.location for t in results],
        # .get returns None when the tweet has no 'hashtags' entity.
        "Hashtags": [t.entities.get('hashtags') for t in results],
    })
data_set = tweets_df(results)
# Remove tweets with duplicate text.
# Stripping https:// links first makes retweet/share variants of the same
# tweet collapse to identical text so drop_duplicates catches them.
text = data_set["text"]
for i in range(len(text)):
    txt = ' '.join(word for word in text[i].split() if not word.startswith('https:'))
    # DataFrame.set_value was removed in pandas 1.0 — .at is the scalar setter.
    data_set.at[i, 'text2'] = txt
data_set.drop_duplicates('text2', inplace=True)
data_set.reset_index(drop=True, inplace=True)
# Replace the raw text column with the link-free version under the old name.
data_set.drop('text', axis=1, inplace=True)
data_set.rename(columns={'text2': 'text'}, inplace=True)
data_set.head()
## Sentiment Analysis
# Score each tweet's polarity with TextBlob ([-1, 1]) and bucket it into a
# Negative / Positive / Neutral class label.
text = data_set["text"]
for i in range(len(text)):
    polarity = TextBlob(text[i]).sentiment.polarity
    # .at replaces DataFrame.set_value, which was removed in pandas 1.0.
    data_set.at[i, 'Sentiment'] = polarity
    if polarity < 0.00:
        sentiment_class = 'Negative'
    elif polarity > 0.00:
        sentiment_class = 'Positive'
    else:
        sentiment_class = 'Neutral'
    # Single write instead of one duplicated call per branch.
    data_set.at[i, 'SentimentClass'] = sentiment_class
data_set.head()
# Create a frame of (polarity, text) pairs, dropping zero-polarity tweets so
# the histogram shows only tweets with a detectable sentiment.
new_data_df = pd.DataFrame(data_set, columns=['Sentiment', 'text'])
nonzero = new_data_df.Sentiment != 0
new_data_df = new_data_df[nonzero]
new_data_df.head()
fig, ax = plt.subplots(figsize=(8, 6))
polarity_bins = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]
new_data_df.hist(bins=polarity_bins, ax=ax, color="purple")
plt.title("Sentiments from Tweets on the ODSCEurope")
plt.show()
# Persist the fully scored tweet table for later analysis.
data_set.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope.csv")
# Collect every hashtag text from the scraped tweets into a one-column frame.
# DataFrame.set_value was removed in pandas 1.0; gather into a list and build
# the frame in one shot instead of mutating it cell by cell.
all_tags = []
for tweet in results:
    # entities.get('hashtags') returns None when the key is absent — guard it
    # so a tweet without hashtag entities no longer crashes the loop.
    for tag in (tweet.entities.get('hashtags') or []):
        all_tags.append(tag['text'])
Htag_df = pd.DataFrame({'Hashtag': all_tags})
# Frequency of each hashtag, indexed by hashtag text.
trialeurope_Htag_wordcloud = Htag_df.groupby('Hashtag').size()
trialeurope_Htag_wordcloud.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope_Htag_wordcloud.csv")
# Join all the text from the 1000 tweets
# Flatten every hashtag into one whitespace-separated string for the cloud.
Hashtag_Combined = " ".join(Htag_df['Hashtag'].values.astype(str))
# Re-split and re-join: normalizes whitespace only (no words are filtered).
no_europe = " ".join(Hashtag_Combined.split())
# The PNG mask constrains the cloud to the Twitter-bird silhouette.
Tweet_mask = imageio.imread("C:\\Users\\iam00\\Desktop\\Bigpodium\\twitter_mask.png")
#Create a Word Cloud
wc = WordCloud(background_color="white", stopwords=STOPWORDS, mask=Tweet_mask)
wc.generate(no_europe)
plt.imshow(wc)
plt.axis("off")
plt.savefig('C:\\Users\\iam00\\Desktop\\Bigpodium\\europe_Hashtag.png', dpi=300)
plt.show()