# Importing the relevant libraries.
import tweepy
import json
import pandas as pd
from scipy.misc import imread
from wordcloud import WordCloud, STOPWORDS, ImageColorGenerator
import matplotlib as mpl
import csv
import matplotlib.pyplot as plt
import operator
from textblob import TextBlob
from textblob import Word
from textblob.sentiments import NaiveBayesAnalyzer
import imageio
#Authentication
# Twitter API credentials — intentionally blank; fill in before running.
consumer_key = ''
consumer_secret = ''
access_token = ''
access_token_secret = ''
auth = tweepy.OAuthHandler(consumer_key, consumer_secret) #Interacting with twitter's API
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API (auth) #creating the API object
# Search word/hashtag value
HashValue = "ODSCEurope"
# search start date value. the search will start from this date to the current date.
StartDate = "2019-06-25"
#Extracting Tweets
# NOTE(review): written against tweepy v3 — in tweepy v4 `api.search` was renamed
# `api.search_tweets` and the `since` kwarg was removed; confirm installed version.
# Cursor paginates the search endpoint; .items(2000) caps the harvest at 2000 tweets.
results = []
for tweet in tweepy.Cursor(api.search, q=HashValue, lang="en", since=StartDate).items(2000):
    results.append(tweet)
print(type(results))
print(len(results))
#Store tweets data in a dataframe
def tweets_df(results):
    """Build a DataFrame of tweet metadata from a list of tweepy Status objects.

    Parameters
    ----------
    results : list
        Objects exposing ``.id``, ``.text``, ``.created_at``, ``.retweet_count``,
        ``.author`` (with ``.screen_name``, ``.followers_count``, ``.location``)
        and ``.entities`` (a dict that may contain a ``'hashtags'`` key).

    Returns
    -------
    pandas.DataFrame
        One row per tweet, columns in the original fixed order.
    """
    # Construct all columns in one shot instead of mutating the frame
    # column-by-column — one pass per field, identical resulting layout.
    return pd.DataFrame({
        "id": [t.id for t in results],
        "text": [t.text for t in results],
        "created_at": [t.created_at for t in results],
        "retweet_count": [t.retweet_count for t in results],
        # NOTE(review): `.author` is the legacy alias of `.user` in tweepy.
        "user_screen_name": [t.author.screen_name for t in results],
        "user_followers_count": [t.author.followers_count for t in results],
        "user_location": [t.author.location for t in results],
        # .get returns None when the tweet has no 'hashtags' entity.
        "Hashtags": [t.entities.get('hashtags') for t in results],
    })
data_set = tweets_df(results)
# Remove tweets with duplicate text.
# Stripping https:// links first makes retweet/share variants of the same
# tweet collapse to identical text so drop_duplicates catches them.
text = data_set["text"]
for i in range(len(text)):
    txt = ' '.join(word for word in text[i].split() if not word.startswith('https:'))
    # DataFrame.set_value was removed in pandas 1.0 — .at is the scalar setter.
    data_set.at[i, 'text2'] = txt
data_set.drop_duplicates('text2', inplace=True)
data_set.reset_index(drop=True, inplace=True)
# Replace the raw text column with the link-free version under the old name.
data_set.drop('text', axis=1, inplace=True)
data_set.rename(columns={'text2': 'text'}, inplace=True)
data_set.head()
## Sentiment Analysis
# Score each tweet's polarity with TextBlob ([-1, 1]) and bucket it into a
# Negative / Positive / Neutral class label.
text = data_set["text"]
for i in range(len(text)):
    polarity = TextBlob(text[i]).sentiment.polarity
    # .at replaces DataFrame.set_value, which was removed in pandas 1.0.
    data_set.at[i, 'Sentiment'] = polarity
    if polarity < 0.00:
        sentiment_class = 'Negative'
    elif polarity > 0.00:
        sentiment_class = 'Positive'
    else:
        sentiment_class = 'Neutral'
    # Single write instead of one duplicated call per branch.
    data_set.at[i, 'SentimentClass'] = sentiment_class
data_set.head()
# Create a frame of (polarity, text) pairs, dropping zero-polarity tweets so
# the histogram shows only tweets with a detectable sentiment.
new_data_df = pd.DataFrame(data_set, columns=['Sentiment', 'text'])
nonzero = new_data_df.Sentiment != 0
new_data_df = new_data_df[nonzero]
new_data_df.head()
fig, ax = plt.subplots(figsize=(8, 6))
polarity_bins = [-1, -0.75, -0.5, -0.25, 0, 0.25, 0.5, 0.75, 1]
new_data_df.hist(bins=polarity_bins, ax=ax, color="purple")
plt.title("Sentiments from Tweets on the ODSCEurope")
plt.show()
# Persist the fully scored tweet table for later analysis.
data_set.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope.csv")
# Collect every hashtag text from the scraped tweets into a one-column frame.
# DataFrame.set_value was removed in pandas 1.0; gather into a list and build
# the frame in one shot instead of mutating it cell by cell.
all_tags = []
for tweet in results:
    # entities.get('hashtags') returns None when the key is absent — guard it
    # so a tweet without hashtag entities no longer crashes the loop.
    for tag in (tweet.entities.get('hashtags') or []):
        all_tags.append(tag['text'])
Htag_df = pd.DataFrame({'Hashtag': all_tags})
# Frequency of each hashtag, indexed by hashtag text.
trialeurope_Htag_wordcloud = Htag_df.groupby('Hashtag').size()
trialeurope_Htag_wordcloud.to_csv("C:\\Users\\iam00\\Desktop\\Bigpodium\\trialeurope_Htag_wordcloud.csv")
# Join all the text from the 1000 tweets
# Flatten every hashtag into one whitespace-separated string for the cloud.
Hashtag_Combined = " ".join(Htag_df['Hashtag'].values.astype(str))
# Re-split and re-join: normalizes whitespace only (no words are filtered).
no_europe = " ".join(Hashtag_Combined.split())
# The PNG mask constrains the cloud to the Twitter-bird silhouette.
Tweet_mask = imageio.imread("C:\\Users\\iam00\\Desktop\\Bigpodium\\twitter_mask.png")
#Create a Word Cloud
wc = WordCloud(background_color="white", stopwords=STOPWORDS, mask=Tweet_mask)
wc.generate(no_europe)
plt.imshow(wc)
plt.axis("off")
plt.savefig('C:\\Users\\iam00\\Desktop\\Bigpodium\\europe_Hashtag.png', dpi=300)
plt.show()