Unverified Commit 425dff8a authored by ishres19's avatar ishres19 Committed by GitHub
Browse files

Add files via upload

parent da18b235
import json
import pandas as pd
# from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
# from textblob import TextBlob
# file_content = open('2020-12-randomlychosen.jsonl', "r")
def convert_json_to_dataframe(filename, year_month):
    """Convert a JSON-lines tweet dump into a trimmed monthly CSV.

    Reads ``filename`` as line-delimited JSON, keeps the rows whose ``lang``
    value matches the pattern ``en|uk``, keeps a fixed subset of columns and
    writes the result to ``monthly_tweets_dataframe/<year_month>.csv``
    (relative to the current working directory). The output directory is
    created when it does not exist yet.

    Args:
        filename: Path of the JSON-lines file to read.
        year_month: Label used as the output file name, e.g. ``'2020_11'``.

    Raises:
        KeyError: If ``lang`` or any of the selected columns is absent from
            the input.
    """
    # Local import: at the point where this function is first called the
    # file has not executed an ``import os`` yet.
    import os

    df = pd.read_json(filename, lines=True)

    # na=False makes rows without a ``lang`` value drop out of the mask;
    # without it the mask contains NaN and boolean indexing raises.
    # NOTE(review): ``uk`` is the ISO 639-1 code for Ukrainian, not British
    # English -- confirm that this is the intended filter.
    is_wanted_lang = df['lang'].str.contains('en|uk', regex=True, na=False)
    df = df[is_wanted_lang]

    df = df[['created_at', 'id', 'full_text', 'geo', 'coordinates', 'place',
             'lang', 'retweeted_status']]

    out_dir = 'monthly_tweets_dataframe'
    os.makedirs(out_dir, exist_ok=True)  # to_csv does not create directories
    df.to_csv(out_dir + '/' + year_month + '.csv', index=False)
# Convert the November 2020 sample; this runs whenever the file is executed.
convert_json_to_dataframe(
    filename='/eccs/home/ishres19/COVID-19-TweetIDs/2020-2021/2020-11-randomlychosen.jsonl',
    year_month='2020_11',
)
from textblob import TextBlob
import pandas as pd
import os
from pathlib import Path
# Directory whose ``.csv`` entries main() lists and hands to the TextBlob analysis.
path_of_directory = '/Users/irisa/senior_capstone_sentiment_analysis/monthly_tweets_dataframe'
def main():
    """Run the TextBlob analysis on every ``.csv`` entry of ``path_of_directory``.

    Entries are forwarded as the bare names returned by ``os.listdir``; the
    directory is not prepended. The callee opens the name as given, so the
    files are resolved against the current working directory.
    """
    csv_names = (
        entry for entry in os.listdir(path_of_directory)
        if entry.endswith('.csv')
    )
    for entry in csv_names:
        sentiment_analysis_of_tweets_textblob(entry)
def calculate_polarity(text):
    """Return the ``polarity`` component of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.polarity
def calculate_subjectivity(text):
    """Return the ``subjectivity`` component of TextBlob's sentiment for ``text``."""
    sentiment = TextBlob(text).sentiment
    return sentiment.subjectivity
def analysis_pos_neg(score):
    """Translate a numeric score into a sentiment label.

    Returns ``'Negative'`` for a score below zero, ``'Neutral'`` for a score
    equal to zero and ``'Positive'`` for anything else.
    """
    if score < 0:
        return 'Negative'
    return 'Neutral' if score == 0 else "Positive"
def sentiment_analysis_of_tweets_textblob(id_file):
    """Score one tweet CSV with TextBlob and save per-tweet and per-day results.

    Two files are written under ``Textblob_analysis/`` (relative to the
    current working directory; the directory is created if needed):

    * ``sentiment_analysis_<name>`` -- the input rows plus the ``polarity``,
      ``subjectivity`` and ``analysis`` columns.
    * ``polarity_mean_<name>`` -- one row per day with the mean polarity in
      a column called ``mean``.

    ``<name>`` is the base name of ``id_file``, so the function accepts a
    bare file name as well as a path.

    Args:
        id_file: CSV file containing at least the ``full_text`` and
            ``created_at`` columns.
    """
    df = pd.read_csv(id_file)
    print(id_file)

    out_dir = 'Textblob_analysis'
    os.makedirs(out_dir, exist_ok=True)  # to_csv does not create directories
    base_name = os.path.basename(id_file)

    df['polarity'] = df['full_text'].apply(calculate_polarity)
    df['subjectivity'] = df['full_text'].apply(calculate_subjectivity)
    df['analysis'] = df['polarity'].apply(analysis_pos_neg)
    # The per-tweet file is written before ``created_at`` is truncated, so it
    # keeps the full timestamps.
    df.to_csv(out_dir + '/sentiment_analysis' + '_' + base_name, index=False)

    # Keep only the first whitespace-separated token of the timestamp (the
    # date part) so that tweets are grouped per day.
    df["created_at"] = df["created_at"].str.split().str[0]
    df['created_at'] = pd.to_datetime(df['created_at'])
    # The former ``dt.strftime('%m/%D/%Y')`` call was dropped: its result was
    # never stored, and ``%D`` is not a portable format directive.
    df['polarity'] = df['polarity'].astype(float)

    daily_mean = df.groupby('created_at')['polarity'].mean()
    print(daily_mean)
    groupby_df = daily_mean.to_frame(name='mean').reset_index()
    print(groupby_df)

    # The former ``plot.line()`` call was dropped: the figure it created was
    # never saved or shown.
    groupby_df.to_csv(out_dir + '/polarity_mean' + '_' + base_name, index=False)
# Start the TextBlob batch only when this file is executed as a script.
if __name__ == "__main__":
    main()
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import plotly.express as px
def viz(file_name, output_path="plot.html", nticks=30):
    """Render the daily means stored in a CSV as an interactive line chart.

    The CSV is read with pandas, printed, plotted with ``created_at`` on the
    x axis and ``mean`` on the y axis, and exported as a standalone HTML file.

    Args:
        file_name: CSV file containing ``created_at`` and ``mean`` columns.
        output_path: Destination of the HTML export. Defaults to
            ``"plot.html"``, the location that used to be hard-coded, so
            existing callers are unaffected.
        nticks: Value forwarded to ``update_xaxes(nticks=...)``. Defaults to
            the previously hard-coded ``30``.
    """
    df = pd.read_csv(file_name)
    print(df)
    fig = px.line(df, x='created_at', y='mean')
    fig.update_xaxes(nticks=nticks)
    fig.write_html(output_path)
# Plot the August 2020 VADER daily means; this runs whenever the file is executed.
viz(
    file_name='/Users/irisa/senior_capstone_sentiment_analysis/monthly_tweets_dataframe/VADER_analysis/compound_mean_2020_08.csv',
)
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import pandas as pd
from pathlib import Path
import os
# df = pd.read_csv("2020_12.csv")
# csv_files = ['2020_01', '2020_02', '2020_03', '2020_04','2020_05', '2020_06', '2020_07',
# '2020_08', '2020_09', '2020_10', '2020_11', '2020_12']
# Directory whose ``.csv`` entries main() lists and hands to the VADER analysis.
path_of_directory = '/Users/irisa/senior_capstone_sentiment_analysis/monthly_tweets_dataframe'
def main():
    """Run the VADER analysis on every ``.csv`` entry of ``path_of_directory``.

    Entries are forwarded as the bare names returned by ``os.listdir``; the
    directory is not prepended. The callee opens the name as given, so the
    files are resolved against the current working directory.
    """
    entries = os.listdir(path_of_directory)
    for entry in entries:
        if not entry.endswith('.csv'):
            continue
        sentiment_analysis_of_tweets_vader(entry)
def sentiment_analysis_of_tweets_vader(id_file):
    """Score one tweet CSV with VADER and save the daily mean compound score.

    Every ``full_text`` value is scored with ``SentimentIntensityAnalyzer``;
    the ``compound``, ``neg``, ``neu`` and ``pos`` values are added as
    columns. The compound score is then averaged per day and written to
    ``VADER_analysis/compound_mean_<name>`` (relative to the current working
    directory; the directory is created if needed) with the columns
    ``created_at`` and ``mean``.

    ``<name>`` is the base name of ``id_file``, so the function accepts a
    bare file name as well as a path.

    Args:
        id_file: CSV file containing at least the ``full_text`` and
            ``created_at`` columns.
    """
    print(id_file)
    analyzer = SentimentIntensityAnalyzer()
    df = pd.read_csv(id_file)

    # Score each tweet once and fan the result out into the four columns,
    # instead of re-running the analyzer once per column.
    scores = [analyzer.polarity_scores(text) for text in df['full_text']]
    for key in ('compound', 'neg', 'neu', 'pos'):
        df[key] = [score[key] for score in scores]

    # Keep only the first whitespace-separated token of the timestamp (the
    # date part) so that tweets are grouped per day.
    df["created_at"] = df["created_at"].str.split().str[0]
    df['created_at'] = pd.to_datetime(df['created_at'])
    # The former ``dt.strftime('%m/%D/%Y')`` call was dropped: its result was
    # never stored, and ``%D`` is not a portable format directive.
    df['compound'] = df['compound'].astype(float)

    daily_mean = df.groupby('created_at')['compound'].mean()
    print(daily_mean)
    groupby_df = daily_mean.to_frame(name='mean').reset_index()
    print(groupby_df)

    out_dir = 'VADER_analysis'
    os.makedirs(out_dir, exist_ok=True)  # to_csv does not create directories
    out_name = out_dir + '/compound_mean' + '_' + os.path.basename(id_file)
    groupby_df.to_csv(out_name, index=False)
# Start the VADER batch only when this file is executed as a script.
if __name__ == "__main__":
    main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment