# Data Methods: Social media (Twitter) data
# Sample program for using rtweet, sentiment analysis
# Use vignette("auth", package = "rtweet") for authentication
# Documentation: vignette("intro", package = "rtweet")
# GitHub: https://github.com/mkearney/rtweet
# [Bob Rudis 21 Recipes for Mining Twitter Data with rtweet](https://rud.is/books/21-recipes/)

# NOTE: the workspace is deliberately NOT wiped with rm(list = ls()).
# Clearing the global environment from inside a script silently destroys the
# caller's objects and still leaves attached packages and options in place.
# Run this script in a fresh R session instead.

# Packages ----
library(rtweet)
library(ggplot2)
library(tidyverse)
# tokens() and dfm(), used in the analysis sections of this script, are
# quanteda functions, so quanteda itself has to be attached.
library(quanteda)
Warning in .recacheSubclasses(def@className, def, env): undefined subclass
"packedMatrix" of class "mMatrix"; definition not updated
Warning in .recacheSubclasses(def@className, def, env): undefined subclass
"packedMatrix" of class "replValueSp"; definition not updated
Package version: 3.2.3
Unicode version: 14.0
ICU version: 70.1
Parallel computing: 8 of 8 threads used.
See https://quanteda.io for tutorials and examples.
library(quanteda.textmodels)
library(quanteda.textplots)
library(readr)

# Authentication ----
# Set up authentication using own Twitter account.
# auth_setup_default() will save credentials to local drive as default.rds
#
# SECURITY: the bearer token must never be written into this file. Supply it
# from outside the script, e.g. a line `TWITTER_BEARER=<token>` in ~/.Renviron,
# so it is present in the environment before this script runs.
# NOTE(review): a token was previously hard-coded at this point; treat that
# token as compromised and regenerate it in the Twitter developer portal.
if (!nzchar(Sys.getenv("TWITTER_BEARER"))) {
  warning(
    "Environment variable TWITTER_BEARER is not set; ",
    "define it in ~/.Renviron rather than in this script.",
    call. = FALSE
  )
}

# Request parameters (user description and pinned tweet expansion).
# NOTE(review): `params` is not referenced by any code visible in this part of
# the script -- confirm whether it is still needed.
params <- list(
  user.fields = "description",
  expansions = "pinned_tweet_id"
)

auth_setup_default()
Using default authentication available.
Reading auth from '/Users/sami_manuel/Library/Preferences/org.R-project.R/R/rtweet/default.rds'
## Request 500 English-language tweets matching the query "JoeBiden"
jbt <- rtweet::search_tweets(
  q = "JoeBiden",
  n = 500,
  lang = "en",
  retryonratelimit = TRUE
)

# i. Most likes: the largest favorite_count among the returned tweets
max(jbt[["favorite_count"]])
[1] 64787
# ii. Most retweets: the largest retweet_count among the returned tweets
max(jbt[["retweet_count"]])
[1] 18094
# iii. Most replies
# reply_count contains missing values, so a plain max() just returns NA.
# Drop the missing values first; if no tweet carries a reply count at all,
# report NA explicitly instead of letting max() warn and return -Inf.
reply_counts <- jbt$reply_count[!is.na(jbt$reply_count)]
if (length(reply_counts) > 0) max(reply_counts) else NA_integer_
[1] NA
# Text analysis of the "JoeBiden" tweets ----
# Pull out the raw tweet text, tokenise it, and build a document-feature
# matrix from the tokens.
jbt_twt <- jbt[["text"]]
jbt_toks <- tokens(jbt_twt)
jbttwtdfm <- dfm(jbt_toks)

# Fit a latent semantic analysis model on the document-feature matrix and
# print a summary of the fitted object's components.
jbt_sum_lsa <- textmodel_lsa(jbttwtdfm)
summary(jbt_sum_lsa)
Length Class Mode
sk 10 -none- numeric
docs 4820 -none- numeric
features 21980 -none- numeric
matrix_low_rank 1059436 -none- numeric
data 1059436 dgCMatrix S4
## Request 500 English-language tweets matching the query "COVID"
COVID <- rtweet::search_tweets(
  q = "COVID",
  n = 500,
  lang = "en",
  retryonratelimit = TRUE
)

# Text analysis of the "COVID" tweets ----
# Pull out the raw tweet text, tokenise it, and build a document-feature
# matrix from the tokens.
COVID_twt <- COVID[["text"]]
COVID_toks <- tokens(COVID_twt)
COVIDtwtdfm <- dfm(COVID_toks)

# Fit a latent semantic analysis model on the document-feature matrix and
# print a summary of the fitted object's components.
COVID_sum_lsa <- textmodel_lsa(COVIDtwtdfm)
summary(COVID_sum_lsa)
Length Class Mode
sk 10 -none- numeric
docs 5000 -none- numeric
features 32880 -none- numeric
matrix_low_rank 1644000 -none- numeric
data 1644000 dgCMatrix S4