1. Preparations and Packages

library("tidyverse")
library("quanteda")
library("lubridate")
library("stopwords")
library("tidytext")
library("wordcloud2")
library("cowplot")
library("stringi")
library("igraph")
library("ggraph")

2. Data Import & Tidying

teyit <- read_csv("C:/Users/Sadettin/Downloads/twitdata/teyit.csv")
dpayi <- read_csv("C:/Users/Sadettin/Downloads/twitdata/dogruluk2.csv") 
evrimag <- read_csv("C:/Users/Sadettin/Downloads/twitdata/evrima.csv")
malumatf <- read_csv("C:/Users/Sadettin/Downloads/twitdata/malumatf.csv")
yalansav <- read_csv("C:/Users/Sadettin/Downloads/twitdata/yalansav.csv")
gununyalani <- read_csv("C:/Users/Sadettin/Downloads/twitdata/gununytw.csv")
dogrusune <- read_csv("C:/Users/Sadettin/Downloads/twitdata/dogrusunetrttw.csv")
#factchecktr <- read_csv("C:/Users/Sadettin/Downloads/twitdata/factchtr.csv")


# Transliterate Turkish characters to plain ASCII (ç→c, ğ→g, ı→i, ö→o, ş→s, ü→u)
# so that tokens match the ASCII-folded stopword lists defined below
dpayi$tweet <- stri_trans_general(dpayi$tweet, id = "Latin-ASCII")
teyit$tweet <- stri_trans_general(teyit$tweet, id = "Latin-ASCII")
evrimag$tweet <- stri_trans_general(evrimag$tweet, id = "Latin-ASCII")
malumatf$tweet <- stri_trans_general(malumatf$tweet, id = "Latin-ASCII")
yalansav$tweet <- stri_trans_general(yalansav$tweet, id = "Latin-ASCII")
gununyalani$tweet <- stri_trans_general(gununyalani$tweet, id = "Latin-ASCII")
dogrusune$tweet <- stri_trans_general(dogrusune$tweet, id = "Latin-ASCII")
#factchecktr$tweet <- stri_trans_general(factchecktr$tweet, id = "Latin-ASCII")
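
Since the same transformation applies to every frame, the block above can also be collapsed into a loop; a minimal sketch, equivalent to the assignments above:

# Apply the same Latin-ASCII transliteration to each data frame in one pass
for (nm in c("teyit", "dpayi", "evrimag", "malumatf",
             "yalansav", "gununyalani", "dogrusune")) {
  df <- get(nm)
  df$tweet <- stri_trans_general(df$tweet, id = "Latin-ASCII")
  assign(nm, df)
}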

Custom stopword list

custom_stopwords <- c("https","teyit.org","pic.twitter.com","http","i","mi","v","e","twitter.com","t","eepurl.com","ii","iii","youtu.be","open.spotify.com","the","u","www.youtube.com","www.dogrulukpayi.com","dogrulukpayı","doğrulukpayı","c278f7a17463ce4aaa5a39b20","fb.me","evrimagaci.org","utm_source","utm_campaign","utm_medium","www.evrimagaci.org","icin","watch","to","new","d","a","yalansavar.org","yalansavar","cok","po.st","rt","in","via","www.malumatfurus.org","malumatfurusorg","archive.is","malumatfurus","gununyalanlari.com","teyitorg","ow.ly","te","nin","www.facebook.com","events","den","556632e33ced8","photo","tr","www.patreon.com", "evrimagaci","evrimagaci","status","wp.me","p1ufar","as","en.m.wikipedia.org","isil_arican","tevfik_uyar","c4","b1","agaci","ağaci","twitter","dogrulukpayi","destek.teyit.org","557dce70d4","izlemedeyiz.us6","dahdlx","_milkivey","dlvr.it","buff.ly","gununyalanlari","d8","iddiası","iddiasi","degil","oldugu","social","oldugunu","sitemizden","dogruluk","subscribe","list","manage.com","gore")
one_words <- function(x){
  x %>% select(tweet) %>%
    # drop tweets that start with a quote character (quoted retweets)
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    # strip shortened t.co URLs and HTML-escaped ampersands
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    # remove Turkish stopwords, custom noise terms, and tokens with no letters
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(word, sort = TRUE)
}
two_words <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>%
    separate(bigram, c("word1", "word2"), sep = " ") %>%
    # same stopword/noise filters as one_words(), applied to each position
    filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),
           !word1 %in% custom_stopwords, str_detect(word1, "[a-z]")) %>%
    filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),
           !word2 %in% custom_stopwords, str_detect(word2, "[a-z]")) %>%
    unite(bigram, word1, word2, sep = " ") %>%
    count(bigram, sort = TRUE)
}
three_words <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(trigram, tweet, token = "ngrams", n = 3) %>%
    separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
    filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),
           !word1 %in% custom_stopwords, str_detect(word1, "[a-z]")) %>%
    filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),
           !word2 %in% custom_stopwords, str_detect(word2, "[a-z]")) %>%
    filter(!word3 %in% stopwords("turkish", source = "stopwords-iso"),
           !word3 %in% custom_stopwords, str_detect(word3, "[a-z]")) %>%
    unite(trigram, word1, word2, word3, sep = " ") %>%
    count(trigram, sort = TRUE)
}

3. Analysis of Tweets

ONE WORD - UNIGRAMS

Let’s look at the data first:

one_words(teyit)
## # A tibble: 22,220 x 2
##    word            n
##    <chr>       <int>
##  1 dogru        1514
##  2 gosterdigi    835
##  3 merhaba       819
##  4 yanlis        660
##  5 tesekkurler   556
##  6 iddia         472
##  7 fotografin    465
##  8 fotograf      417
##  9 yeni          371
## 10 video         361
## # ... with 22,210 more rows

WORDCLOUDS for UNIGRAMS

Wordcloud of unigrams for teyit:

teyit_cloud <- one_words(teyit) %>% top_n(200,n)

wordcloud2(data = teyit_cloud,
           fontFamily ="Poppins",
           minRotation = -pi/6, 
           maxRotation = -pi/6, 
           rotateRatio = 1.5, 
           size = 1.5)
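
Note that wordcloud2() returns an htmlwidget rather than a ggplot, so it cannot be saved with ggsave(). One common route to a static image (a sketch, assuming the htmlwidgets and webshot packages plus PhantomJS are installed):

# Save the widget to HTML, then screenshot it to PNG
library(htmlwidgets)
library(webshot)
saveWidget(wordcloud2(teyit_cloud), "teyit_cloud.html", selfcontained = FALSE)
webshot("teyit_cloud.html", "teyit_cloud.png",
        vwidth = 1200, vheight = 800, delay = 5)  # delay lets the cloud render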

Most Frequent Words
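
The bar charts below use theme_poppins(), a custom ggplot2 theme that is not defined in this section. A minimal sketch of what it might look like, assuming the Poppins font is registered (e.g. via showtext):

# Assumption: theme_poppins() is defined elsewhere in the original project;
# this sketch only reproduces its likely intent.
library(showtext)
font_add_google("Poppins", "Poppins")  # fetch Poppins from Google Fonts
showtext_auto()

theme_poppins <- function(base_size = 12){
  theme_minimal(base_size = base_size, base_family = "Poppins") +
    theme(panel.grid.minor = element_blank())
}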

teyit_viz <-  one_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,1700)

dp_viz <- one_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,4200)

malumat_viz <- one_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,800)

evrimag_viz <- one_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,5000)

yalansav_viz <- one_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,300)

gy_viz <- one_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#dd3497")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,2600)

plot_grid(teyit_viz, dp_viz,malumat_viz,evrimag_viz,yalansav_viz,gy_viz, labels = c("Teyit", "D.Payı","Malumatfuruş","Evrim Ağacı","Yalansavar","Günün Yalanları"),ncol = 3,label_fontfamily = "Poppins")
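
The echoed message below ("Saving 15 x 9.24 in image") matches a plain ggsave() on the grid just displayed; a sketch of that call, with an illustrative filename:

ggsave("oneword_grid.png", width = 15, height = 9.24)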

## Saving 15 x 9.24 in image


TWO WORDS - BIGRAMS

Now let’s look at bigrams (two-word phrases) to contextualize our findings a bit more:

two_words(teyit)
## # A tibble: 46,500 x 2
##    bigram                      n
##    <chr>                   <int>
##  1 sosyal medyada            194
##  2 yanlis bilgi              193
##  3 haftanin dogrulari        183
##  4 soz konusu                169
##  5 dogrulari yanlislari      166
##  6 iddia edilen              160
##  7 gecen haftanin            114
##  8 tesekkurler merhaba       102
##  9 iddiasiyla paylasilan      99
## 10 erisebilirsiniz ilginiz    93
## # ... with 46,490 more rows

WORDCLOUDS for BIGRAMS

Bigram wordcloud for teyit:

teyit_obek_cloud <- two_words(teyit) %>% top_n(200,n)

wordcloud2(data = teyit_obek_cloud,
           fontFamily ="Poppins",
           minRotation = -pi/6, 
           maxRotation = -pi/6, 
           rotateRatio = 1.5, 
           size = 1.5)

Most Frequent Bigrams

teyit_viz2 <-  two_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,250)

dp_viz2 <- two_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,1300)

malumat_viz2 <- two_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,450)

evrimag_viz2 <- two_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,600)

yalansav2_viz <- two_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,150)

gy_viz2 <- two_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#e7298a")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,450)

plot_grid(teyit_viz2, dp_viz2,malumat_viz2,evrimag_viz2, yalansav2_viz,gy_viz2, ncol = 3, labels = c('Teyit bigram', 'D.Payı bigram',"Malumatfuruş bigram","Evrim ağacı bigram","Yalansavar bigram","Günün yalanları bigram"),label_fontfamily = "Poppins")

THREE WORDS - TRIGRAMS

three_words(dpayi)
## # A tibble: 35,847 x 2
##    trigram                            n
##    <chr>                          <int>
##  1 recep tayyip erdogan             180
##  2 kisi basina dusen                163
##  3 iddia kontrolu recep             160
##  4 kontrolu recep tayyip            156
##  5 sirada yer aliyor                147
##  6 surec nasil ilerliyor            108
##  7 gundemine neler girdi            107
##  8 neler girdi gelin                107
##  9 payi'nin gundemine neler         107
## 10 adresini ziyaret edebilirsiniz   104
## # ... with 35,837 more rows
teyit_viz3 <-  three_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,180)

dp_viz3 <- three_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,200)

malumat_viz3 <- three_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,70)

evrimag_viz3 <- three_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,100)

yalansav_viz3 <- three_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,25)

gy_viz3 <- three_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#e7298a")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,300)

plot_grid(teyit_viz3, dp_viz3,malumat_viz3,evrimag_viz3, yalansav_viz3,gy_viz3, ncol = 3, labels = c('Teyit trigram', 'D.Payı trigram',"Malumatfuruş trigram","Evrim ağacı trigram","Yalansavar trigram","Günün yalanları trigram"),label_fontfamily = "Poppins")

4. TF-IDF of Tweets

What is tf-idf? In short, it scores a word highly when it appears often in one account’s tweets but rarely in the other accounts’ tweets.
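
As computed by tidytext’s bind_tf_idf(), with each account treated as one document:

$$
\mathrm{tf}(t,d) = \frac{n_{t,d}}{\sum_{t'} n_{t',d}}, \qquad
\mathrm{idf}(t) = \ln\frac{N}{|\{d : t \in d\}|}, \qquad
\text{tf-idf}(t,d) = \mathrm{tf}(t,d)\,\mathrm{idf}(t)
$$

where $n_{t,d}$ is the count of term $t$ in document $d$ and $N$ is the number of documents (here, six accounts).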

one_words_tfidf <- function(x){
  x %>% select(name, tweet) %>%
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(name, word, sort = TRUE)  # keep the account name for per-document counts
}
all_data <- bind_rows(dpayi, teyit,evrimag,yalansav,gununyalani,malumatf)
tf_idf_tweets <- all_data %>% one_words_tfidf() %>% bind_tf_idf(word,name,n)
tf_idf_tweets %>% arrange(desc(tf_idf))
## # A tibble: 186,492 x 6
##    name            word                 n      tf   idf  tf_idf
##    <chr>           <chr>            <int>   <dbl> <dbl>   <dbl>
##  1 Dogruluk Payi   beyanat           3863 0.0235  1.79  0.0422 
##  2 Günün Yalanlari yalani            2292 0.0215  0.405 0.00871
##  3 Dogruluk Payi   bulten            3116 0.0190  0.405 0.00770
##  4 Günün Yalanlari carpitmasi         572 0.00536 1.10  0.00589
##  5 Yalansavar      tam2013            123 0.00267 1.79  0.00479
##  6 Yalansavar      tam2014             77 0.00167 1.79  0.00300
##  7 Malumatfurus    yanlislama         217 0.00214 1.10  0.00235
##  8 Günün Yalanlari afrinoperasyonu    138 0.00129 1.79  0.00232
##  9 Teyit           coronavirusfacts   125 0.00127 1.79  0.00228
## 10 Evrim Agaci     evrimi             748 0.00185 1.10  0.00204
## # ... with 186,482 more rows

tf-idf chart

tf_idf_tweets %>% arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  group_by(name) %>% 
  top_n(15, tf_idf) %>%  # name the ranking column explicitly
  ungroup() %>%
  ggplot(aes(word, tf_idf, fill = name)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~name, ncol = 2, scales = "free") +
  coord_flip()+theme_poppins()

5. Bigram Networks

The quick way: a reusable function

ag_ggraph <- function(x,b){
  set.seed(123)
  a <- grid::arrow(type = "closed", length = unit(0.1, "inches"))
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
  mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),!word1 %in% custom_stopwords,str_detect(word1, "[a-z]")) %>%
  filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),!word2 %in% custom_stopwords,str_detect(word2, "[a-z]")) %>%
  count(word1, word2, sort = TRUE)%>% 
    filter(n >30) %>%
    graph_from_data_frame() %>% 
    ggraph(layout = "fr") +
    geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = a, end_cap = circle(.07, 'inches')) +
    geom_node_point(color =b, size = 3) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}
ag_ggraph(dpayi,"lightblue")

The longer, step-by-step way

bigrams_count <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
  mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),!word1 %in% custom_stopwords,str_detect(word1, "[a-z]")) %>%
  filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),!word2 %in% custom_stopwords,str_detect(word2, "[a-z]")) %>%
  count(word1, word2, sort = TRUE) 
}
dpayi_bigram <- bigrams_count(dpayi)

dpayi_bigram
## # A tibble: 42,840 x 3
##    word1    word2        n
##    <chr>    <chr>    <int>
##  1 iddia    kontrolu  1070
##  2 bulten   turkiye    334
##  3 ak       parti      279
##  4 tesekkur ederiz     267
##  5 yer      aliyor     254
##  6 sirada   yer        227
##  7 recep    tayyip     224
##  8 basina   dusen      211
##  9 tayyip   erdogan    181
## 10 kisi     basina     179
## # ... with 42,830 more rows
bigram_graph <- dpayi_bigram %>%
  filter(n > 40) %>%
  graph_from_data_frame()

bigram_graph
## IGRAPH 4d69294 DN-- 160 125 -- 
## + attr: name (v/c), n (e/n)
## + edges from 4d69294 (vertex names):
##  [1] iddia     ->kontrolu      bulten    ->turkiye      
##  [3] ak        ->parti         tesekkur  ->ederiz       
##  [5] yer       ->aliyor        sirada    ->yer          
##  [7] recep     ->tayyip        basina    ->dusen        
##  [9] tayyip    ->erdogan       kisi      ->basina       
## [11] kontrolu  ->recep         analiz    ->edilen       
## [13] fact      ->checking      takip     ->edebilirsiniz
## [15] ulke      ->arasinda      analiz    ->ettik        
## + ... omitted several edges
set.seed(123)

a <- grid::arrow(type = "closed", length = unit(0.1, "inches"))

ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = a, end_cap = circle(.07, 'inches')) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
  theme_void()
knitr::include_graphics("images/dp_bigram_network.png")

6. Networks of Top Hashtags and Users

tweet_data <- bind_rows(dpayi, teyit,evrimag,yalansav,gununyalani,dogrusune, malumatf)
head(tweet_data)
## # A tibble: 6 x 34
##        id conversation_id created_at date       time     timezone user_id
##     <dbl>           <dbl>      <dbl> <date>     <time>   <chr>      <dbl>
## 1 1.27e18         1.27e18    1.59e12 2020-06-16 08:33:12 EDT       2.48e9
## 2 1.27e18         1.27e18    1.59e12 2020-06-16 08:28:12 EDT       2.48e9
## 3 1.27e18         1.27e18    1.59e12 2020-06-16 06:57:55 EDT       2.48e9
## 4 1.27e18         1.27e18    1.59e12 2020-06-15 05:51:32 EDT       2.48e9
## 5 1.27e18         1.27e18    1.59e12 2020-06-14 13:35:09 EDT       2.48e9
## 6 1.27e18         1.27e18    1.59e12 2020-06-13 12:58:27 EDT       2.48e9
## # ... with 27 more variables: username <chr>, name <chr>, place <lgl>,
## #   tweet <chr>, mentions <chr>, urls <chr>, photos <chr>, replies_count <dbl>,
## #   retweets_count <dbl>, likes_count <dbl>, hashtags <chr>, cashtags <chr>,
## #   link <chr>, retweet <lgl>, quote_url <chr>, video <dbl>, near <lgl>,
## #   geo <lgl>, source <lgl>, user_rt_id <lgl>, user_rt <lgl>, retweet_id <lgl>,
## #   reply_to <chr>, retweet_date <lgl>, translate <lgl>, trans_src <lgl>,
## #   trans_dest <lgl>
tag_data <-  tweet_data %>% select(id,name,tweet) %>% 
  filter(!str_detect(tweet, '^"'),!str_detect(tweet,"^'")) %>%
  mutate(tweet =str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", ""))%>%
  unnest_tokens(word, tweet, token = "tweets" ) %>% filter(str_detect(word, "#"))

saveRDS(tag_data,"tagdata.rds")
head(tag_data)
## # A tibble: 6 x 3
##        id name          word        
##     <dbl> <chr>         <chr>       
## 1 1.27e18 Dogruluk Payi #dp60saniye 
## 2 1.26e18 Dogruluk Payi #dp60saniye 
## 3 1.26e18 Dogruluk Payi #sigarazammi
## 4 1.26e18 Dogruluk Payi #dp60saniye 
## 5 1.26e18 Dogruluk Payi #dp60saniye 
## 6 1.26e18 Dogruluk Payi #enflasyon
users_data <- tweet_data %>% select(id,name,tweet) %>% 
  filter(!str_detect(tweet, '^"'),!str_detect(tweet,"^'")) %>%
  mutate(tweet =str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", ""))%>%
  unnest_tokens(word, tweet, token = "tweets" ) %>% filter(str_detect(word, "@"))

saveRDS(users_data,"mentioneddata.rds")
head(users_data)
## # A tibble: 6 x 3
##        id name          word           
##     <dbl> <chr>         <chr>          
## 1 1.27e18 Dogruluk Payi @kosartanerin  
## 2 1.26e18 Dogruluk Payi @kosartanerin  
## 3 1.26e18 Dogruluk Payi @kondaarastirma
## 4 1.24e18 Dogruluk Payi @verikaynagi   
## 5 1.23e18 Dogruluk Payi @kondaarastirma
## 6 1.22e18 Dogruluk Payi @idemahaber
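
A compatibility note: the "tweets" tokenizer was removed from later tidytext/tokenizers releases. On current versions, a plain regex extraction (a sketch) recovers the same lowercased hashtag tokens; swap the pattern to "@[a-z0-9_]+" for mentions:

# Extract hashtags directly instead of relying on token = "tweets"
tag_data2 <- tweet_data %>%
  select(id, name, tweet) %>%
  mutate(word = str_extract_all(str_to_lower(tweet), "#[a-z0-9_]+")) %>%
  unnest(word)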

This guide helps a lot

quanteda_network <- function(x,y){
 tag_dfm <- dfm_select(dfm(x$tweet, remove_punct = TRUE), pattern = ("#*"))
 toptag <- names(topfeatures(tag_dfm, 70))
 topgat_fcm <- fcm_select(fcm(tag_dfm), pattern = toptag)
 return(textplot_network(topgat_fcm, 
                 min_freq = 0.1, 
                 edge_alpha = 0.8, 
                 edge_size = 5,
                 edge_color = y,
                 vertex_labelfont ="Poppins",
                 vertex_labelsize = 4))
}
quanteda_network(dpayi,"#3690c0")


quanteda_users <- function(x,y){
 tag_dfm <- dfm_select(dfm(x$tweet, remove_punct = TRUE), pattern = ("@*"))
 toptag <- names(topfeatures(tag_dfm, 70))
 topgat_fcm <- fcm_select(fcm(tag_dfm), pattern = toptag)
 return(textplot_network(topgat_fcm, 
                 min_freq = 0.1, 
                 edge_alpha = 0.8, 
                 edge_size = 5,
                 edge_color = y,
                 vertex_labelfont ="Poppins",
                 vertex_labelsize = 4))
}
quanteda_users(dpayi, "#3690c0")

7. STM on Tweets

library(stm)
## stm v1.3.5 successfully loaded. See ?stm for help. 
##  Papers, resources, and other materials at structuraltopicmodel.com
oneword_stm <- function(x){
  x %>% select(username, tweet) %>%
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(username, word, sort = TRUE)
}
datastm <- oneword_stm(tweet_data) %>% filter(username != "dogrusunetrt")
datastm
## # A tibble: 186,492 x 3
##    username        word        n
##    <chr>           <chr>   <int>
##  1 evrimagaci      evrim    4272
##  2 dogrulukpayicom beyanat  3863
##  3 dogrulukpayicom bulten   3116
##  4 evrimagaci      nasil    2387
##  5 gununyalanlari  yalani   2292
##  6 dogrulukpayicom turkiye  2290
##  7 gununyalanlari  yalan    2230
##  8 evrimagaci      bilim    1939
##  9 evrimagaci      nedir    1771
## 10 evrimagaci      okumak   1762
## # ... with 186,482 more rows
fc_dfm <- datastm %>%
    cast_dfm(username, word, n)
fc_dfm
## Document-feature matrix of: 6 documents, 131,815 features (76.4% sparse).
##                  features
## docs              evrim beyanat bulten nasil yalani turkiye yalan bilim nedir
##   evrimagaci       4272       0      1  2387     12     357   111  1939  1771
##   dogrulukpayicom     7    3863   3116   288      0    2290     6     2    88
##   gununyalanlari      1       0      0    45   2292     337  2230     9     2
##   teyitorg            0       0     14   248      0     144   103    46    14
##   malumatfurusorg     8       0      3    59      3     142    64    30    19
##   yalansavar         12       0      0   138      8       3    56   180    36
##                  features
## docs              okumak
##   evrimagaci        1762
##   dogrulukpayicom     66
##   gununyalanlari       0
##   teyitorg            23
##   malumatfurusorg      2
##   yalansavar           3
## [ reached max_nfeat ... 131,805 more features ]
topic_model <- stm(fc_dfm, K = 10, 
                   verbose = FALSE, init.type = "Spectral")
summary(topic_model)
## A topic model with 10 topics, 6 documents and a 131815 word dictionary.
## Topic 1 Top Words:
##       Highest Prob: beyanat, bulten, turkiye, turkiye'de, iddia, sayisi, kontrolu 
##       FREX: beyanat, hukumetre, dp60saniye, mv, mailchi.mp, ahmet_davutoglu, bultenlerini 
##       Lift: 0ks7jtupak0, 10aralikdunyainsanhaklarigunu, 13bow7v, 1bjf9ne, 1fnoll6, 3amp1ublvky, 5487fbbbcc2e9 
##       Score: beyanat, bulten, payi'nin, hukumetre, chp, dp60saniye, ihracat 
## Topic 2 Top Words:
##       Highest Prob: yalani, yalan, dogru, carpitmasi, tarafindan, iddiasiyla, servis 
##       FREX: afrinoperasyonu, gundem'in, yandaslari, kiraz, sozcunun, eymur, kirca 
##       Lift: 10ar, 10rzclpcnk2rjlw067_8aw, 15temmuzuanlat, 2016yalanlari, 22_11_2017_suriye_gbm_bilgi_notu.pdf, abden, acikkollu'nun 
##       Score: yalani, carpitmasi, afrinoperasyonu, cumhuriyet'in, yalan, iddiasiyla, yalanladi 
## Topic 3 Top Words:
##       Highest Prob: dogru, gosterdigi, merhaba, yanlis, tesekkurler, iddia, fotografin 
##       FREX: coronavirusfacts, desteklediginiz, ilgin, kutunuzda, teyitlendin, teyitciyi, teyitpedia 
##       Lift: 021243accdd8, 0iwzw46xo3cyaypkaksfw1, 0kbco73nlfbjtudoctbr3o, 0mblwhastge, 0vdpjnrlc3hrj2wohoypcn, 16lara, 1789da 
##       Score: gosterdigi, merhaba, coronavirusfacts, yayimladigimiz, ilginiz, fotografin, tesekkurler 
## Topic 4 Top Words:
##       Highest Prob: evrim, nasil, okumak, nedir, bilim, insan, fotograf 
##       FREX: evrimin, posted, secilim, evrimlesti, dar.vin, boyda, cmb 
##       Lift: 23andme, posted, evrimin, secilim, evrimlesti, dar.vin, boyda 
##       Score: evrim, evrimi, evrimsel, evrimin, okumak, posted, covid19 
## Topic 5 Top Words:
##       Highest Prob: kose, dogru, yazarlari, yanlis, dogrulama, koronavirus, iddia 
##       FREX: tarihtebugun, aktarmis, kosemenler, keciboynuzu, hazar, ozdil, padisah 
##       Lift: __tn__, _aamirkhan, _devapartisi, _e2r2volqva, _ilkeli_, _nediyoyabu_, _notallthosewho'ye 
##       Score: kose, yanlislama, yazarlari, tarihtebugun, aktarmis, dogrulama, kosemenler 
## Topic 6 Top Words:
##       Highest Prob: yeni, bilim, nasil, su, son, dogru, bilimsel 
##       FREX: derisini, desenleri, desteklediklerimiz, dibinde, dinozorlari, donukluk, dostumdur 
##       Lift: astral, gezegenlerden, paranormal, anneleri, aman, cocukluk, nye 
##       Score: kacirmayin, yazi, tesekkurler, makale, mesela, p, bilimin 
## Topic 7 Top Words:
##       Highest Prob: tartisirken, yeni, bilim, nasil, su, yazi, bilimsel 
##       FREX: tartisirken, yazi, anlatiyor, cogu, mesela, tip, yazdi 
##       Lift: tartisirken, bods, pekcok, skeptik, shermer, novella, derkenar 
##       Score: tartisirken, tam2013, tam2014, sbasegmez, mkozturk, yazi, sozdebilim 
## Topic 8 Top Words:
##       Highest Prob: animsayalim, yeni, bilim, nasil, yazi, su, bilimsel 
##       FREX: animsayalim, tam2013, yazdi, tam2014, mkozturk, sbasegmez, homeopati 
##       Lift: animsayalim, ___ceka___, _burkmez, _burkmez'den, _burkmez'in, _dwqflaibxy, _encoding 
##       Score: animsayalim, tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon 
## Topic 9 Top Words:
##       Highest Prob: tivit, yeni, bilim, nasil, yazi, su, bilimsel 
##       FREX: tivit, tam2013, yazdi, tam2014, mkozturk, sbasegmez, homeopati 
##       Lift: tivit, ___ceka___, _burkmez, _burkmez'den, _dwqflaibxy, _encoding, _i_d 
##       Score: tivit, tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon 
## Topic 10 Top Words:
##       Highest Prob: yeni, bilim, nasil, yazi, tam2013, yazdi, su 
##       FREX: tam2013, tam2014, yazdi, mkozturk, sbasegmez, homeopati, elestirel 
##       Lift: tumertopal, 3balfx, 5b, 7b, 7d, affedin, arsenicum 
##       Score: tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon, burkmez
td_beta <- tidy(topic_model)
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
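
From here, the usual tidytext move is to chart the highest-probability terms per topic; a minimal sketch using the td_beta tibble just created:

# Top 10 terms per topic by beta (the per-topic word probability)
td_beta %>%
  group_by(topic) %>%
  slice_max(beta, n = 10) %>%
  ungroup() %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free_y") +
  scale_x_reordered() +
  coord_flip()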