1. Preparations and Packages

library("tidyverse")
library("quanteda")
library("lubridate")
library("stopwords")
library("tidytext")
library("wordcloud2")
library("cowplot")
library("stringi")
library("igraph")
library("ggraph")

2. Data Import & Tidying

teyit <- read_csv("C:/Users/Sadettin/Downloads/twitdata/teyit.csv")
dpayi <- read_csv("C:/Users/Sadettin/Downloads/twitdata/dogruluk2.csv") 
evrimag <- read_csv("C:/Users/Sadettin/Downloads/twitdata/evrima.csv")
malumatf <- read_csv("C:/Users/Sadettin/Downloads/twitdata/malumatf.csv")
yalansav <- read_csv("C:/Users/Sadettin/Downloads/twitdata/yalansav.csv")
gununyalani <- read_csv("C:/Users/Sadettin/Downloads/twitdata/gununytw.csv")
dogrusune <- read_csv("C:/Users/Sadettin/Downloads/twitdata/dogrusunetrttw.csv")
#factchecktr <- read_csv("C:/Users/Sadettin/Downloads/twitdata/factchtr.csv")


# Transliterate Turkish characters to plain ASCII (ç→c, ğ→g, ı→i, ö→o, ş→s, ü→u)
# so that tokens match the ASCII-folded stopword lists defined below
dpayi$tweet <- stri_trans_general(dpayi$tweet, id = "Latin-ASCII")
teyit$tweet <- stri_trans_general(teyit$tweet, id = "Latin-ASCII")
evrimag$tweet <- stri_trans_general(evrimag$tweet, id = "Latin-ASCII")
malumatf$tweet <- stri_trans_general(malumatf$tweet, id = "Latin-ASCII")
yalansav$tweet <- stri_trans_general(yalansav$tweet, id = "Latin-ASCII")
gununyalani$tweet <- stri_trans_general(gununyalani$tweet, id = "Latin-ASCII")
dogrusune$tweet <- stri_trans_general(dogrusune$tweet, id = "Latin-ASCII")
#factchecktr$tweet <- stri_trans_general(factchecktr$tweet, id = "Latin-ASCII")
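
Since the same transformation applies to every frame, the block above can also be collapsed into a loop; a minimal sketch, equivalent to the assignments above:

# Apply the same Latin-ASCII transliteration to each data frame in one pass
for (nm in c("teyit", "dpayi", "evrimag", "malumatf",
             "yalansav", "gununyalani", "dogrusune")) {
  df <- get(nm)
  df$tweet <- stri_trans_general(df$tweet, id = "Latin-ASCII")
  assign(nm, df)
}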

Custom stopword list

custom_stopwords <- c("https","teyit.org","pic.twitter.com","http","i","mi","v","e","twitter.com","t","eepurl.com","ii","iii","youtu.be","open.spotify.com","the","u","www.youtube.com","www.dogrulukpayi.com","dogrulukpayı","doğrulukpayı","c278f7a17463ce4aaa5a39b20","fb.me","evrimagaci.org","utm_source","utm_campaign","utm_medium","www.evrimagaci.org","icin","watch","to","new","d","a","yalansavar.org","yalansavar","cok","po.st","rt","in","via","www.malumatfurus.org","malumatfurusorg","archive.is","malumatfurus","gununyalanlari.com","teyitorg","ow.ly","te","nin","www.facebook.com","events","den","556632e33ced8","photo","tr","www.patreon.com", "evrimagaci","evrimagaci","status","wp.me","p1ufar","as","en.m.wikipedia.org","isil_arican","tevfik_uyar","c4","b1","agaci","ağaci","twitter","dogrulukpayi","destek.teyit.org","557dce70d4","izlemedeyiz.us6","dahdlx","_milkivey","dlvr.it","buff.ly","gununyalanlari","d8","iddiası","iddiasi","degil","oldugu","social","oldugunu","sitemizden","dogruluk","subscribe","list","manage.com","gore")
one_words <- function(x){
  x %>% select(tweet) %>%
    # drop tweets that start with a quote character (quoted retweets)
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    # strip shortened t.co URLs and HTML-escaped ampersands
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    # remove Turkish stopwords, custom noise terms, and tokens with no letters
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(word, sort = TRUE)
}
two_words <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>%
    separate(bigram, c("word1", "word2"), sep = " ") %>%
    # same stopword/noise filters as one_words(), applied to each position
    filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),
           !word1 %in% custom_stopwords, str_detect(word1, "[a-z]")) %>%
    filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),
           !word2 %in% custom_stopwords, str_detect(word2, "[a-z]")) %>%
    unite(bigram, word1, word2, sep = " ") %>%
    count(bigram, sort = TRUE)
}
three_words <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(trigram, tweet, token = "ngrams", n = 3) %>%
    separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
    filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),
           !word1 %in% custom_stopwords, str_detect(word1, "[a-z]")) %>%
    filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),
           !word2 %in% custom_stopwords, str_detect(word2, "[a-z]")) %>%
    filter(!word3 %in% stopwords("turkish", source = "stopwords-iso"),
           !word3 %in% custom_stopwords, str_detect(word3, "[a-z]")) %>%
    unite(trigram, word1, word2, word3, sep = " ") %>%
    count(trigram, sort = TRUE)
}

3. Analysis of Tweets

ONE WORD - UNIGRAMS

Let’s look at the data first:

one_words(teyit)
## # A tibble: 22,220 x 2
##    word            n
##    <chr>       <int>
##  1 dogru        1514
##  2 gosterdigi    835
##  3 merhaba       819
##  4 yanlis        660
##  5 tesekkurler   556
##  6 iddia         472
##  7 fotografin    465
##  8 fotograf      417
##  9 yeni          371
## 10 video         361
## # ... with 22,210 more rows

WORDCLOUDS for UNIGRAMS

Wordcloud of unigrams for teyit:

teyit_cloud <- one_words(teyit) %>% top_n(200,n)

wordcloud2(data = teyit_cloud,
           fontFamily ="Poppins",
           minRotation = -pi/6, 
           maxRotation = -pi/6, 
           rotateRatio = 1.5, 
           size = 1.5)
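
Note that wordcloud2() returns an htmlwidget rather than a ggplot, so it cannot be saved with ggsave(). One common route to a static image (a sketch, assuming the htmlwidgets and webshot packages plus PhantomJS are installed):

# Save the widget to HTML, then screenshot it to PNG
library(htmlwidgets)
library(webshot)
saveWidget(wordcloud2(teyit_cloud), "teyit_cloud.html", selfcontained = FALSE)
webshot("teyit_cloud.html", "teyit_cloud.png",
        vwidth = 1200, vheight = 800, delay = 5)  # delay lets the cloud render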

Most Frequent Words
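
The bar charts below use theme_poppins(), a custom ggplot2 theme that is not defined in this section. A minimal sketch of what it might look like, assuming the Poppins font is registered (e.g. via showtext):

# Assumption: theme_poppins() is defined elsewhere in the original project;
# this sketch only reproduces its likely intent.
library(showtext)
font_add_google("Poppins", "Poppins")  # fetch Poppins from Google Fonts
showtext_auto()

theme_poppins <- function(base_size = 12){
  theme_minimal(base_size = base_size, base_family = "Poppins") +
    theme(panel.grid.minor = element_blank())
}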

teyit_viz <-  one_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,1700)

dp_viz <- one_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,4200)

malumat_viz <- one_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,800)

evrimag_viz <- one_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,5000)

yalansav_viz <- one_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,300)

gy_viz <- one_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(word, n),n))+
  geom_col(fill ="#dd3497")+
  coord_flip()+
  geom_text(aes(x = word, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,2600)

plot_grid(teyit_viz, dp_viz,malumat_viz,evrimag_viz,yalansav_viz,gy_viz, labels = c("Teyit", "D.Payı","Malumatfuruş","Evrim Ağacı","Yalansavar","Günün Yalanları"),ncol = 3,label_fontfamily = "Poppins")
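
The echoed message below ("Saving 15 x 9.24 in image") matches a plain ggsave() on the grid just displayed; a sketch of that call, with an illustrative filename:

ggsave("oneword_grid.png", width = 15, height = 9.24)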

## Saving 15 x 9.24 in image


TWO WORDS - BIGRAMS

Now let’s look at bigrams (two-word phrases) to contextualize our findings a bit more:

two_words(teyit)
## # A tibble: 46,500 x 2
##    bigram                      n
##    <chr>                   <int>
##  1 sosyal medyada            194
##  2 yanlis bilgi              193
##  3 haftanin dogrulari        183
##  4 soz konusu                169
##  5 dogrulari yanlislari      166
##  6 iddia edilen              160
##  7 gecen haftanin            114
##  8 tesekkurler merhaba       102
##  9 iddiasiyla paylasilan      99
## 10 erisebilirsiniz ilginiz    93
## # ... with 46,490 more rows

WORDCLOUDS for BIGRAMS

Bigram wordcloud for teyit:

teyit_obek_cloud <- two_words(teyit) %>% top_n(200,n)

wordcloud2(data = teyit_obek_cloud,
           fontFamily ="Poppins",
           minRotation = -pi/6, 
           maxRotation = -pi/6, 
           rotateRatio = 1.5, 
           size = 1.5)

Most Frequent Bigrams

teyit_viz2 <-  two_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,250)

dp_viz2 <- two_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,1300)

malumat_viz2 <- two_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,450)

evrimag_viz2 <- two_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,600)

yalansav2_viz <- two_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,150)

gy_viz2 <- two_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(bigram, n),n))+
  geom_col(fill ="#e7298a")+
  coord_flip()+
  geom_text(aes(x = bigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,450)

plot_grid(teyit_viz2, dp_viz2,malumat_viz2,evrimag_viz2, yalansav2_viz,gy_viz2, ncol = 3, labels = c('Teyit bigram', 'D.Payı bigram',"Malumatfuruş bigram","Evrim ağacı bigram","Yalansavar bigram","Günün yalanları bigram"),label_fontfamily = "Poppins")

THREE WORDS - TRIGRAMS

three_words(dpayi)
## # A tibble: 35,847 x 2
##    trigram                            n
##    <chr>                          <int>
##  1 recep tayyip erdogan             180
##  2 kisi basina dusen                163
##  3 iddia kontrolu recep             160
##  4 kontrolu recep tayyip            156
##  5 sirada yer aliyor                147
##  6 surec nasil ilerliyor            108
##  7 gundemine neler girdi            107
##  8 neler girdi gelin                107
##  9 payi'nin gundemine neler         107
## 10 adresini ziyaret edebilirsiniz   104
## # ... with 35,837 more rows
teyit_viz3 <-  three_words(teyit) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#6a51a3")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,180)

dp_viz3 <- three_words(dpayi) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#fc4e2a")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,200)

malumat_viz3 <- three_words(malumatf) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#4292c6")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,70)

evrimag_viz3 <- three_words(evrimag) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#41ab5d")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,100)

yalansav_viz3 <- three_words(yalansav) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#fec44f")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,25)

gy_viz3 <- three_words(gununyalani) %>%  top_n(20,n) %>% 
  ggplot(aes(fct_reorder(trigram, n),n))+
  geom_col(fill ="#e7298a")+
  coord_flip()+
  geom_text(aes(x = trigram, y = n,label = n),check_overlap = TRUE, hjust = -0.2,size = 3.7,color= "gray25")+
  labs(x="",y="",title ="")+
  theme_poppins()+ylim(0,300)

plot_grid(teyit_viz3, dp_viz3,malumat_viz3,evrimag_viz3, yalansav_viz3,gy_viz3, ncol = 3, labels = c('Teyit trigram', 'D.Payı trigram',"Malumatfuruş trigram","Evrim ağacı trigram","Yalansavar trigram","Günün yalanları trigram"),label_fontfamily = "Poppins")

4. TF-IDF of Tweets

What is tf-idf? In short, it scores a word highly when it appears often in one account’s tweets but rarely in the other accounts’ tweets.
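
As computed by tidytext’s bind_tf_idf(), with each account treated as one document:

$$
\mathrm{tf}(t,d) = \frac{n_{t,d}}{\sum_{t'} n_{t',d}}, \qquad
\mathrm{idf}(t) = \ln\frac{N}{|\{d : t \in d\}|}, \qquad
\text{tf-idf}(t,d) = \mathrm{tf}(t,d)\,\mathrm{idf}(t)
$$

where $n_{t,d}$ is the count of term $t$ in document $d$ and $N$ is the number of documents (here, six accounts).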

one_words_tfidf <- function(x){
  x %>% select(name, tweet) %>%
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(name, word, sort = TRUE)  # keep the account name for per-document counts
}
all_data <- bind_rows(dpayi, teyit,evrimag,yalansav,gununyalani,malumatf)
tf_idf_tweets <- all_data %>% one_words_tfidf() %>% bind_tf_idf(word,name,n)
tf_idf_tweets %>% arrange(desc(tf_idf))
## # A tibble: 186,492 x 6
##    name            word                 n      tf   idf  tf_idf
##    <chr>           <chr>            <int>   <dbl> <dbl>   <dbl>
##  1 Dogruluk Payi   beyanat           3863 0.0235  1.79  0.0422 
##  2 Günün Yalanlari yalani            2292 0.0215  0.405 0.00871
##  3 Dogruluk Payi   bulten            3116 0.0190  0.405 0.00770
##  4 Günün Yalanlari carpitmasi         572 0.00536 1.10  0.00589
##  5 Yalansavar      tam2013            123 0.00267 1.79  0.00479
##  6 Yalansavar      tam2014             77 0.00167 1.79  0.00300
##  7 Malumatfurus    yanlislama         217 0.00214 1.10  0.00235
##  8 Günün Yalanlari afrinoperasyonu    138 0.00129 1.79  0.00232
##  9 Teyit           coronavirusfacts   125 0.00127 1.79  0.00228
## 10 Evrim Agaci     evrimi             748 0.00185 1.10  0.00204
## # ... with 186,482 more rows

tf-idf chart

tf_idf_tweets %>% arrange(desc(tf_idf)) %>%
  mutate(word = factor(word, levels = rev(unique(word)))) %>%
  group_by(name) %>% 
  top_n(15, tf_idf) %>%  # name the ranking column explicitly
  ungroup() %>%
  ggplot(aes(word, tf_idf, fill = name)) +
  geom_col(show.legend = FALSE) +
  labs(x = NULL, y = "tf-idf") +
  facet_wrap(~name, ncol = 2, scales = "free") +
  coord_flip()+theme_poppins()

5. Bigram Networks

The quick way: a reusable function

ag_ggraph <- function(x,b){
  set.seed(123)
  a <- grid::arrow(type = "closed", length = unit(0.1, "inches"))
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
  mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),!word1 %in% custom_stopwords,str_detect(word1, "[a-z]")) %>%
  filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),!word2 %in% custom_stopwords,str_detect(word2, "[a-z]")) %>%
  count(word1, word2, sort = TRUE)%>% 
    filter(n >30) %>%
    graph_from_data_frame() %>% 
    ggraph(layout = "fr") +
    geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = a, end_cap = circle(.07, 'inches')) +
    geom_node_point(color =b, size = 3) +
    geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
    theme_void()
}
ag_ggraph(dpayi,"lightblue")

The longer, step-by-step way

bigrams_count <- function(x){
  x %>% select(tweet) %>% filter(!str_detect(tweet, '^"')) %>%
  mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
  unnest_tokens(bigram, tweet, token = "ngrams", n = 2) %>% 
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(!word1 %in% stopwords("turkish", source = "stopwords-iso"),!word1 %in% custom_stopwords,str_detect(word1, "[a-z]")) %>%
  filter(!word2 %in% stopwords("turkish", source = "stopwords-iso"),!word2 %in% custom_stopwords,str_detect(word2, "[a-z]")) %>%
  count(word1, word2, sort = TRUE) 
}
dpayi_bigram <- bigrams_count(dpayi)

dpayi_bigram
## # A tibble: 42,840 x 3
##    word1    word2        n
##    <chr>    <chr>    <int>
##  1 iddia    kontrolu  1070
##  2 bulten   turkiye    334
##  3 ak       parti      279
##  4 tesekkur ederiz     267
##  5 yer      aliyor     254
##  6 sirada   yer        227
##  7 recep    tayyip     224
##  8 basina   dusen      211
##  9 tayyip   erdogan    181
## 10 kisi     basina     179
## # ... with 42,830 more rows
bigram_graph <- dpayi_bigram %>%
  filter(n > 40) %>%
  graph_from_data_frame()

bigram_graph
## IGRAPH 4d69294 DN-- 160 125 -- 
## + attr: name (v/c), n (e/n)
## + edges from 4d69294 (vertex names):
##  [1] iddia     ->kontrolu      bulten    ->turkiye      
##  [3] ak        ->parti         tesekkur  ->ederiz       
##  [5] yer       ->aliyor        sirada    ->yer          
##  [7] recep     ->tayyip        basina    ->dusen        
##  [9] tayyip    ->erdogan       kisi      ->basina       
## [11] kontrolu  ->recep         analiz    ->edilen       
## [13] fact      ->checking      takip     ->edebilirsiniz
## [15] ulke      ->arasinda      analiz    ->ettik        
## + ... omitted several edges
set.seed(123)

a <- grid::arrow(type = "closed", length = unit(0.1, "inches"))

ggraph(bigram_graph, layout = "fr") +
  geom_edge_link(aes(edge_alpha = n), show.legend = FALSE,
                 arrow = a, end_cap = circle(.07, 'inches')) +
  geom_node_point(color = "lightblue", size = 3) +
  geom_node_text(aes(label = name), vjust = 1, hjust = 1) +
  theme_void()
knitr::include_graphics("images/dp_bigram_network.png")

6. Networks of Top Hashtags and Users

tweet_data <- bind_rows(dpayi, teyit,evrimag,yalansav,gununyalani,dogrusune, malumatf)
head(tweet_data)
## # A tibble: 6 x 34
##        id conversation_id created_at date       time     timezone user_id
##     <dbl>           <dbl>      <dbl> <date>     <time>   <chr>      <dbl>
## 1 1.27e18         1.27e18    1.59e12 2020-06-16 08:33:12 EDT       2.48e9
## 2 1.27e18         1.27e18    1.59e12 2020-06-16 08:28:12 EDT       2.48e9
## 3 1.27e18         1.27e18    1.59e12 2020-06-16 06:57:55 EDT       2.48e9
## 4 1.27e18         1.27e18    1.59e12 2020-06-15 05:51:32 EDT       2.48e9
## 5 1.27e18         1.27e18    1.59e12 2020-06-14 13:35:09 EDT       2.48e9
## 6 1.27e18         1.27e18    1.59e12 2020-06-13 12:58:27 EDT       2.48e9
## # ... with 27 more variables: username <chr>, name <chr>, place <lgl>,
## #   tweet <chr>, mentions <chr>, urls <chr>, photos <chr>, replies_count <dbl>,
## #   retweets_count <dbl>, likes_count <dbl>, hashtags <chr>, cashtags <chr>,
## #   link <chr>, retweet <lgl>, quote_url <chr>, video <dbl>, near <lgl>,
## #   geo <lgl>, source <lgl>, user_rt_id <lgl>, user_rt <lgl>, retweet_id <lgl>,
## #   reply_to <chr>, retweet_date <lgl>, translate <lgl>, trans_src <lgl>,
## #   trans_dest <lgl>
tag_data <-  tweet_data %>% select(id,name,tweet) %>% 
  filter(!str_detect(tweet, '^"'),!str_detect(tweet,"^'")) %>%
  mutate(tweet =str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", ""))%>%
  unnest_tokens(word, tweet, token = "tweets" ) %>% filter(str_detect(word, "#"))

saveRDS(tag_data,"tagdata.rds")
head(tag_data)
## # A tibble: 6 x 3
##        id name          word        
##     <dbl> <chr>         <chr>       
## 1 1.27e18 Dogruluk Payi #dp60saniye 
## 2 1.26e18 Dogruluk Payi #dp60saniye 
## 3 1.26e18 Dogruluk Payi #sigarazammi
## 4 1.26e18 Dogruluk Payi #dp60saniye 
## 5 1.26e18 Dogruluk Payi #dp60saniye 
## 6 1.26e18 Dogruluk Payi #enflasyon
users_data <- tweet_data %>% select(id,name,tweet) %>% 
  filter(!str_detect(tweet, '^"'),!str_detect(tweet,"^'")) %>%
  mutate(tweet =str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", ""))%>%
  unnest_tokens(word, tweet, token = "tweets" ) %>% filter(str_detect(word, "@"))

saveRDS(users_data,"mentioneddata.rds")
head(users_data)
## # A tibble: 6 x 3
##        id name          word           
##     <dbl> <chr>         <chr>          
## 1 1.27e18 Dogruluk Payi @kosartanerin  
## 2 1.26e18 Dogruluk Payi @kosartanerin  
## 3 1.26e18 Dogruluk Payi @kondaarastirma
## 4 1.24e18 Dogruluk Payi @verikaynagi   
## 5 1.23e18 Dogruluk Payi @kondaarastirma
## 6 1.22e18 Dogruluk Payi @idemahaber
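
A compatibility note: the "tweets" tokenizer was removed from later tidytext/tokenizers releases. On current versions, a plain regex extraction (a sketch) recovers the same lowercased hashtag tokens; swap the pattern to "@[a-z0-9_]+" for mentions:

# Extract hashtags directly instead of relying on token = "tweets"
tag_data2 <- tweet_data %>%
  select(id, name, tweet) %>%
  mutate(word = str_extract_all(str_to_lower(tweet), "#[a-z0-9_]+")) %>%
  unnest(word)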

This guide helps a lot

quanteda_network <- function(x,y){
 tag_dfm <- dfm_select(dfm(x$tweet, remove_punct = TRUE), pattern = ("#*"))
 toptag <- names(topfeatures(tag_dfm, 70))
 topgat_fcm <- fcm_select(fcm(tag_dfm), pattern = toptag)
 return(textplot_network(topgat_fcm, 
                 min_freq = 0.1, 
                 edge_alpha = 0.8, 
                 edge_size = 5,
                 edge_color = y,
                 vertex_labelfont ="Poppins",
                 vertex_labelsize = 4))
}
quanteda_network(dpayi,"#3690c0")


quanteda_users <- function(x,y){
 tag_dfm <- dfm_select(dfm(x$tweet, remove_punct = TRUE), pattern = ("@*"))
 toptag <- names(topfeatures(tag_dfm, 70))
 topgat_fcm <- fcm_select(fcm(tag_dfm), pattern = toptag)
 return(textplot_network(topgat_fcm, 
                 min_freq = 0.1, 
                 edge_alpha = 0.8, 
                 edge_size = 5,
                 edge_color = y,
                 vertex_labelfont ="Poppins",
                 vertex_labelsize = 4))
}
quanteda_users(dpayi, "#3690c0")

7. STM on Tweets

library(stm)
## stm v1.3.5 successfully loaded. See ?stm for help. 
##  Papers, resources, and other materials at structuraltopicmodel.com
oneword_stm <- function(x){
  x %>% select(username, tweet) %>%
    filter(!str_detect(tweet, '^"'), !str_detect(tweet, "^'")) %>%
    mutate(tweet = str_replace_all(tweet, "https://t.co/[A-Za-z\\d]+|&amp;", "")) %>%
    unnest_tokens(word, tweet) %>%
    filter(!word %in% stopwords("turkish", source = "stopwords-iso"),
           !word %in% custom_stopwords,
           str_detect(word, "[a-z]")) %>%
    count(username, word, sort = TRUE)
}
datastm <- oneword_stm(tweet_data) %>% filter(username != "dogrusunetrt")
datastm
## # A tibble: 186,492 x 3
##    username        word        n
##    <chr>           <chr>   <int>
##  1 evrimagaci      evrim    4272
##  2 dogrulukpayicom beyanat  3863
##  3 dogrulukpayicom bulten   3116
##  4 evrimagaci      nasil    2387
##  5 gununyalanlari  yalani   2292
##  6 dogrulukpayicom turkiye  2290
##  7 gununyalanlari  yalan    2230
##  8 evrimagaci      bilim    1939
##  9 evrimagaci      nedir    1771
## 10 evrimagaci      okumak   1762
## # ... with 186,482 more rows
fc_dfm <- datastm %>%
    cast_dfm(username, word, n)
fc_dfm
## Document-feature matrix of: 6 documents, 131,815 features (76.4% sparse).
##                  features
## docs              evrim beyanat bulten nasil yalani turkiye yalan bilim nedir
##   evrimagaci       4272       0      1  2387     12     357   111  1939  1771
##   dogrulukpayicom     7    3863   3116   288      0    2290     6     2    88
##   gununyalanlari      1       0      0    45   2292     337  2230     9     2
##   teyitorg            0       0     14   248      0     144   103    46    14
##   malumatfurusorg     8       0      3    59      3     142    64    30    19
##   yalansavar         12       0      0   138      8       3    56   180    36
##                  features
## docs              okumak
##   evrimagaci        1762
##   dogrulukpayicom     66
##   gununyalanlari       0
##   teyitorg            23
##   malumatfurusorg      2
##   yalansavar           3
## [ reached max_nfeat ... 131,805 more features ]
topic_model <- stm(fc_dfm, K = 10, 
                   verbose = FALSE, init.type = "Spectral")
summary(topic_model)
## A topic model with 10 topics, 6 documents and a 131815 word dictionary.
## Topic 1 Top Words:
##       Highest Prob: beyanat, bulten, turkiye, turkiye'de, iddia, sayisi, kontrolu 
##       FREX: beyanat, hukumetre, dp60saniye, mv, mailchi.mp, ahmet_davutoglu, bultenlerini 
##       Lift: 0ks7jtupak0, 10aralikdunyainsanhaklarigunu, 13bow7v, 1bjf9ne, 1fnoll6, 3amp1ublvky, 5487fbbbcc2e9 
##       Score: beyanat, bulten, payi'nin, hukumetre, chp, dp60saniye, ihracat 
## Topic 2 Top Words:
##       Highest Prob: yalani, yalan, dogru, carpitmasi, tarafindan, iddiasiyla, servis 
##       FREX: afrinoperasyonu, gundem'in, yandaslari, kiraz, sozcunun, eymur, kirca 
##       Lift: 10ar, 10rzclpcnk2rjlw067_8aw, 15temmuzuanlat, 2016yalanlari, 22_11_2017_suriye_gbm_bilgi_notu.pdf, abden, acikkollu'nun 
##       Score: yalani, carpitmasi, afrinoperasyonu, cumhuriyet'in, yalan, iddiasiyla, yalanladi 
## Topic 3 Top Words:
##       Highest Prob: dogru, gosterdigi, merhaba, yanlis, tesekkurler, iddia, fotografin 
##       FREX: coronavirusfacts, desteklediginiz, ilgin, kutunuzda, teyitlendin, teyitciyi, teyitpedia 
##       Lift: 021243accdd8, 0iwzw46xo3cyaypkaksfw1, 0kbco73nlfbjtudoctbr3o, 0mblwhastge, 0vdpjnrlc3hrj2wohoypcn, 16lara, 1789da 
##       Score: gosterdigi, merhaba, coronavirusfacts, yayimladigimiz, ilginiz, fotografin, tesekkurler 
## Topic 4 Top Words:
##       Highest Prob: evrim, nasil, okumak, nedir, bilim, insan, fotograf 
##       FREX: evrimin, posted, secilim, evrimlesti, dar.vin, boyda, cmb 
##       Lift: 23andme, posted, evrimin, secilim, evrimlesti, dar.vin, boyda 
##       Score: evrim, evrimi, evrimsel, evrimin, okumak, posted, covid19 
## Topic 5 Top Words:
##       Highest Prob: kose, dogru, yazarlari, yanlis, dogrulama, koronavirus, iddia 
##       FREX: tarihtebugun, aktarmis, kosemenler, keciboynuzu, hazar, ozdil, padisah 
##       Lift: __tn__, _aamirkhan, _devapartisi, _e2r2volqva, _ilkeli_, _nediyoyabu_, _notallthosewho'ye 
##       Score: kose, yanlislama, yazarlari, tarihtebugun, aktarmis, dogrulama, kosemenler 
## Topic 6 Top Words:
##       Highest Prob: yeni, bilim, nasil, su, son, dogru, bilimsel 
##       FREX: derisini, desenleri, desteklediklerimiz, dibinde, dinozorlari, donukluk, dostumdur 
##       Lift: astral, gezegenlerden, paranormal, anneleri, aman, cocukluk, nye 
##       Score: kacirmayin, yazi, tesekkurler, makale, mesela, p, bilimin 
## Topic 7 Top Words:
##       Highest Prob: tartisirken, yeni, bilim, nasil, su, yazi, bilimsel 
##       FREX: tartisirken, yazi, anlatiyor, cogu, mesela, tip, yazdi 
##       Lift: tartisirken, bods, pekcok, skeptik, shermer, novella, derkenar 
##       Score: tartisirken, tam2013, tam2014, sbasegmez, mkozturk, yazi, sozdebilim 
## Topic 8 Top Words:
##       Highest Prob: animsayalim, yeni, bilim, nasil, yazi, su, bilimsel 
##       FREX: animsayalim, tam2013, yazdi, tam2014, mkozturk, sbasegmez, homeopati 
##       Lift: animsayalim, ___ceka___, _burkmez, _burkmez'den, _burkmez'in, _dwqflaibxy, _encoding 
##       Score: animsayalim, tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon 
## Topic 9 Top Words:
##       Highest Prob: tivit, yeni, bilim, nasil, yazi, su, bilimsel 
##       FREX: tivit, tam2013, yazdi, tam2014, mkozturk, sbasegmez, homeopati 
##       Lift: tivit, ___ceka___, _burkmez, _burkmez'den, _dwqflaibxy, _encoding, _i_d 
##       Score: tivit, tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon 
## Topic 10 Top Words:
##       Highest Prob: yeni, bilim, nasil, yazi, tam2013, yazdi, su 
##       FREX: tam2013, tam2014, yazdi, mkozturk, sbasegmez, homeopati, elestirel 
##       Lift: tumertopal, 3balfx, 5b, 7b, 7d, affedin, arsenicum 
##       Score: tam2013, tam2014, sbasegmez, mkozturk, sozdebilim, csicon, burkmez
td_beta <- tidy(topic_model)
## Warning: `tbl_df()` is deprecated as of dplyr 1.0.0.
## Please use `tibble::as_tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
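
From here, the usual tidytext move is to chart the highest-probability terms per topic; a minimal sketch using the td_beta tibble just created:

# Top 10 terms per topic by beta (the per-topic word probability)
td_beta %>%
  group_by(topic) %>%
  slice_max(beta, n = 10) %>%
  ungroup() %>%
  mutate(term = reorder_within(term, beta, topic)) %>%
  ggplot(aes(term, beta, fill = factor(topic))) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~ topic, scales = "free_y") +
  scale_x_reordered() +
  coord_flip()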