Dies ist der Link zu dem Datensatz: https://www.ecb.europa.eu/press/key/htm ... ds.en.html
Die bisherigen Ansätze funktionieren noch nicht zufriedenstellend.
Bisherige Ansätze:
Code: Alles auswählen
# Ansatz 1: load the pipe-delimited ECB speeches file and build a
# term-document matrix from the cleaned text.
# NOTE(review): "Dateipfad" is a placeholder -- replace with the real file path.
reden_data <- read.csv("Dateipfad", header = TRUE, sep = "|")
str(reden_data)
library(tm)
# iconv() normalises the text encoding; invalid byte sequences become NA
# unless sub= is supplied (Ansatz 2 below uses sub = "").
corpus <- iconv(reden_data$contents)
corpus <- Corpus(VectorSource(corpus))
# Base-R functions must be wrapped in content_transformer(); applying
# tolower directly strips the PlainTextDocument class and breaks the
# following tm_map() calls in current versions of tm.
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
cleanset <- tm_map(corpus, removeWords, stopwords('english'))
cleanset <- tm_map(cleanset, stemDocument)
cleanset <- tm_map(cleanset, stripWhitespace)
tdm <- TermDocumentMatrix(cleanset)
# Dense conversion is fine for small corpora but memory-heavy for large ones.
tdm <- as.matrix(tdm)
dim(tdm)
library(dplyr)
library(textdata)
library(tidytext)
# Convert each corpus document (lower-cased, punctuation-free) back to a
# plain character string, one element per speech.
corpus_text <- sapply(corpus, as.character)
# AFINN assigns each English word an integer score in [-5, 5].
sentiments <- get_sentiments("afinn")
# Tokenise on whitespace and remember which speech each token came from,
# so the scores can be aggregated per speech (the original grouped by
# word, which only multiplied each word's score by its frequency).
tokens_per_doc <- strsplit(corpus_text, "\\s+")
reden_sentiment <- data.frame(
  document = rep(seq_along(tokens_per_doc), lengths(tokens_per_doc)),
  word = unlist(tokens_per_doc)
)
# inner_join keeps only words present in the lexicon; a left_join would
# feed NA values into sum() and make every total NA.
reden_sentiment <- reden_sentiment %>%
  inner_join(sentiments, by = "word")
# Sum the sentiment scores for each speech
reden_sentiment_summary <- reden_sentiment %>%
  group_by(document) %>%
  summarize(sentiment_score = sum(value))
# Show the results
head(reden_sentiment_summary)
Ansatz 2:
#install.packages("NLP")
#library(NLP)
# Ansatz 2: reload the data and clean it with tm before building a DTM.
reden_data <- read.csv("Dateipfad", header = TRUE, sep = "|")
#reden_data_cleaned <- reden_data[reden_data$speakers != "", ]
#library(tm)
#library(quanteda)
# Drop invalid UTF-8 byte sequences (sub = "") instead of letting whole
# texts turn into NA.
reden_data$contents <- iconv(reden_data$contents, from = "UTF-8", to = "UTF-8", sub = "")
corpus <- Corpus(VectorSource(reden_data$contents))
# Clean the data
corpus <- tm_map(corpus, content_transformer(tolower))
corpus <- tm_map(corpus, removePunctuation)
corpus <- tm_map(corpus, removeNumbers)
# The ECB speeches are in English and the AFINN lexicon used below is an
# English lexicon, so remove ENGLISH stopwords here (the original removed
# German ones, which left English filler words in the corpus).
corpus <- tm_map(corpus, removeWords, stopwords("english"))
dtm <- DocumentTermMatrix(corpus)
#install.packages("quanteda")
#library(dplyr)
#library(textdata)
#library(tidytext)
# Now convert the corpus into a character vector, one element per speech
corpus_text <- sapply(corpus, as.character)
# AFINN lexicon: integer sentiment score per English word
sentiments <- get_sentiments("afinn")
# Tokenise on whitespace and keep the document index of every token.
# The original data frame had only a `word` column, so the later
# group_by(Document) failed with "object 'Document' not found".
tokens_per_doc <- strsplit(corpus_text, "\\s+")
reden_sentiment <- data.frame(
  document = rep(seq_along(tokens_per_doc), lengths(tokens_per_doc)),
  word = unlist(tokens_per_doc)
)
reden_sentiment <- reden_sentiment %>%
  left_join(sentiments, by = "word")
# Sum the sentiment scores for each speech; na.rm = TRUE drops words that
# are not in the lexicon (their value is NA after the left join -- without
# it every per-speech sum would be NA).
reden_sentiment_summary <- reden_sentiment %>%
  group_by(document) %>%
  summarize(sentiment_score = sum(value, na.rm = TRUE))
# Show the results
head(reden_sentiment_summary)
# Guard the install so the script does not reinstall the package (and hit
# the network) on every run -- consistent with the commented-out
# install.packages() calls earlier in the file.
if (!requireNamespace("xml2", quietly = TRUE)) {
  install.packages("xml2")
}
library(devtools)