Difference between revisions of "R: tidytext NASA data"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
Line 39: | Line 39: | ||
unnest_tokens(word, desc) %>% | unnest_tokens(word, desc) %>% | ||
anti_join(stop_words) | anti_join(stop_words) | ||
− | |||
nasa_title | nasa_title | ||
nasa_desc | nasa_desc | ||
+ | |||
+ | |||
+ | |||
+ | nasa_title %>% | ||
+ | count(word, sort = TRUE) | ||
+ | nasa_desc %>% | ||
+ | count(word, sort = TRUE) | ||
+ | |||
+ | |||
+ | my_stopwords <- data_frame(word = c(as.character(1:10), | ||
+ | "v1", "v03", "l2", "l3", "l4", "v5.2.0", | ||
+ | "v003", "v004", "v005", "v006", "v7")) | ||
+ | nasa_title <- nasa_title %>% | ||
+ | anti_join(my_stopwords) | ||
+ | nasa_desc <- nasa_desc %>% | ||
+ | anti_join(my_stopwords) | ||
+ | |||
+ | |||
+ | |||
+ | |||
+ | |||
Revision as of 08:17, 8 November 2018
# Fetch the NASA data catalogue and parse the JSON into an R object.
library(jsonlite)
metadata <- fromJSON(txt = "https://data.nasa.gov/data.json")

# List the fields recorded for the datasets.
names(metadata$dataset)

# Inspect the types of the title, description and keyword fields.
class(metadata$dataset$title)
class(metadata$dataset$description)
class(metadata$dataset$keyword)
# Get the titles: build a table pairing each dataset id with its title.
library(dplyr)

# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_title <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  title = metadata$dataset$title
)
nasa_title
# Build a table pairing each dataset id with its description.
# tibble() replaces the deprecated data_frame(); it is available once dplyr
# is attached.
nasa_desc <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  desc = metadata$dataset$description
)

# Sample 5 rows to get a feel for what the descriptions look like.
nasa_desc %>%
  select(desc) %>%
  sample_n(5)
# Find the keywords: pair each dataset id with its keyword field, then
# expand that field with unnest().
# NOTE(review): presumably `keyword` is a list column holding several
# keywords per dataset -- confirm against the class() output above.
library(tidyr)

# tibble() replaces the deprecated data_frame(); it is available once dplyr
# is attached.
nasa_keyword <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  keyword = metadata$dataset$keyword
) %>%
  unnest(keyword)
nasa_keyword
library(tidytext)

# Split titles and descriptions into one word per row, then drop the words
# listed in tidytext's `stop_words` table. The join key is spelled out so the
# join does not depend on (or print a message about) whichever columns the
# two tables happen to share.
nasa_title <- nasa_title %>%
  unnest_tokens(word, title) %>%
  anti_join(stop_words, by = "word")

nasa_desc <- nasa_desc %>%
  unnest_tokens(word, desc) %>%
  anti_join(stop_words, by = "word")

nasa_title
nasa_desc
# Word frequencies, most frequent first: titles, then descriptions.
count(nasa_title, word, sort = TRUE)
count(nasa_desc, word, sort = TRUE)
# Extra stop words specific to this corpus: the numbers 1-10 written as
# strings plus a set of version/level-style tokens.
# tibble() replaces the deprecated data_frame(); it is available once dplyr
# is attached.
my_stopwords <- tibble(
  word = c(
    as.character(1:10),
    "v1", "v03", "l2", "l3", "l4", "v5.2.0",
    "v003", "v004", "v005", "v006", "v7"
  )
)

# Remove the custom stop words from both token tables, with an explicit key.
nasa_title <- nasa_title %>%
  anti_join(my_stopwords, by = "word")

nasa_desc <- nasa_desc %>%
  anti_join(my_stopwords, by = "word")