Difference between revisions of "R: tidytext NASA data"
Jump to navigation
Jump to search
Onnowpurbo (talk | contribs) |
Onnowpurbo (talk | contribs) |
||
| Line 29: | Line 29: | ||
unnest(keyword) | unnest(keyword) | ||
nasa_keyword | nasa_keyword | ||
| − | |||
| Line 43: | Line 42: | ||
| − | + | # count & sort | |
nasa_title %>% | nasa_title %>% | ||
count(word, sort = TRUE) | count(word, sort = TRUE) | ||
| Line 50: | Line 49: | ||
| + | # buang keyword yang tidak ada artinya | ||
my_stopwords <- data_frame(word = c(as.character(1:10), | my_stopwords <- data_frame(word = c(as.character(1:10), | ||
"v1", "v03", "l2", "l3", "l4", "v5.2.0", | "v1", "v03", "l2", "l3", "l4", "v5.2.0", | ||
| Line 59: | Line 59: | ||
| + | # supaya OCEAN & Ocean jadi sama | ||
| + | nasa_keyword <- nasa_keyword %>% | ||
| + | mutate(keyword = toupper(keyword)) | ||
Revision as of 08:19, 8 November 2018
library(jsonlite)

# Fetch the JSON document at data.nasa.gov and parse it into R objects.
metadata <- fromJSON("https://data.nasa.gov/data.json")

# List the component names of the `dataset` element.
names(metadata$dataset)

# Inspect the classes of the three fields used further down. Each call has to
# be its own statement: R cannot parse several calls placed on one line
# without a newline or `;` between them.
class(metadata$dataset$title)
class(metadata$dataset$description)
class(metadata$dataset$keyword)
# Collect the dataset titles, one row per dataset, keyed by id.
library(dplyr)

# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
# NOTE(review): assumes every dataset entry exposes `_id$`$oid`` -- confirm
# against the current shape of the feed.
nasa_title <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  title = metadata$dataset$title
)
nasa_title
# Collect the dataset descriptions, one row per dataset, keyed by id.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_desc <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  desc = metadata$dataset$description
)

# Show 5 randomly sampled descriptions as a quick look at the text.
nasa_desc %>%
  select(desc) %>%
  sample_n(5)
# Collect the keywords of every dataset.
library(tidyr)

# `keyword` is passed in as-is and then unnested, so the result has one row
# per (id, keyword) pair.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_keyword <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  keyword = metadata$dataset$keyword
) %>%
  unnest(keyword)
nasa_keyword
library(tidytext)

# Split the titles into one token per row (column `word`) and drop every
# token that appears in tidytext's `stop_words` table. The join key is
# spelled out so the match column is explicit and dplyr does not have to
# guess it (and print a "Joining, by" message).
nasa_title <- nasa_title %>%
  unnest_tokens(word, title) %>%
  anti_join(stop_words, by = "word")

# Same treatment for the descriptions.
nasa_desc <- nasa_desc %>%
  unnest_tokens(word, desc) %>%
  anti_join(stop_words, by = "word")

nasa_title
nasa_desc
# Word frequencies, most frequent first: titles, then descriptions.
count(nasa_title, word, sort = TRUE)
count(nasa_desc, word, sort = TRUE)
# Remove tokens that carry no meaning for this analysis: the numbers 1-10
# (as strings) and a handful of short codes -- presumably product version and
# processing-level tags; confirm against the data.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
my_stopwords <- tibble(
  word = c(
    as.character(1:10),
    "v1", "v03", "l2", "l3", "l4", "v5.2.0",
    "v003", "v004", "v005", "v006", "v7"
  )
)

# Explicit join key: both tables are matched on `word`.
nasa_title <- nasa_title %>%
  anti_join(my_stopwords, by = "word")
nasa_desc <- nasa_desc %>%
  anti_join(my_stopwords, by = "word")
# Upper-case every keyword so that variants differing only in case
# (e.g. "OCEAN" and "Ocean") become the same value.
nasa_keyword <- mutate(nasa_keyword, keyword = toupper(keyword))