Difference between revisions of "R: tidytext NASA data"
		
		
		
		
		
		Jump to navigation
		Jump to search
		
				
		
		
	
Onnowpurbo (talk | contribs)  | 
				Onnowpurbo (talk | contribs)   | 
				||
| Line 39: | Line 39: | ||
      unnest_tokens(word, desc) %>%  |       unnest_tokens(word, desc) %>%  | ||
         anti_join(stop_words)  |          anti_join(stop_words)  | ||
| − | |||
  nasa_title  |   nasa_title  | ||
  nasa_desc  |   nasa_desc  | ||
| + | |||
| + | |||
| + | |||
| + |  nasa_title %>%  | ||
| + |      count(word, sort = TRUE)  | ||
| + |  nasa_desc %>%  | ||
| + |      count(word, sort = TRUE)  | ||
| + | |||
| + | |||
| + |  my_stopwords <- data_frame(word = c(as.character(1:10),  | ||
| + |                             "v1", "v03", "l2", "l3", "l4", "v5.2.0",  | ||
| + |                             "v003", "v004", "v005", "v006", "v7"))  | ||
| + |  nasa_title <- nasa_title %>%  | ||
| + |       anti_join(my_stopwords)  | ||
| + |  nasa_desc <- nasa_desc %>%  | ||
| + |       anti_join(my_stopwords)  | ||
| + | |||
| + | |||
| + | |||
| + | |||
| + | |||
Revision as of 08:17, 8 November 2018
# Download the NASA dataset catalogue (JSON) and parse it into R objects.
library(jsonlite)
metadata <- fromJSON("https://data.nasa.gov/data.json")

# List the fields available on the dataset records.
names(metadata$dataset)

# Inspect the types of the three fields of interest. Each call has to be its
# own statement: R cannot parse several calls separated only by spaces.
class(metadata$dataset$title)
class(metadata$dataset$description)
class(metadata$dataset$keyword)
# Get the titles: one row per dataset, keyed by the dataset id.
library(dplyr)
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_title <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  title = metadata$dataset$title
)
nasa_title

# Get the descriptions the same way, then print a random sample of 5 of them.
nasa_desc <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  desc = metadata$dataset$description
)
nasa_desc %>%
  select(desc) %>%
  sample_n(5)
# Find the keywords: build an id/keyword table, then unnest() the keyword
# column so that every keyword gets its own row.
library(tidyr)
# tibble() replaces the deprecated data_frame().
nasa_keyword <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  keyword = metadata$dataset$keyword
) %>%
  unnest(keyword)
nasa_keyword
library(tidytext)

# Split titles and descriptions into one word per row, then drop the words
# listed in tidytext's stop_words table. The join key is given explicitly so
# the join does not depend on (or report) whichever columns happen to match.
nasa_title <- nasa_title %>%
  unnest_tokens(word, title) %>%
  anti_join(stop_words, by = "word")
nasa_desc <- nasa_desc %>%
  unnest_tokens(word, desc) %>%
  anti_join(stop_words, by = "word")
nasa_title
nasa_desc

# Word frequencies, most frequent first.
nasa_title %>%
  count(word, sort = TRUE)
nasa_desc %>%
  count(word, sort = TRUE)

# Extra stop words: the numbers 1 to 10 plus a list of version/level-style
# tokens. tibble() replaces the deprecated data_frame().
my_stopwords <- tibble(word = c(
  as.character(1:10),
  "v1", "v03", "l2", "l3", "l4", "v5.2.0",
  "v003", "v004", "v005", "v006", "v7"
))
nasa_title <- nasa_title %>%
  anti_join(my_stopwords, by = "word")
nasa_desc <- nasa_desc %>%
  anti_join(my_stopwords, by = "word")