Difference between revisions of "R: tidytext NASA data"

From OnnoWiki
Jump to navigation Jump to search
Line 30: Line 30:
 
  nasa_keyword
 
  nasa_keyword
  
 +
 +
 +
library(tidytext)
 +
nasa_title <- nasa_title %>%
 +
    unnest_tokens(word, title) %>%
 +
        anti_join(stop_words)
 +
nasa_desc <- nasa_desc %>%
 +
    unnest_tokens(word, desc) %>%
 +
        anti_join(stop_words)
 +
 +
nasa_title
 +
nasa_desc
  
  

Revision as of 08:13, 8 November 2018

# Download the NASA data catalogue (needs network access) and parse the JSON.
library(jsonlite)
metadata <- fromJSON("https://data.nasa.gov/data.json")

# List the fields recorded for each dataset, then check the class of the
# title, description and keyword fields.
names(metadata[["dataset"]])
class(metadata[["dataset"]][["title"]])
class(metadata[["dataset"]][["description"]])
class(metadata[["dataset"]][["keyword"]])
# Get the titles: pair each dataset id with its title.
library(dplyr)
# tibble() replaces the deprecated data_frame(); dplyr re-exports it, so no
# extra package needs to be attached.
nasa_title <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  title = metadata$dataset$title
)
nasa_title


# Pair each dataset id with its description, then print a random sample of
# 5 descriptions.
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_desc <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  desc = metadata$dataset$description
)
nasa_desc %>%
  select(desc) %>%
  sample_n(5)


# Find the keywords: pair each dataset id with its keyword entry, then
# unnest the keyword column so that each keyword gets its own row.
library(tidyr)
# tibble() replaces the deprecated data_frame(); dplyr re-exports it.
nasa_keyword <- tibble(
  id = metadata$dataset$`_id`$`$oid`,
  keyword = metadata$dataset$keyword
) %>%
  unnest(keyword)
nasa_keyword


# Split titles and descriptions into one token per row (column `word`) and
# drop every token that appears in tidytext's `stop_words` table.
library(tidytext)

# The join key is spelled out (`by = "word"`) rather than left to the implicit
# natural join, which prints a "Joining, by" message and would silently pick
# up any other column name the two tables happened to share.
nasa_title <- nasa_title %>%
  unnest_tokens(word, title) %>%
  anti_join(stop_words, by = "word")

nasa_desc <- nasa_desc %>%
  unnest_tokens(word, desc) %>%
  anti_join(stop_words, by = "word")

nasa_title
nasa_desc


Pranala Menarik