Prof. Walmes Marques Zeviani
02 Mar 2017
XML
Documentação: http://www.omdbapi.com/.
# Query information about the movie Titanic and open the XML response
# in the browser.
url <- sprintf("http://www.omdbapi.com/?t=%s&r=xml", "titanic")
browseURL(url)
# Search for movies matching the term "batman" and open the XML response
# in the browser.
url <- sprintf("http://www.omdbapi.com/?s=%s&r=xml", "batman")
browseURL(url)
# Number of events within the given time window.
url <- sprintf(
  paste0("https://earthquake.usgs.gov/fdsnws/event/1/%s?",
         "format=xml&starttime=%s&endtime=%s"),
  "count",
  "2017-02-02T12:00:00",
  "2017-02-02T13:00:00"
)
browseURL(url)
# Details of the events within the given time window.
url <- sprintf(
  paste0("https://earthquake.usgs.gov/fdsnws/event/1/%s?",
         "format=xml&starttime=%s&endtime=%s"),
  "query",
  "2017-02-02T12:00:00",
  "2017-02-02T13:00:00"
)
browseURL(url)
# JSON record with information about Curitiba; its "_id" is the city id
# used in the request below.
# {"_id": 6322752,
#  "name": "Curitiba",
#  "country": "BR",
#  "coord": {"lon": -49.290821, "lat": -25.50395}}
url <- paste0(
  "http://samples.openweathermap.org/data/2.5/weather",
  "?id=", 6322752L,
  "&appid=", "6eb4d970f22e507866028152781d8dd5"
)
url
browseURL(url)
# Search for movies matching a term (here "resident evil").
# Alternative searches, kept for experimentation:
# url <- sprintf("http://www.omdbapi.com/?s=%s&r=xml", "rocky")
# url <- sprintf("http://www.omdbapi.com/?s=%s&r=xml", "terminator")
# url <- sprintf("http://www.omdbapi.com/?s=%s&r=xml", "rambo")
url <- sprintf("http://www.omdbapi.com/?s=%s&r=xml", "resident evil")
url
## [1] "http://www.omdbapi.com/?s=resident evil&r=xml"
# browseURL(url)
# Load the XML package and parse the response at `url` into a document
# object.
library(XML)
doc <- xmlParse(url)
# Class of the object.
class(doc)
## [1] "XMLInternalDocument" "XMLAbstractDocument"
# Methods available for the class.
methods(class = "XMLInternalDocument")
## [1] addChildren coerce
## [3] docName<- docName
## [5] free getEncoding
## [7] initialize print
## [9] processXInclude readKeyValueDB
## [11] readSolrDoc saveXML
## [13] show slotsFromS3
## [15] summary xmlChildren
## [17] xmlClone [
## [19] [[ xmlNamespaceDefinitions
## [21] xmlRoot xmlSourceFunctions
## [23] xmlSourceSection xmlSource
## [25] xmlToDataFrame xpathApply
## see '?methods' for accessing help and source code
# Name of the source the document was read from. Note that the space in
# the search term is reported percent-encoded (%20).
docName(doc)
## [1] "http://www.omdbapi.com/?s=resident%20evil&r=xml"
# Show the content.
show(doc)
## <?xml version="1.0" encoding="utf-8"?>
## <root totalResults="67" response="True">
## <result title="Resident Evil" year="2002" imdbID="tt0120804" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BN2Y2MTljNjMtMDRlNi00ZWNhLThmMWItYTlmZjYyZDk4NzYxXkEyXkFqcGdeQXVyNjQ2MjQ5NzM@._V1_SX300.jpg"/>
## <result title="Resident Evil: Apocalypse" year="2004" imdbID="tt0318627" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTc1NTUxMzk0Nl5BMl5BanBnXkFtZTcwNDQ1MDIzMw@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Extinction" year="2007" imdbID="tt0432021" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BNDQ0MWI3MmEtMzM0OC00Y2ViLWE4MDItMzNhNmY1ZTdjMWE2XkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_SX300.jpg"/>
## <result title="Resident Evil: Afterlife" year="2010" imdbID="tt1220634" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTkxNzM3OTg5OF5BMl5BanBnXkFtZTcwMDA5MDA2Mw@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Retribution" year="2012" imdbID="tt1855325" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTA2NTkwNjUxNTZeQTJeQWpwZ15BbWU3MDE2OTMxMTg@._V1_SX300.jpg"/>
## <result title="Resident Evil: The Final Chapter" year="2016" imdbID="tt2592614" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTc0Mzc2OTQ0Ml5BMl5BanBnXkFtZTgwOTQ5MjE4MDI@._V1_SX300.jpg"/>
## <result title="Resident Evil: Degeneration" year="2008" imdbID="tt1174954" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTgwMDE4NzcxMl5BMl5BanBnXkFtZTcwNjU1NjcwMg@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Damnation" year="2012" imdbID="tt1753496" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BNzg2ODUxOTIzN15BMl5BanBnXkFtZTcwMDM0NzAzOA@@._V1_SX300.jpg"/>
## <result title="Resident Evil 4" year="2005" imdbID="tt0397042" type="game" poster="http://ia.media-imdb.com/images/M/MV5BMTU1NjY2MDM4Nl5BMl5BanBnXkFtZTgwMzk0MTU3MDE@._V1_SX300.jpg"/>
## <result title="Resident Evil 5" year="2009" imdbID="tt0473298" type="game" poster="http://ia.media-imdb.com/images/M/MV5BMTk3MDMzNzMxMV5BMl5BanBnXkFtZTcwNDYxNTMxNg@@._V1_SX300.jpg"/>
## </root>
##
# Get the root node of the parsed document.
root <- xmlRoot(doc)
# Class of the object.
class(root)
## [1] "XMLInternalElementNode" "XMLInternalNode"
## [3] "XMLAbstractNode"
# Methods available for the class.
methods(class = "XMLInternalElementNode")
## [1] addAttributes coerce docName
## [4] getEncoding initialize processXInclude
## [7] readHTMLList readHTMLTable readKeyValueDB
## [10] readSolrDoc removeAttributes removeXMLNamespaces
## [13] saveXML show slotsFromS3
## [16] xmlAttrs<- xmlChildren<- xmlClone
## [19] [[ xmlNamespaces<- xmlNamespace<-
## [22] xmlName<- xmlParent xmlToDataFrame
## [25] xmlToS4 xmlValue<-
## see '?methods' for accessing help and source code
# Show the content of the root node.
show(root)
## <root totalResults="67" response="True">
## <result title="Resident Evil" year="2002" imdbID="tt0120804" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BN2Y2MTljNjMtMDRlNi00ZWNhLThmMWItYTlmZjYyZDk4NzYxXkEyXkFqcGdeQXVyNjQ2MjQ5NzM@._V1_SX300.jpg"/>
## <result title="Resident Evil: Apocalypse" year="2004" imdbID="tt0318627" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTc1NTUxMzk0Nl5BMl5BanBnXkFtZTcwNDQ1MDIzMw@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Extinction" year="2007" imdbID="tt0432021" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BNDQ0MWI3MmEtMzM0OC00Y2ViLWE4MDItMzNhNmY1ZTdjMWE2XkEyXkFqcGdeQXVyMTQxNzMzNDI@._V1_SX300.jpg"/>
## <result title="Resident Evil: Afterlife" year="2010" imdbID="tt1220634" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTkxNzM3OTg5OF5BMl5BanBnXkFtZTcwMDA5MDA2Mw@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Retribution" year="2012" imdbID="tt1855325" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTA2NTkwNjUxNTZeQTJeQWpwZ15BbWU3MDE2OTMxMTg@._V1_SX300.jpg"/>
## <result title="Resident Evil: The Final Chapter" year="2016" imdbID="tt2592614" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTc0Mzc2OTQ0Ml5BMl5BanBnXkFtZTgwOTQ5MjE4MDI@._V1_SX300.jpg"/>
## <result title="Resident Evil: Degeneration" year="2008" imdbID="tt1174954" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BMTgwMDE4NzcxMl5BMl5BanBnXkFtZTcwNjU1NjcwMg@@._V1_SX300.jpg"/>
## <result title="Resident Evil: Damnation" year="2012" imdbID="tt1753496" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BNzg2ODUxOTIzN15BMl5BanBnXkFtZTcwMDM0NzAzOA@@._V1_SX300.jpg"/>
## <result title="Resident Evil 4" year="2005" imdbID="tt0397042" type="game" poster="http://ia.media-imdb.com/images/M/MV5BMTU1NjY2MDM4Nl5BMl5BanBnXkFtZTgwMzk0MTU3MDE@._V1_SX300.jpg"/>
## <result title="Resident Evil 5" year="2009" imdbID="tt0473298" type="game" poster="http://ia.media-imdb.com/images/M/MV5BMTk3MDMzNzMxMV5BMl5BanBnXkFtZTcwNDYxNTMxNg@@._V1_SX300.jpg"/>
## </root>
# Name of the root element.
xmlName(root)
## [1] "root"
# Number of children it has.
xmlSize(root)
## [1] 10
# Attributes of the root element.
xmlAttrs(root)
## totalResults response
## "67" "True"
# Parse the lenders of a Kiva team and inspect the root node.
kiva <- "http://api.kivaws.org/v1/teams/2/lenders.xml"
doc <- xmlParse(kiva)
root <- xmlRoot(doc)
xmlSize(root)
## [1] 2
xmlName(root)
## [1] "response"
# Select the first child. What is the difference between `[` and `[[`?
# `[` returns a node list holding the child; `[[` returns the node itself.
root[1]
## $paging
## <paging>
## <page>1</page>
## <total>68</total>
## <page_size>50</page_size>
## <pages>2</pages>
## </paging>
##
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"
root[[1]]
## <paging>
## <page>1</page>
## <total>68</total>
## <page_size>50</page_size>
## <pages>2</pages>
## </paging>
# Extract the content of the element. On a parent node the text of all
# descendants is concatenated: "1", "68", "50" and "2" give "168502".
xmlValue(root[[1]])
## [1] "168502"
# Select a child by name: `[` gives a node list, `[[` gives the node.
root[[1]]["total"]
## $total
## <total>68</total>
##
## attr(,"class")
## [1] "XMLInternalNodeList" "XMLNodeList"
root[[1]][["total"]]
## <total>68</total>
# Content of just the <total> element.
xmlValue(root[[1]][["total"]])
## [1] "68"
# Build the search URL for "resident evil" again, parse the response and
# inspect the root node.
url <- "http://www.omdbapi.com/?s=%s&r=xml"
url <- sprintf(url, "resident evil")
url
## [1] "http://www.omdbapi.com/?s=resident evil&r=xml"
# browseURL(url)
doc <- xmlParse(url)
docName(doc)
## [1] "http://www.omdbapi.com/?s=resident%20evil&r=xml"
root <- xmlRoot(doc)
xmlName(root)
## [1] "root"
xmlSize(root)
## [1] 10
# Get the attributes of the root element.
xmlAttrs(root)
## totalResults response
## "67" "True"
# Names of the children.
names(root)
## result result result result result result result
## "result" "result" "result" "result" "result" "result" "result"
## result result result
## "result" "result" "result"
# Select the first child.
root[[1]]
## <result title="Resident Evil" year="2002" imdbID="tt0120804" type="movie" poster="https://images-na.ssl-images-amazon.com/images/M/MV5BN2Y2MTljNjMtMDRlNi00ZWNhLThmMWItYTlmZjYyZDk4NzYxXkEyXkFqcGdeQXVyNjQ2MjQ5NzM@._V1_SX300.jpg"/>
# Extract all attributes of the first child.
xmlAttrs(root[[1]])
## title
## "Resident Evil"
## year
## "2002"
## imdbID
## "tt0120804"
## type
## "movie"
## poster
## "https://images-na.ssl-images-amazon.com/images/M/MV5BN2Y2MTljNjMtMDRlNi00ZWNhLThmMWItYTlmZjYyZDk4NzYxXkEyXkFqcGdeQXVyNjQ2MjQ5NzM@._V1_SX300.jpg"
# Extract only the named attribute.
xmlGetAttr(root[[1]], name = "title")
## [1] "Resident Evil"
xmlGetAttr(root[[1]], name = "year")
## [1] "2002"
# Build a list in which each child (movie) is one list element.
child <- xmlChildren(root)
str(child)
## List of 10
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## $ result:Classes 'XMLInternalElementNode', 'XMLInternalNode', 'XMLAbstractNode' <externalptr>
## - attr(*, "class")= chr [1:2] "XMLInternalNodeList" "XMLNodeList"
# Extract the title and year attributes of a single XML node.
#
# Args:
#   node: an XML node expected to carry `title` and `year` attributes.
#
# Returns:
#   A named character vector c(title = ..., year = ...). A missing
#   attribute yields NA instead of being dropped by c(), so the result
#   always has length 2 and keeps both names.
f <- function(node) {
  c(title = xmlGetAttr(node, name = "title", default = NA_character_),
    year = xmlGetAttr(node, name = "year", default = NA_character_))
}
# Apply a function to each element of the list of children.
sapply(child, FUN = f)
## result result
## title "Resident Evil" "Resident Evil: Apocalypse"
## year "2002" "2004"
## result result
## title "Resident Evil: Extinction" "Resident Evil: Afterlife"
## year "2007" "2010"
## result
## title "Resident Evil: Retribution"
## year "2012"
## result
## title "Resident Evil: The Final Chapter"
## year "2016"
## result result
## title "Resident Evil: Degeneration" "Resident Evil: Damnation"
## year "2008" "2012"
## result result
## title "Resident Evil 4" "Resident Evil 5"
## year "2005" "2009"
# Apply f to every child of the root; the result is a list.
# xmlApply(x, ...) = lapply(xmlChildren(x), ...)
xmlApply(root, FUN = f)
## $result
## title year
## "Resident Evil" "2002"
##
## $result
## title year
## "Resident Evil: Apocalypse" "2004"
##
## $result
## title year
## "Resident Evil: Extinction" "2007"
##
## $result
## title year
## "Resident Evil: Afterlife" "2010"
##
## $result
## title year
## "Resident Evil: Retribution" "2012"
##
## $result
## title
## "Resident Evil: The Final Chapter"
## year
## "2016"
##
## $result
## title year
## "Resident Evil: Degeneration" "2008"
##
## $result
## title year
## "Resident Evil: Damnation" "2012"
##
## $result
## title year
## "Resident Evil 4" "2005"
##
## $result
## title year
## "Resident Evil 5" "2009"
# xmlSApply(x, ...) = sapply(xmlChildren(x), ...)
# S: simplifies the result when possible.
xmlSApply(root, FUN = f)
## result result
## title "Resident Evil" "Resident Evil: Apocalypse"
## year "2002" "2004"
## result result
## title "Resident Evil: Extinction" "Resident Evil: Afterlife"
## year "2007" "2010"
## result
## title "Resident Evil: Retribution"
## year "2012"
## result
## title "Resident Evil: The Final Chapter"
## year "2016"
## result result
## title "Resident Evil: Degeneration" "Resident Evil: Damnation"
## year "2008" "2012"
## result result
## title "Resident Evil 4" "Resident Evil 5"
## year "2005" "2009"
# Download the plant catalog with RCurl and parse the returned XML text.
placat <- "https://www.w3schools.com/Xml/plant_catalog.xml"
library(RCurl)
# getURL() hands back the document body as a string, so tell xmlParse()
# explicitly that its input is XML content rather than a file name or URL,
# instead of relying on it to guess from the string's contents.
# NOTE(review): presumably getURL() is used because xmlParse() cannot fetch
# https URLs itself - confirm.
doc <- xmlParse(getURL(placat), asText = TRUE)
root <- xmlRoot(doc)
xmlSize(root)
## [1] 36
xmlName(root)
## [1] "CATALOG"
names(root)
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT" "PLANT"
## PLANT PLANT PLANT PLANT
## "PLANT" "PLANT" "PLANT" "PLANT"
# First plant in the catalog.
root[[1]]
## <PLANT>
## <COMMON>Bloodroot</COMMON>
## <BOTANICAL>Sanguinaria canadensis</BOTANICAL>
## <ZONE>4</ZONE>
## <LIGHT>Mostly Shady</LIGHT>
## <PRICE>$2.44</PRICE>
## <AVAILABILITY>031599</AVAILABILITY>
## </PLANT>
# Names of its child elements.
names(root[[1]])
## COMMON BOTANICAL ZONE LIGHT
## "COMMON" "BOTANICAL" "ZONE" "LIGHT"
## PRICE AVAILABILITY
## "PRICE" "AVAILABILITY"
# Select one child element by name.
root[[1]][["BOTANICAL"]]
## <BOTANICAL>Sanguinaria canadensis</BOTANICAL>
# xmlSApply(root,
# FUN = function(node) {
# node[["PRICE"]]
# })
# Same as the commented-out code above: "[[" is passed as the function
# and "PRICE" as its extra argument. Only the first results are shown.
head(xmlSApply(root, FUN = "[[", "PRICE"))
## $PLANT
## <PRICE>$2.44</PRICE>
##
## $PLANT
## <PRICE>$9.37</PRICE>
##
## $PLANT
## <PRICE>$6.81</PRICE>
##
## $PLANT
## <PRICE>$9.90</PRICE>
##
## $PLANT
## <PRICE>$6.44</PRICE>
##
## $PLANT
## <PRICE>$9.03</PRICE>
# Text content of the PRICE element of every plant in the catalog.
xmlSApply(root, FUN = function(plant) xmlValue(plant[["PRICE"]]))
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "$2.44" "$9.37" "$6.81" "$9.90" "$6.44" "$9.03" "$4.45" "$3.99"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "$3.23" "$2.98" "$2.80" "$5.59" "$6.59" "$3.90" "$3.20" "$9.04"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "$6.94" "$9.58" "$8.86" "$9.16" "$4.59" "$7.16" "$9.80" "$2.57"
## PLANT PLANT PLANT PLANT PLANT PLANT PLANT PLANT
## "$9.34" "$2.78" "$7.06" "$6.56" "$7.81" "$8.56" "$9.26" "$4.36"
## PLANT PLANT PLANT PLANT
## "$7.89" "$8.60" "$5.63" "$3.02"
# Parse the record of a single Kiva lender and summarise the document:
# counts per element name and the total number of nodes.
doc <- xmlParse("http://api.kivaws.org/v1/lenders/matt.xml")
summary(doc)
## $nameCounts
##
## country_code id image
## 1 1 1
## invitee_count lender lender_id
## 1 1 1
## lenders loan_because loan_count
## 1 1 1
## member_since name occupation
## 1 1 1
## occupational_info personal_url response
## 1 1 1
## template_id uid whereabouts
## 1 1 1
##
## $numNodes
## [1] 18
# Walk down from the root to the <lender> element nested inside <lenders>.
root <- xmlRoot(doc)
lend <- root[["lenders"]][["lender"]]
# Number of children of the lender node.
xmlSize(lend)
## [1] 13
# Names of those children.
names(lend)
## lender_id name image
## "lender_id" "name" "image"
## whereabouts country_code uid
## "whereabouts" "country_code" "uid"
## member_since personal_url occupation
## "member_since" "personal_url" "occupation"
## loan_because occupational_info loan_count
## "loan_because" "occupational_info" "loan_count"
## invitee_count
## "invitee_count"
# Navigate around the <image> node: parent, children and siblings.
img <- lend[["image"]]
# Parent element.
xmlName(xmlParent(img))
## [1] "lender"
# Child elements.
names(xmlChildren(img))
## [1] "id" "template_id"
# Sibling that comes right after the node.
xmlName(getSibling(img, after = TRUE))
## [1] "whereabouts"
# Sibling that comes right before the node.
xmlName(getSibling(img, after = FALSE))
## [1] "name"
Função | Classe |
---|---|
xmlParse | XMLInternalDocument |
xmlRoot | XMLInternalElementNode |
[ , [[ | XMLInternalNodeList |
Função | Descrição |
---|---|
xmlName | Nome do elemento |
xmlSize | Número de filhos |
xmlAttrs | Vetor com os atributos |
xmlGetAttr | Apenas um atributo pelo nome |
xmlValue | Conteúdo do elemento |
xmlChildren | Lista com os filhos |
xmlParent | Elemento pai |
Função | Descrição |
---|---|
xmlApply | Aplica função aos filhos de um elemento |
xmlSApply | Idem, mas simplifica quando possível |