Download the files
All the data files and the description file are available on http://base-donnees-publique.medicaments.gouv.fr/telechargement.php.
Data files are .txt
Description file is .pdf
I will use rvest to scrape the download links, then base R's download.file() to fetch the files.
# Address of the download page; it is parsed for links and also used as the
# base onto which each relative link is pasted (default of full_url()).
url <- "http://base-donnees-publique.medicaments.gouv.fr/telechargement.php"
Get the page
library(rvest)
## Loading required package: xml2
library(XML)
## Loading required package: methods
##
## Attaching package: 'XML'
## The following object is masked from 'package:rvest':
##
## xml
# Fetch the download page and parse its HTML so the links can be queried.
page <- read_html(url)
The database URLs can be found by looking for links to files ending in .txt.
These URLs are the targets of links (a) placed inside a list (ul).
# Select every anchor nested in a list under the #container element and
# keep the value of its href attribute (the relative download link).
dl_end_url <- html_attr(
  html_nodes(page, "#container ul a"),
  "href"
)
dl_end_url
## [1] "docs/Contenu_et_format_des_fichiers_telechargeables_dans_la_BDM_v1.pdf"
## [2] "?fichier=CIS_bdpm.txt"
## [3] "?fichier=CIS_CIP_bdpm.txt"
## [4] "?fichier=CIS_COMPO_bdpm.txt"
## [5] "?fichier=CIS_HAS_SMR_bdpm.txt"
## [6] "?fichier=CIS_HAS_ASMR_bdpm.txt"
## [7] "?fichier=HAS_LiensPageCT_bdpm.txt"
## [8] "?fichier=CIS_GENER_bdpm.txt"
## [9] "?fichier=CIS_CPD_bdpm.txt"
## [10] "?fichier=CIS_InfoImportantes.txt"
We don’t want to download the documentation for the moment
# Drop the PDF documentation link and keep only the data-file links.
# Logical indexing is used rather than `-grep(...)`: when no element matches,
# grep() returns integer(0) and `x[-integer(0)]` selects zero elements, which
# would silently discard every link instead of keeping them all.
data_end_url <- dl_end_url[!grepl("pdf", dl_end_url)]
To get the full download URL, just paste the base URL in front of each relative link.
# Build a complete download URL by appending a relative link to a base URL.
#   endurl: character vector of relative links, e.g. "?fichier=CIS_bdpm.txt".
#   base:   URL placed in front of each link; defaults to the global `url`.
# Returns the concatenated URLs as a character vector.
full_url <- function(endurl, base = url) {
  paste0(base, endurl)
}
# Derive the local file name from a relative download link by removing every
# occurrence of the literal text "?fichier=",
# e.g. "?fichier=CIS_bdpm.txt" becomes "CIS_bdpm.txt".
#   endurl: character vector of relative links.
# Returns a character vector of the same length.
filename <- function(endurl) {
  # fixed = TRUE matches the text literally, so "?" needs no escaping.
  gsub("?fichier=", "", endurl, fixed = TRUE)
}
We will download all the raw data in a directory
# Directory that receives the raw downloaded files.
raw_data_dir <- "raw_data"
if (!dir.exists(raw_data_dir)) {
  dir.create(raw_data_dir)
}

# Download each data file that is not already present locally.
# Checking file by file (rather than "is the directory empty?") lets an
# interrupted or partial run be resumed: only the missing files are fetched.
dest_files <- file.path(raw_data_dir, filename(data_end_url))
to_fetch <- which(!file.exists(dest_files))

# A plain loop is used because only the side effect (the written file) matters;
# it also avoids auto-printing a list of download.file() status codes.
for (i in to_fetch) {
  download.file(
    url = full_url(data_end_url[i]),
    destfile = dest_files[i]
  )
}
List the downloaded files
# List the files found in the raw data directory with their sizes.
datafiles <- dir(raw_data_dir)
sizes <- file.size(file.path(raw_data_dir, datafiles))

# Render a two-column table; sizes are converted from bytes to megabytes
# ("Mo") and rounded to one decimal. check.names = FALSE keeps the column
# headers with spaces and parentheses intact (FALSE is spelled out because
# `F` is an ordinary variable that can be reassigned).
knitr::kable(
  data.frame(
    "File name" = datafiles,
    "Size (Mo)" = round(sizes / 10^6, 1),
    check.names = FALSE
  )
)
| File name | Size (Mo) |
|---|---|
| CIS_bdpm.txt | 2.7 |
| CIS_CIP_bdpm.txt | 4.0 |
| CIS_COMPO_bdpm.txt | 2.1 |
| CIS_CPD_bdpm.txt | 0.7 |
| CIS_GENER_bdpm.txt | 1.0 |
| CIS_HAS_ASMR_bdpm.txt | 1.6 |
| CIS_HAS_SMR_bdpm.txt | 2.4 |
| CIS_InfoImportantes.txt | 6.5 |
| HAS_LiensPageCT_bdpm.txt | 0.4 |