Import the data and convert it to RDS files
The data format is described in this PDF.
In summary :
I will import this tabular data with readr.
First, specify the locale:
library(readr)
##
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
##
## guess_encoding
# Ask readr for the most likely encodings of the raw file; the best candidate
# is then set by hand in the locale used for the import.
readr::guess_encoding("raw_data/CIS_bdpm.txt")
## encoding confidence
## 1 ISO-8859-1 0.61
## 2 ISO-8859-2 0.32
# Locale shared by the imports: ISO-8859-1 text, French day/month names,
# day/month/year dates and a comma as the decimal separator.
lcl <- readr::locale(
  encoding = "ISO-8859-1",
  date_names = "fr",
  date_format = "%d/%m/%Y",
  decimal_mark = ","
)
File with the brand names of the drugs that are still commercialised or that were discontinued less than 3 years ago.
# Column names for the CIS_bdpm table, in file order. They are passed
# explicitly to read_tsv(), so the first line of the file is read as data.
columns_names <- c(
  "code_cis",
  "denomination",
  "forme",
  "voie",
  "statut_amm",
  "type_amm",
  "commercialisation",
  "date_amm",
  "statut_bdm",
  "no_autorisation_eu",
  "titulaire",
  "surveillance_renforcee"
)

# Read the tab-separated file with the locale defined above; column types
# are left for readr to guess.
bdpm <- readr::read_tsv(
  file = "raw_data/CIS_bdpm.txt",
  col_names = columns_names,
  locale = lcl
)
## Parsed with column specification:
## cols(
## code_cis = col_integer(),
## denomination = col_character(),
## forme = col_character(),
## voie = col_character(),
## statut_amm = col_character(),
## type_amm = col_character(),
## commercialisation = col_character(),
## date_amm = col_date(format = ""),
## statut_bdm = col_character(),
## no_autorisation_eu = col_character(),
## titulaire = col_character(),
## surveillance_renforcee = col_character()
## )
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Compact overview of the imported table: one line per column with its type
# and first values.
dplyr::glimpse(bdpm)
## Observations: 14,297
## Variables: 12
## $ code_cis <int> 61266250, 62869109, 66513085, 64332894,...
## $ denomination <chr> "A 313 200 000 UI POUR CENT, pommade", ...
## $ forme <chr> "pommade", "capsule molle", "solution i...
## $ voie <chr> "cutanée", "orale", "sous-cutanée", "so...
## $ statut_amm <chr> "Autorisation active", "Autorisation ac...
## $ type_amm <chr> "Procédure nationale", "Procédure natio...
## $ commercialisation <chr> "Commercialisée", "Commercialisée", "Co...
## $ date_amm <date> 1998-03-12, 1997-07-07, 2014-09-09, 20...
## $ statut_bdm <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ no_autorisation_eu <chr> NA, NA, "EU/1/14/944", "EU/1/14/944", N...
## $ titulaire <chr> "PHARMA DEVELOPPEMENT", "PHARMA DEVELOP...
## $ surveillance_renforcee <chr> "Non", "Non", "Oui", "Oui", "Non", "Non...
library(DT)
# Preview the first rows of the table as an interactive HTML widget.
DT::datatable(head(bdpm))
# Directory that receives the serialised tables; create it on first run.
rds_dir <- "produced_data"
if (!dir.exists(rds_dir)) {
  dir.create(rds_dir)
}

# Store the imported table as an RDS file inside that directory.
saveRDS(bdpm, file = file.path(rds_dir, "bdpm.rds"))