Aim

Import the data and convert it into RDS files

Description

The data format is described in this PDF.

In summary :

Prepare readr

I will import this tabular data with readr.

First, specify the locale.

library(readr)
## 
## Attaching package: 'readr'
## The following object is masked from 'package:rvest':
## 
##     guess_encoding
# Ask readr to guess the file encoding; the most likely candidate is then
# set manually in the locale definition.
guess_encoding("raw_data/CIS_bdpm.txt")
##     encoding confidence
## 1 ISO-8859-1       0.61
## 2 ISO-8859-2       0.32
# Locale handed to the readr import: French date names, day/month/year
# dates, comma as decimal mark, and ISO-8859-1 text (the top candidate
# reported by guess_encoding()).
lcl <- locale(date_names = "fr", date_format = "%d/%m/%Y",
              decimal_mark = ",", encoding = "ISO-8859-1")

CIS_bdpm

File with the brand names of the drugs that are still marketed or whose marketing stopped less than 3 years ago.

# Names given to the twelve columns of CIS_bdpm.txt, in file order.
columns_names <- c(
  "code_cis",
  "denomination",
  "forme",
  "voie",
  "statut_amm",
  "type_amm",
  "commercialisation",
  "date_amm",
  "statut_bdm",
  "no_autorisation_eu",
  "titulaire",
  "surveillance_renforcee"
)

# Read the tab-separated file. Because col_names is supplied, the first
# line of the file is read as data; column types are guessed by readr and
# parsed with the locale defined in `lcl`.
bdpm <- read_tsv(
  file = "raw_data/CIS_bdpm.txt",
  col_names = columns_names,
  locale = lcl
)
## Parsed with column specification:
## cols(
##   code_cis = col_integer(),
##   denomination = col_character(),
##   forme = col_character(),
##   voie = col_character(),
##   statut_amm = col_character(),
##   type_amm = col_character(),
##   commercialisation = col_character(),
##   date_amm = col_date(format = ""),
##   statut_bdm = col_character(),
##   no_autorisation_eu = col_character(),
##   titulaire = col_character(),
##   surveillance_renforcee = col_character()
## )
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Check the parsed column types and the first values of each column.
glimpse(bdpm)
## Observations: 14,297
## Variables: 12
## $ code_cis               <int> 61266250, 62869109, 66513085, 64332894,...
## $ denomination           <chr> "A 313 200 000 UI POUR CENT, pommade", ...
## $ forme                  <chr> "pommade", "capsule molle", "solution i...
## $ voie                   <chr> "cutanée", "orale", "sous-cutanée", "so...
## $ statut_amm             <chr> "Autorisation active", "Autorisation ac...
## $ type_amm               <chr> "Procédure nationale", "Procédure natio...
## $ commercialisation      <chr> "Commercialisée", "Commercialisée", "Co...
## $ date_amm               <date> 1998-03-12, 1997-07-07, 2014-09-09, 20...
## $ statut_bdm             <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,...
## $ no_autorisation_eu     <chr> NA, NA, "EU/1/14/944", "EU/1/14/944", N...
## $ titulaire              <chr> "PHARMA DEVELOPPEMENT", "PHARMA DEVELOP...
## $ surveillance_renforcee <chr> "Non", "Non", "Oui", "Oui", "Non", "Non...
library(DT)

# Preview the first rows in an interactive table.
datatable(head(bdpm))

# Create the output directory if it is missing, then serialise the table.
rds_dir <- "produced_data"
if (!dir.exists(rds_dir)) {
  dir.create(rds_dir)
}

saveRDS(bdpm, file = file.path(rds_dir, "bdpm.rds"))