Title: | Modifying Rules on a DataBase |
---|---|
Description: | Apply modification rules from R package 'dcmodify' to the database, prescribing and documenting deterministic data cleaning steps on records in a database. The rules are translated into SQL statements using R package 'dbplyr'. |
Authors: | Edwin de Jonge [aut, cre] , Wytze Gelderloos [ctb] |
Maintainer: | Edwin de Jonge <[email protected]> |
License: | MIT + file LICENSE |
Version: | 0.3.1 |
Built: | 2024-11-16 04:20:32 UTC |
Source: | https://github.com/data-cleaning/dcmodifydb |
Writes the generated sql to a file or command line. The script contains ALTER and UPDATE statements and can be used for documentation purposes.
dump_sql(x, table, con = NULL, file = stdout(), ...)
dump_sql(x, table, con = NULL, file = stdout(), ...)
x |
|
table |
either a |
con |
optional, when |
file |
to which the sql will be written. |
... |
not used |
Note that when this script is run on the database it will change the original table. This differs from the default behavior of dcmodify which works on a (temporary) copy of the table.
Furthermore, it seems wise to wrap the generated SQL in a transaction when apply the SQL code on a database.
character
sql script with all statements.
Other sql translation:
modifier_to_sql()
# load modification rules and apply: library(dcmodify) con <- DBI::dbConnect(RSQLite::SQLite(), dbname=system.file("db/person.db", package="dcmodifydb")) person <- dplyr::tbl(con, "person") rules <- modifier(.file = system.file("db/corrections.yml", package="dcmodifydb")) print(rules) # show sql code generated from the rules. dump_sql(rules, person)
# load modification rules and apply: library(dcmodify) con <- DBI::dbConnect(RSQLite::SQLite(), dbname=system.file("db/person.db", package="dcmodifydb")) person <- dplyr::tbl(con, "person") rules <- modifier(.file = system.file("db/corrections.yml", package="dcmodifydb")) print(rules) # show sql code generated from the rules. dump_sql(rules, person)
Get an indication of which R statement can be executed on the SQL database. dcmodifydb translates R statements into SQL statement. This works for many scenario's but not all R statements can be translated into SQL. This function checks whether a modification rule can be executed on the database.
is_working_db(m, tab, n = 2, warn = TRUE, sql_warn = FALSE)
is_working_db(m, tab, n = 2, warn = TRUE, sql_warn = FALSE)
m |
|
tab |
tbl object |
n |
number of records to use in this check |
warn |
generate warnings for non-working rules |
sql_warn |
generate warnings with sql code for non-working rules |
logical
with which statements are working
person <- dbplyr::memdb_frame(age = 12, salary = 3000) library(dcmodify) correction_rules <- modifier( if (age < 16) salary = 0 , if (retired == TRUE) salary = 0 ) # second rule is not working, because retired is not available is_working_db(correction_rules, person, warn = FALSE) # show warnings (default) is_working_db(correction_rules, person, warn = TRUE) # show the sql statements that are not working is_working_db(correction_rules, person, warn = FALSE, sql_warn = TRUE)
person <- dbplyr::memdb_frame(age = 12, salary = 3000) library(dcmodify) correction_rules <- modifier( if (age < 16) salary = 0 , if (retired == TRUE) salary = 0 ) # second rule is not working, because retired is not available is_working_db(correction_rules, person, warn = FALSE) # show warnings (default) is_working_db(correction_rules, person, warn = TRUE) # show the sql statements that are not working is_working_db(correction_rules, person, warn = FALSE, sql_warn = TRUE)
Extract UPDATE statements from modifier object as a list of SQL statements.
A user should normally be using modify()
or dump_sql()
, but this
function may be useful.
modifier_to_sql(x, table, con = NULL)
modifier_to_sql(x, table, con = NULL)
x |
|
table |
table object |
con |
optional connection |
list
of sql UPDATE statements.
Other sql translation:
dump_sql()
Change records in a database table using modification rules specified
in a modifier()
object. This is the main function of package dcmodifydb
.
For more information see the vignettes.
## S4 method for signature 'ANY,modifier' modify( dat, x, copy = NULL, transaction = !isTRUE(copy), ignore_nw = FALSE, ... )
## S4 method for signature 'ANY,modifier' modify( dat, x, copy = NULL, transaction = !isTRUE(copy), ignore_nw = FALSE, ... )
dat |
|
x |
|
copy |
if |
transaction |
if |
ignore_nw |
if |
... |
unused |
The modification rules are translated into SQL update statements and executed on the table.
Note that
by default the updates are executed on a copy of the table.
the default for transaction
is FALSE
when copy=TRUE
and
TRUE
when copy=FALSE
when transaction = TRUE
and a modification fails,
all modifications are rolled back.
tbl_sql()
object, referencing the modified table object.
library(DBI) library(dcmodify) library(dcmodifydb) # silly modification rules m <- modifier( if (cyl == 6) gear <- 10 , gear[cyl == 4] <- 0 # this R syntax works too :-) , if (gear == 3) cyl <- 2 ) # setting up a table in the database con <- dbConnect(RSQLite::SQLite()) dbWriteTable(con, "mtcars", mtcars[,c("cyl", "gear")]) tbl_mtcars <- dplyr::tbl(con, "mtcars") # "Houston, we have a table" head(tbl_mtcars) # lets modify on a temporary copy of the table.. # this copy is only visible to the current connection tbl_m <- modify(tbl_mtcars, m, copy=TRUE) # and gear has changed... head(tbl_m) # If one certain about the changes, then you can overwrite the table with the changes tbl_m <- modify(tbl_mtcars, m, copy=FALSE) dbDisconnect(con)
library(DBI) library(dcmodify) library(dcmodifydb) # silly modification rules m <- modifier( if (cyl == 6) gear <- 10 , gear[cyl == 4] <- 0 # this R syntax works too :-) , if (gear == 3) cyl <- 2 ) # setting up a table in the database con <- dbConnect(RSQLite::SQLite()) dbWriteTable(con, "mtcars", mtcars[,c("cyl", "gear")]) tbl_mtcars <- dplyr::tbl(con, "mtcars") # "Houston, we have a table" head(tbl_mtcars) # lets modify on a temporary copy of the table.. # this copy is only visible to the current connection tbl_m <- modify(tbl_mtcars, m, copy=TRUE) # and gear has changed... head(tbl_m) # If one certain about the changes, then you can overwrite the table with the changes tbl_m <- modify(tbl_mtcars, m, copy=FALSE) dbDisconnect(con)
A synthetic data set with person data with records to be corrected. The datasethas missing values
person
person
A data frame with x rows and variables:
monthly income, in US dollars
age of a person in year
gender of a person
year of measurement
if a person smokes or not
how many cigarretes a person smokes
...
The dataset is also available as a sqlite database at
system.file("db/person.db", package="dcmodifydb")
# load modification rules and apply: library(dcmodify) rules <- modifier(.file = system.file("db/corrections.yml", package="dcmodifydb")) con <- DBI::dbConnect(RSQLite::SQLite(), dbname=system.file("db/person.db", package="dcmodifydb")) person <- dplyr::tbl(con, "person") print(person) person2 <- modify(person, rules, copy=TRUE) print(person2)
# load modification rules and apply: library(dcmodify) rules <- modifier(.file = system.file("db/corrections.yml", package="dcmodifydb")) con <- DBI::dbConnect(RSQLite::SQLite(), dbname=system.file("db/person.db", package="dcmodifydb")) person <- dplyr::tbl(con, "person") print(person) person2 <- modify(person, rules, copy=TRUE) print(person2)