read_vc() handles git2rdata objects stored by write_vc(). It reads and verifies the metadata file (.yml). Then it reads and verifies the raw data. The last step is back-transforming any transformation done by meta() to return the data.frame as stored by write_vc().

read_vc() is an S3 generic on root which currently handles "character" (a path) and "git-repository" (from git2r). S3 methods for other version control systems could be added.

read_vc(file, root = ".")

Arguments

file

The name of the git2rdata object. Git2rdata objects cannot have dots in their name. The name may include a relative path. file is a path relative to the root. Note that file must point to a location within root.

root

The root of a project. Can be a file path or a git-repository. Defaults to the current working directory (".").

Value

The data.frame with the file names and hashes as attributes.

See also

Examples

## on file system # create a directory root <- tempfile("git2rdata-") dir.create(root) # write a dataframe to the directory write_vc(iris[1:6, ], file = "iris", root = root, sorting = "Sepal.Length")
#> 11d423327dc1f67e16183dbd9871ad931d3b9689 #> "iris.tsv" #> bcf0c81601329774ce177b76919f64d509131788 #> "iris.yml"
# check that a data file (.tsv) and a metadata file (.yml) exist. list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# read the git2rdata object from the directory read_vc("iris", root)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.6 3.1 1.5 0.2 setosa #> 2 4.7 3.2 1.3 0.2 setosa #> 3 4.9 3.0 1.4 0.2 setosa #> 4 5.0 3.6 1.4 0.2 setosa #> 5 5.1 3.5 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa
# store a new version with different observations but the same metadata write_vc(iris[1:5, ], "iris", root)
#> 5bf24c9e9b6830eebd749bac23ce599774d4087e #> "iris.tsv" #> aa9c8665def5a9644ec8157943631f63ac46e5c1 #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# Removing a column requires new metadata. # Add strict = FALSE to override the existing metadata. write_vc( iris[1:6, -2], "iris", root, sorting = "Sepal.Length", strict = FALSE )
#> Warning: Changes in the metadata may lead to unnecessarily large diffs. #> See vignette('version_control', package = 'git2rdata') for more information. #> #> - New data has a different number of variables. #> - Deleted variables: Sepal.Width.
#> 1a649561f36566acf21c2f167e69abc54907badb #> "iris.tsv" #> bf5ce7f305eafd190fc6354181c7dac15b5494fb #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# storing the original version again requires another update of the metadata write_vc(iris[1:6, ], "iris", root, sorting = "Sepal.Width", strict = FALSE)
#> Warning: Changes in the metadata may lead to unnecessarily large diffs. #> See vignette('version_control', package = 'git2rdata') for more information. #> #> - The sorting variables changed. #> - Sorting for the new data: 'Sepal.Width'. #> - Sorting for the old data: 'Sepal.Length'. #> - New data has a different number of variables. #> - New variables: Sepal.Width.
#> b13fe258fa3e684284e1e13a081dcc216672500e #> "iris.tsv" #> 1a12d45cfbec489feae910be9e411d5712a58830 #> "iris.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml"
# optimize = FALSE stores the data in a more verbose format. This results in larger files. write_vc( iris[1:6, ], "iris2", root, sorting = "Sepal.Width", optimize = FALSE )
#> 965536eea40ded6da531ade9fb89ea79a423810d #> "iris2.tsv" #> 1b3ded16c33ab6ad9fe70f77bc3709ac7abfd1b5 #> "iris2.yml"
list.files(root, recursive = TRUE)
#> [1] "iris.tsv" "iris.yml" "iris2.tsv" "iris2.yml"
## on git repo using a git2r::git-repository # initialise a git repo using the git2r package repo_path <- tempfile("git2rdata-repo-") dir.create(repo_path) repo <- git2r::init(repo_path) git2r::config(repo, user.name = "Alice", user.email = "alice@example.org") # store a dataframe in git repo. write_vc(iris[1:6, ], file = "iris", root = repo, sorting = "Sepal.Length")
#> 11d423327dc1f67e16183dbd9871ad931d3b9689 #> "iris.tsv" #> bcf0c81601329774ce177b76919f64d509131788 #> "iris.yml"
# This git2rdata object is not staged by default. status(repo)
#> Untracked files: #> Untracked: iris.tsv #> Untracked: iris.yml #>
# read a dataframe from a git repo read_vc("iris", repo)
#> Sepal.Length Sepal.Width Petal.Length Petal.Width Species #> 1 4.6 3.1 1.5 0.2 setosa #> 2 4.7 3.2 1.3 0.2 setosa #> 3 4.9 3.0 1.4 0.2 setosa #> 4 5.0 3.6 1.4 0.2 setosa #> 5 5.1 3.5 1.4 0.2 setosa #> 6 5.4 3.9 1.7 0.4 setosa
# store a new version in the git repo and stage it in one go write_vc(iris[1:5, ], "iris", repo, stage = TRUE)
#> 5bf24c9e9b6830eebd749bac23ce599774d4087e #> "iris.tsv" #> aa9c8665def5a9644ec8157943631f63ac46e5c1 #> "iris.yml"
status(repo)
#> Staged changes: #> New: iris.tsv #> New: iris.yml #>
# store a verbose version in a different git2rdata object write_vc( iris[1:6, ], "iris2", repo, sorting = "Sepal.Width", optimize = FALSE )
#> 965536eea40ded6da531ade9fb89ea79a423810d #> "iris2.tsv" #> 1b3ded16c33ab6ad9fe70f77bc3709ac7abfd1b5 #> "iris2.yml"
status(repo)
#> Untracked files: #> Untracked: iris2.tsv #> Untracked: iris2.yml #> #> Staged changes: #> New: iris.tsv #> New: iris.yml #>
# clean up junk <- file.remove( list.files(root, full.names = TRUE, recursive = TRUE), root) junk <- file.remove( rev(list.files(repo_path, full.names = TRUE, recursive = TRUE, include.dirs = TRUE, all.files = TRUE)), repo_path)