---
title: "Wildlife Disease Data"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Wildlife Disease Data}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---


```{r, include = FALSE}
# Vignette-wide chunk defaults: collapse source and output into a single
# block and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
```

```{r setup, warning=FALSE, message=FALSE}
library(wddsWizard)
library(dplyr)
library(readxl)
library(janitor)
library(jsonlite)
```

The Wildlife Disease Data Standard (WDDS) is composed of two components: disease data and project metadata.
Standardizing how disease data are stored is the primary goal of WDDS.


This vignette describes how to prepare data for validation against the Wildlife Disease Data Standard. We will 1) read an example Excel spreadsheet, 2) do some light reformatting, and 3) produce a JSON object.

## Required fields

The following fields are required to be in the data.

```{r required fields, echo=FALSE, message=TRUE, warning=TRUE}
# Build an HTML table of the required disease-data fields and their schema
# descriptions. When a description cites a TDWG term URL
# (http://rs.tdwg.org/dwc/terms/...), the field name becomes a link to that
# term and the trailing "See <url>" text is removed from the description.
required_fields <- wddsWizard::disease_data_required_fields
wdds_data_required <- wddsWizard::disease_data_schema$properties[required_fields]

# Pull the "description" entry out of each required property.
descriptions <- purrr::map(wdds_data_required, "description")

df_data_desc <- data.frame(
  Field = required_fields,
  Descriptions = unlist(descriptions),
  row.names = NULL
)

# Regex for the TDWG term namespace, defined once so the extract and remove
# steps cannot drift apart; dots are escaped so they only match literal dots.
tdwg_prefix <- "http://rs\\.tdwg\\.org/dwc/terms/"

df_data_desc |>
  dplyr::mutate(
    # URL of the cited term, or "" when the description does not cite one
    # (str_extract yields NA on no match).
    TDWG_url = dplyr::coalesce(
      stringr::str_extract(Descriptions, paste0(tdwg_prefix, ".*")),
      ""
    ),
    # Hyperlink the field name only when a term URL was found.
    Field = dplyr::case_when(
      TDWG_url != "" ~ kableExtra::cell_spec(
        Field,
        format = "html",
        link = TDWG_url
      ),
      TRUE ~ Field
    ),
    # str_remove leaves descriptions without a "See <url>" suffix unchanged,
    # so no conditional is needed here.
    Descriptions = stringr::str_remove(
      Descriptions,
      paste0("See ", tdwg_prefix, ".*")
    )
  ) |>
  dplyr::select(-TDWG_url) |>
  # escape = FALSE so the generated <a> tags render as links.
  kableExtra::kbl(escape = FALSE) |>
  kableExtra::kable_styling()
```

## Read in and clean up the Excel spreadsheet

```{r clean-csv}
# Look up the example workbook via wdds_example_data() and read it in.
becker_file <- wdds_example_data(
  version = "latest",
  file = "Becker_demo_dataset.xlsx"
)
becker_data <- readxl::read_xlsx(becker_file)

# Convert the column names to lowerCamelCase.
becker_data_prelim <- becker_data |>
  janitor::clean_names(case = "lower_camel")
```

### Check for required Fields


```{r missing-fields}
# Flag which required fields already appear among the column names ...
required_fields <- wddsWizard::disease_data_required_fields
required_field_check <- is.element(required_fields, names(becker_data_prelim))

# ... and print the ones that are still missing.
required_fields[!required_field_check]
```

### Rename fields to match the standard

```{r align-field-names}
# Print the collectionMethod column before it is renamed.
becker_data_prelim[["collectionMethod"]]

# rename() takes new_name = old_name pairs.
becker_data_clean <- dplyr::rename(
  becker_data_prelim,
  sampleID = sampleId,
  animalID = animalId,
  collectionMethodAndOrTissue = collectionMethod
)

# Confirm that every required field is now present (prints TRUE or FALSE).
required_fields <- wddsWizard::disease_data_required_fields
all(required_fields %in% names(becker_data_clean))
```

## Prep for JSON
```{r prep-data}
# Run the package's prep_data() step on the cleaned data ahead of the JSON
# conversion.
becker_prepped <- prep_data(becker_data_clean)

# Serialise the prepped data to pretty-printed JSON.
becker_data_json <- jsonlite::toJSON(becker_prepped, pretty = TRUE)
```


## Validate disease data
```{r validate-data}
## Get the disease-data schema -- notice that you can set the version of the
## schema.
schema <- wdds_json(version = "latest", file = "schemas/disease_data.json")

## Build a validator function for that schema using the "ajv" engine.
dd_validator <- jsonvalidate::json_validator(schema, engine = "ajv")

## Validate the JSON produced above; with verbose = TRUE the error details are
## read back from the result's "errors" attribute below.
dd_validation <- dd_validator(becker_data_json, verbose = TRUE)

## Check for errors! `exact = TRUE` avoids partial matching of attribute names.
validation_errors <- attr(dd_validation, "errors", exact = TRUE)

if (!dd_validation) {
  validation_errors
} else {
  ## This chunk validates disease data, so the message says so.
  print("Valid disease data!😁")
}
```