class: title-slide count: false .title[ # 04 - Data ] .subtitle[ ## Open Science Tools ] .author[ ### Claudio Zandonella & Davide Massidda ] .institute[ ] --- class: center, middle, inverse # Step III: Sharing Data --- # Data Structures -- .pull-left-50[ - **Wide Format** <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;"> Name </th> <th style="text-align:left;"> Sex </th> <th style="text-align:center;"> Age </th> <th style="text-align:center;"> Pre </th> <th style="text-align:center;"> Post </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Alice </td> <td style="text-align:left;"> F </td> <td style="text-align:center;"> 24 </td> <td style="text-align:center;"> ... </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Bob </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 21 </td> <td style="text-align:center;"> ... </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Carl </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 23 </td> <td style="text-align:center;"> ... </td> <td style="text-align:center;"> ... </td> </tr> </tbody> </table> ] .pull-right-50[ - **Long Format.** <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <thead> <tr> <th style="text-align:left;"> Name </th> <th style="text-align:left;"> Sex </th> <th style="text-align:center;"> Age </th> <th style="text-align:left;"> Time </th> <th style="text-align:center;"> Value </th> </tr> </thead> <tbody> <tr> <td style="text-align:left;"> Alice </td> <td style="text-align:left;"> F </td> <td style="text-align:center;"> 24 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Alice </td> <td style="text-align:left;"> F </td> <td style="text-align:center;"> 24 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Bob </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 21 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Bob </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 21 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Carl </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 23 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:left;"> Carl </td> <td style="text-align:left;"> M </td> <td style="text-align:center;"> 23 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> </tbody> </table> ] --- # Data Structures - **Relational Model** .pull-left-50[ <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <caption>Subjects</caption> <thead> <tr> <th style="text-align:right;"> ID </th> <th style="text-align:left;"> Name </th> <th style="text-align:left;"> Sex </th> <th style="text-align:right;"> Age </th> </tr> </thead> <tbody> <tr> <td style="text-align:right;"> 1 </td> <td style="text-align:left;"> Alice </td> <td style="text-align:left;"> F </td> <td style="text-align:right;"> 24 </td> </tr> <tr> <td style="text-align:right;"> 2 </td> <td style="text-align:left;"> Bob </td> <td style="text-align:left;"> M </td> <td style="text-align:right;"> 21 </td> </tr> <tr> <td style="text-align:right;"> 3 </td> <td style="text-align:left;"> Carl </td> <td style="text-align:left;"> M </td> <td style="text-align:right;"> 23 </td> </tr> </tbody> </table> ] .pull-right-50[ <table class="table table-striped" style="width: auto !important; margin-left: auto; margin-right: auto;"> <caption>Study</caption> <thead> <tr> <th style="text-align:center;"> ID </th> <th style="text-align:center;"> Subject_ID </th> <th style="text-align:left;"> Time </th> <th style="text-align:center;"> Value </th> </tr> </thead> <tbody> <tr> <td style="text-align:center;"> 1 </td> <td style="text-align:center;"> 1 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:center;"> 2 </td> <td style="text-align:center;"> 1 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:center;"> 3 </td> <td style="text-align:center;"> 2 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:center;"> 4 </td> <td style="text-align:center;"> 2 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:center;"> 5 </td> <td style="text-align:center;"> 3 </td> <td style="text-align:left;"> Pre </td> <td style="text-align:center;"> ... </td> </tr> <tr> <td style="text-align:center;"> 6 </td> <td style="text-align:center;"> 3 </td> <td style="text-align:left;"> Post </td> <td style="text-align:center;"> ... </td> </tr> </tbody> </table> ] --- # Data Structures - **Relational Model** <img src="images/04-data/sql-schema.svg" width="80%" style="display: block; margin: auto;" /> -- <img src="images/04-data/sql-logo.png" width="20%" style="display: block; margin: auto;" /> --- # Data Structures - **Join Operations** .move-up-15[ <img src="images/04-data/join.svg" width="60%" style="display: block; margin: auto;" /> ] --- # Data Documentation -- .pull-left-50[ .no-border[ #### `README-data.md` ] ] --- # Data Documentation .pull-left-50[ .no-border[ #### `README-data.md` ] General info: .li-small[ - Study/Project reference - Authors (citation/contact information) - License - Data collection process ] ] .pull-right-50[ ] --- # Data Documentation .pull-left-50[ .no-border[ #### `README-data.md` ] General info: .li-small[ - Study/Project reference - Authors (citation/contact information) - License - Data collection process ] Data info: .li-small[ - Variable name - Variable type - Unit of measure - Values range or levels - Missing/special values ] ] .pull-right-50[ ] --- # Data Documentation .pull-left-50[ .no-border[ #### `README-data.md` ] General info: .li-small[ - Study/Project reference - Authors (citation/contact information) - License - Data collection process ] Data info: .li-small[ - Variable name - Variable type - Unit of measure - Values range or levels - Missing/special values ] ] .pull-right-50[ .code-small[ ```r #----- README-data.md ----# # Data README ## General Info Details about the study/project, authors, License, or other relevant information. Description of the data collection process or links to the paper/external documentation for further details. ## Details The dataset `my-study.csv` is formed by n rows and k columns: - `Name`. Character variable indicating the subject name - `Sex`. Factor variable indicating the subject gender (levels are `"F"` for females and `"M"` for males) - `Age`. Numeric variable indicating subject age (in years) - `Time`. Factor variable indicating measure time (levels are `"Pre"` and `"Post"`) - `Value`. Numeric variable indicating the outcome measure [unit of measure] - ... ``` ] ] --- # Data Good Practices -- - File format - proprietary vs open -- - Machine readable - no `.pdf` -- - Indicate encoding - prefer `UTF-8` -- - The rawer the better -- .emoji-xl[ .center[ ❤️ `.csv` ❤️ ] ] --- # Data Sharing -- .pull-left-50[ <img src="images/04-data/kepler.jpeg" width="55%" style="display: block; margin: auto;" /> .center[Johannes Kepler (1571 - 1630)] ] .pull-right-50[ ] --- # Data Sharing .pull-left-50[ <img src="images/04-data/kepler.jpeg" width="55%" style="display: block; margin: auto;" /> .center[Johannes Kepler (1571 - 1630)] ] .pull-right-50[ <img src="images/04-data/brahe.jpg" width="55%" style="display: block; margin: auto;" /> .center[Tycho Brahe (1546 - 1601)] ] --- # Data Sharing <br> <img src="images/04-data/FAIR-data.jpeg" width="85%" style="display: block; margin: auto;" /> --- # Data Sharing .pull-left-50[ #### Legal Aspects: .li-small[ - GDPR (😭) - Personal data and anonymization - Geographical restrictions ] ] .pull-right-50[ ] --- # Data Sharing .pull-left-50[ #### Legal Aspects: .li-small[ - GDPR (😭) - Personal data and anonymization - Geographical restrictions ] .move-down-50[ #### LICENSE - Open Data Commons: ] .li-small[ - **ODC-BY.** Attribution License - **ODbL.** Open Database License (BY-SA) - **PDDL.** Public Domain (all rights waived) - **DbCL.** Database Content License (BY-SA) ] .center[https://opendatacommons.org/] ] .pull-right-50[ ] --- # Data Sharing .pull-left-50[ #### Legal Aspects: .li-small[ - GDPR (😭) - Personal data and anonymization - Geographical restrictions ] .move-down-50[ #### LICENSE - Open Data Commons: ] .li-small[ - **ODC-BY.** Attribution License - **ODbL.** Open Database License (BY-SA) - **PDDL.** Public Domain (all rights waived) - **DbCL.** Database Content License (BY-SA) ] .center[https://opendatacommons.org/] ] .pull-right-50[ #### Where? .li-small[ - <img style="height:2em" src="images/04-data/osf.png" class="inline-img"> (https://osf.io) - <img style="height:2em" src="images/04-data/github.png" class="inline-img">(https://github.com) - <img style="height:2em" src="images/04-data/gitlab.png" class="inline-img">(https://gitlab.com) - <img style="height:2em" src="images/04-data/dataverse.png" class="inline-img">(https://dataverse.org) - <img style="height:2em" src="images/04-data/databrary.png" class="inline-img">(https://nyu.databrary.org) - <img style="height:2em" src="images/04-data/icpsr.png" class="inline-img">(https://www.icpsr.umich.edu) ] ] --- # Metadata -- .pull-left-50[ `CITATION.cff` .code-small[ ``` cff-version: 1.2.0 message: "For attribution, please cite it as below." authors: - family-names: "Zandonella Callegher" given-names: "Claudio" orcid: "https://orcid.org/0000-0001-7721-6318" - family-names: "Massidda" given-names: "Davide" title: "The Open Science Manual: Make Your Scientific Research Accessible and Reproducible" version: 1.0.1 doi: 10.5281/zenodo.6521850 date-released: 2023-01-02 url: "https://github.com/arca-dpss/manual-open-science" ``` ] ] .pull-right-50[ .move-up-50[ {{content}} ] ] -- <img src="images/04-data/citation.png" width="90%" style="display: block; margin: auto;" /> --- # Metadata .pull-left-50[ `CITATION.cff` .code-small[ ``` cff-version: 1.2.0 message: "For attribution, please cite it as below." authors: - family-names: "Zandonella Callegher" given-names: "Claudio" orcid: "https://orcid.org/0000-0001-7721-6318" - family-names: "Massidda" given-names: "Davide" title: "The Open Science Manual: Make Your Scientific Research Accessible and Reproducible" version: 1.0.1 doi: 10.5281/zenodo.6521850 date-released: 2023-01-02 url: "https://github.com/arca-dpss/manual-open-science" ``` ] ] .pull-right-50[ .move-up-50[ <img src="images/04-data/citation.png" width="90%" style="display: block; margin: auto;" /> ] Documentation: .li-small[ - https://citation-file-format.github.io/ - [About CITATION files](https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-citation-files) ] ] --- # Metadata #### Schema.org -- <br> .center[ *“Schema.org is a collaborative, community activity with a mission to create,<br>maintain, and promote schemas for structured data on the Internet,<br>on web pages, in email messages, and beyond.”* ] -- <br> Documentation: .li-small[ - https://schema.org/ - https://www.software.ac.uk/resources/guides/adding-schema-dot-org ] --- class: inverse, center, middle # Sharing is Caring! ## Moving to the next step...