<html xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
<meta name="Generator" content="Microsoft Word 15 (filtered medium)">
<title>Re: [CODATA-international] Cost of Data Wrangling</title>
<style><!--
/* Font Definitions */
@font-face
        {font-family:Helvetica;
        panose-1:2 11 6 4 2 2 2 2 2 4;}
@font-face
        {font-family:Wingdings;
        panose-1:5 0 0 0 0 0 0 0 0 0;}
@font-face
        {font-family:"Cambria Math";
        panose-1:2 4 5 3 5 4 6 3 2 4;}
@font-face
        {font-family:Calibri;
        panose-1:2 15 5 2 2 2 4 3 2 4;}
@font-face
        {font-family:"Calibri Light";
        panose-1:2 15 3 2 2 2 4 3 2 4;}
@font-face
        {font-family:Garamond;
        panose-1:2 2 4 4 3 3 1 1 8 3;}
/* Style Definitions */
p.MsoNormal, li.MsoNormal, div.MsoNormal
        {margin:0cm;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
h4
        {mso-style-priority:9;
        mso-style-link:"Heading 4 Char";
        mso-margin-top-alt:auto;
        margin-right:0cm;
        mso-margin-bottom-alt:auto;
        margin-left:0cm;
        font-size:12.0pt;
        font-family:"Calibri",sans-serif;}
a:link, span.MsoHyperlink
        {mso-style-priority:99;
        color:#0563C1;
        text-decoration:underline;}
p.MsoListParagraph, li.MsoListParagraph, div.MsoListParagraph
        {mso-style-priority:34;
        margin-top:0cm;
        margin-right:0cm;
        margin-bottom:0cm;
        margin-left:36.0pt;
        font-size:11.0pt;
        font-family:"Calibri",sans-serif;}
span.Heading4Char
        {mso-style-name:"Heading 4 Char";
        mso-style-priority:9;
        mso-style-link:"Heading 4";
        font-family:"Calibri Light",sans-serif;
        color:#2F5496;
        font-style:italic;}
span.EmailStyle22
        {mso-style-type:personal-reply;
        font-family:"Calibri",sans-serif;
        color:windowtext;}
.MsoChpDefault
        {mso-style-type:export-only;
        font-size:10.0pt;}
@page WordSection1
        {size:612.0pt 792.0pt;
        margin:72.0pt 72.0pt 72.0pt 72.0pt;}
div.WordSection1
        {page:WordSection1;}
/* List Definitions */
@list l0
        {mso-list-id:510069797;
        mso-list-type:hybrid;
        mso-list-template-ids:492076288 -627683698 -156891162 -2101312760 691421774 902879454 1343669878 813465894 616351260 -1707078572;}
@list l0:level1
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level2
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level3
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:108.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level4
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:144.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level5
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:180.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level6
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:216.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level7
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:252.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level8
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:288.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l0:level9
        {mso-level-number-format:bullet;
        mso-level-text:§;
        mso-level-tab-stop:324.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        font-family:Wingdings;}
@list l1
        {mso-list-id:514804689;
        mso-list-template-ids:-1159285466;}
@list l1:level1
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:36.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level2
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:72.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level3
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:108.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level4
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:144.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level5
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:180.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level6
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:216.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level7
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:252.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level8
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:288.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
@list l1:level9
        {mso-level-number-format:bullet;
        mso-level-text:;
        mso-level-tab-stop:324.0pt;
        mso-level-number-position:left;
        text-indent:-18.0pt;
        mso-ansi-font-size:10.0pt;
        font-family:Symbol;}
ol
        {margin-bottom:0cm;}
ul
        {margin-bottom:0cm;}
--></style><!--[if gte mso 9]><xml>
<o:shapedefaults v:ext="edit" spidmax="1026" />
</xml><![endif]--><!--[if gte mso 9]><xml>
<o:shapelayout v:ext="edit">
<o:idmap v:ext="edit" data="1" />
</o:shapelayout></xml><![endif]-->
</head>
<body lang="EN-GB" link="#0563C1" vlink="#954F72" style="word-wrap:break-word">
<div class="WordSection1">
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">Ernie,<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">The 10.2 Billion Euro figure almost certainly comes from this EU report:<o:p></o:p></span></p>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal"><b>Cost-benefit analysis for FAIR research data: Cost of not having FAIR Research Data.
</b><a href="https://op.europa.eu/en/publication-detail/-/publication/d375368c-1a0a-11e9-8d04-01aa75ed71a1/language-en">https://op.europa.eu/en/publication-detail/-/publication/d375368c-1a0a-11e9-8d04-01aa75ed71a1/language-en</a>.<span style="mso-fareast-language:EN-US"><o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">To quote: “we found that the annual cost of not having FAIR research data costs the European economy at least €10.2bn every year”.
<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">Also from the report: “As a rule of thumb, in a data analysis project, data cleansing of poor quality data can take up to 80% of the total effort.” It is perhaps not clear from the report how the
 80% figure was arrived at.  <o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">Ian Bruno, CCDC<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<div>
<div style="border:none;border-top:solid #E1E1E1 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal"><b><span lang="EN-US">From:</span></b><span lang="EN-US"> CODATA-international &lt;codata-international-bounces@lists.codata.org&gt;
<b>On Behalf Of </b>Johnson, Jon<br>
<b>Sent:</b> 11 December 2020 09:00<br>
<b>To:</b> Ernie Boyko &lt;boykern@yahoo.com&gt;; CODATA International &lt;codata-international@lists.codata.org&gt;<br>
<b>Subject:</b> Re: [CODATA-international] Cost of Data Wrangling<o:p></o:p></span></p>
</div>
</div>
<p class="MsoNormal"><o:p> </o:p></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">Hi Eric<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">It’s a bit of an urban myth I think see
<a href="https://linkprotect.cudasvc.com/url?a=https%3a%2f%2fblog.ldodds.com%2f2020%2f01%2f31%2fdo-data-scientists-spend-80-of-their-time-cleaning-data-turns-out-no%2f&c=E,1,KynW-_cnYJZ-XI6NoLJXo1m_vRsTrsFmPVaaUX93jsH1cg2uDh4Cbso6it49Zo5YogRhr2sSDU7iKHNhg0-GXAxng-GBFmx39uv7gd4j0Uw4LgI,&typo=1">
https://blog.ldodds.com/2020/01/31/do-data-scientists-spend-80-of-their-time-cleaning-data-turns-out-no/</a>, but it aligns with the Pareto Principle, so we are all willing to go with it!<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">I suppose it is not that important whether it is 80% or 60%, it’s still a massive problem and the takeaway is that it highlights where the source of most effort is being expended, and strongly suggests
 that it arises from poor data quality and lack of metadata to manage that.<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">Jon Johnson <o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">CLOSER, UCL Institute of Social Research<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US">@spuddybike<o:p></o:p></span></p>
<p class="MsoNormal"><span style="mso-fareast-language:EN-US"><o:p> </o:p></span></p>
<div style="border:none;border-top:solid #B5C4DF 1.0pt;padding:3.0pt 0cm 0cm 0cm">
<p class="MsoNormal"><b><span style="font-size:12.0pt;color:black">From: </span></b><span style="font-size:12.0pt;color:black">CODATA-international &lt;<a href="mailto:codata-international-bounces@lists.codata.org">codata-international-bounces@lists.codata.org</a>&gt;
 on behalf of Ernie Boyko &lt;<a href="mailto:boykern@yahoo.com">boykern@yahoo.com</a>&gt;<br>
<b>Reply to: </b>Ernie Boyko &lt;<a href="mailto:boykern@yahoo.com">boykern@yahoo.com</a>&gt;<br>
<b>Date: </b>Friday, 11 December 2020 at 07:24<br>
<b>To: </b>CODATA International &lt;<a href="mailto:codata-international@lists.codata.org">codata-international@lists.codata.org</a>&gt;<br>
<b>Subject: </b>[CODATA-international] Cost of Data Wrangling<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><o:p> </o:p></p>
</div>
<div>
<div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Times New Roman",serif">Hi all<o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Times New Roman",serif">A study conducted for the EU? is often quoted as being the source of a statement along the lines of<o:p></o:p></span></p>
<div>
<div style="margin-left:50.4pt;margin-bottom:6.0pt">
<ul style="margin-top:0cm" type="disc">
<ul style="margin-top:0cm" type="disc">
<li class="MsoListParagraph" style="margin-left:0cm;mso-list:l0 level2 lfo3"><b><span lang="EN-US" style="font-size:14.0pt;color:#404040">80% of effort in data intensive research is used on data wrangling; conservative estimate of 10.2 Bn Euro. </span></b><span style="font-size:14.0pt;font-family:"Times New Roman",serif"><o:p></o:p></span></li></ul>
</ul>
</div>
</div>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Times New Roman",serif;color:black"> Can anyone on this list point me to this study?</span><span style="font-size:12.0pt;font-family:"Times New Roman",serif"><o:p></o:p></span></p>
</div>
<div>
<p class="MsoNormal"><span style="font-size:12.0pt;font-family:"Times New Roman",serif">Many thanks in advance.  I am trying to make the case for the benefits of developing a career stream for data wranglers/data stewards.<o:p></o:p></span></p>
</div>
<div>
<div id="ydp112ffa72yui_3_13_0_1_1383570742306_21877">
<div>
<div id="ydp112ffa72yui_3_13_0_1_1383923334020_29546">
<div id="ydp112ffa72yui_3_13_0_1_1383923334020_29543">
<p class="MsoNormal"><span style="font-size:13.5pt;font-family:"Times New Roman",serif;color:#4C76A2;background:white">Cheers, Ernie</span><span style="font-size:13.5pt;font-family:"Times New Roman",serif"><o:p></o:p></span></p>
<p class="MsoNormal" style="background:white"><span style="font-size:13.5pt;font-family:"Times New Roman",serif;color:black">+1-613-290-2804</span><span style="font-size:13.5pt;font-family:"Times New Roman",serif"><o:p></o:p></span></p>
<p class="MsoNormal" style="background:white"><span style="font-size:13.5pt;font-family:"Times New Roman",serif"><o:p> </o:p></span></p>
<p class="MsoNormal" style="background:white"><b><span style="font-size:12.0pt;font-family:"Helvetica",sans-serif;color:black"> </span></b><b><span style="font-size:13.5pt;font-family:"Garamond",serif;color:black"> “Data is the new oil.” — Clive Humby</span></b><span style="font-size:12.0pt;font-family:"Times New Roman",serif"><o:p></o:p></span></p>
<div>
<h4 style="margin:0cm;line-height:15.75pt;background:white;vertical-align:baseline;font-stretch:inherit">
<span style="font-family:"Garamond",serif;color:black;border:none windowtext 1.0pt;padding:0cm">“Data really powers everything that we do.” – Jeff Weiner<o:p></o:p></span></h4>
<p class="MsoNormal" style="margin-bottom:12.0pt;background:white"><span style="font-size:12.0pt;font-family:"Times New Roman",serif"><o:p> </o:p></span></p>
</div>
<div>
<p class="MsoNormal" style="margin-bottom:12.0pt;background:white"><b><span style="font-size:12.0pt;font-family:"Times New Roman",serif"><o:p> </o:p></span></b></p>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
</div>
<br>
<br>
<table cellspacing="0" cellpadding="0" border="0" id="signature" style="width: 625px; font-family: Helvetica, Arial, sans-serif; font-size:13px; line-height: 21px;">
<tbody>
<tr>
<td width="35%" style="padding: 10px 20px 0 0">
<table>
<tbody>
<tr>
<td align="center"><a href="https://www.ccdc.cam.ac.uk"><img src="https://cdn.ccdc.cam.ac.uk/content/images/email/ccdc-signature-logo-small-190.png" width="190" style="margin-bottom: 5px;" alt="CCDC">
</a></td>
</tr>
<tr>
<td align="center">
<table cellspacing="0" cellpadding="5" border="0">
<tbody>
<tr>
<td><a href="https://www.linkedin.com/company/2683138?trk=cws-btn-overview-0-0"><img alt="LinkedIn" src="https://cdn.ccdc.cam.ac.uk/content/images/email/ccdc-signature-linkedin.png" width="36" height="36"></a>
</td>
<td><a href="https://twitter.com/ccdc_cambridge"><img alt="Twitter" src="https://cdn.ccdc.cam.ac.uk/content/images/email/ccdc-signature-twitter.png" width="36" height="36"></a>
</td>
<td><a href="https://www.facebook.com/ccdc.cambridge"><img alt="Facebook" src="https://cdn.ccdc.cam.ac.uk/content/images/email/ccdc-signature-facebook.png" width="36" height="36"></a>
</td>
<td><a href="https://www.youtube.com/user/CCDCCambridge"><img alt="YouTube" src="https://cdn.ccdc.cam.ac.uk/content/images/email/ccdc-signature-youtube.png" width="36" height="36"></a>
</td>
</tr>
</tbody>
</table>
</td>
</tr>
</tbody>
</table>
</td>
<td width="1" style="border-right: 1px solid #00adee"></td>
<td width="65%" valign="top" style="padding: 10px 0 0 20px;"><strong>Dr Ian Bruno</strong><br>
Head of Strategic Partnerships<br>
<br>
Phone: +44 1223 3-36013<br>
Email: bruno@ccdc.cam.ac.uk<br>
</td>
</tr>
</tbody>
</table>
<p style="font-family: Helvetica, Arial, sans-serif; font-size:12px; line-height: 21px; margin-top: 35px;">
LEGAL NOTICE<br>
Unless expressly stated otherwise, information contained in this message is confidential. If this message is not intended for you, please inform postmaster@ccdc.cam.ac.uk and delete the message. The Cambridge Crystallographic Data Centre is a company Limited
 by Guarantee and a Registered Charity. Registered in England No. 2155347 Registered Charity No. 800579
</p>
</body>
</html>