% Journal article: Seminars in Arthritis and Rheumatism, volume 60, June 2023.
% Topic: text mining of chest CT reports to improve identification of RA-ILD.
% The note field carries the funding statement, disclaimer, and publisher
% copyright line. LaTeX special characters (ampersand, percent sign) inside
% the abstract and note fields are backslash-escaped so that styles which
% typeset those fields do not break the bibliography.
@article{241c9b68c4e24bf19fce9a1440e00a9f,
  author    = {Luedders, Brent A. and Cope, Brendan J. and Hershberger, Daniel and DeVries, Matthew and Campbell, W. Scott and Campbell, James and Roul, Punyasha and Yang, Yangyuna and Rojas, Jorge and Cannon, Grant W. and Sauer, Brian C. and Baker, Joshua F. and Curtis, Jeffrey R. and Mikuls, Ted R. and England, Bryant R.},
  title     = {Enhancing the identification of rheumatoid arthritis-associated interstitial lung disease through text mining of chest computerized tomography reports},
  journal   = {Seminars in Arthritis and Rheumatism},
  volume    = {60},
  year      = {2023},
  month     = jun,
  doi       = {10.1016/j.semarthrit.2023.152204},
  issn      = {0049-0172},
  publisher = {W.B. Saunders Ltd},
  language  = {English (US)},
  keywords  = {Informatics, Interstitial lung disease, Natural language processing, Rheumatoid arthritis},
  abstract  = {Objectives: Algorithms have been developed to identify rheumatoid arthritis-interstitial lung disease (RA-ILD) in administrative data with positive predictive values (PPVs) between 70 and 80\%. We hypothesized that including ILD-related terms identified within chest computed tomography (CT) reports through text mining would improve the PPV of these algorithms in this cross-sectional study. Methods: We identified a derivation cohort of possible RA-ILD cases (n = 114) using electronic health record data from a large academic medical center and performed medical record review to validate diagnoses (reference standard). ILD-related terms (e.g., ground glass, honeycomb) were identified in chest CT reports by natural language processing. Administrative algorithms including diagnostic and procedural codes as well as specialty were applied to the cohort both with and without the requirement for ILD-related terms from CT reports. We subsequently analyzed similar algorithms in an external validation cohort of 536 participants with RA. Results: The addition of ILD-related terms to RA-ILD administrative algorithms increased the PPV in both the derivation (improvement ranging from 3.6 to 11.7\%) and validation cohorts (improvement 6.0 to 21.1\%). This increase was greatest for less stringent algorithms. Administrative algorithms including ILD-related terms from CT reports exceeded a PPV of 90\% (maximum 94.6\% derivation cohort). Increases in PPV were accompanied by a decline in sensitivity (validation cohort -3.9 to -19.5\%). Conclusions: The addition of ILD-related terms identified by text mining from chest CT reports led to improvements in the PPV of RA-ILD algorithms. With high PPVs, use of these algorithms in large data sets could facilitate epidemiologic and comparative effectiveness research in RA-ILD.},
  note      = {Funding Information: BAL is supported by the UNMC Mentored Scholars Program. BRE is supported by a VA CSR\&D (IK2 CX002203). JFB is supported by the VA CSR\&D (I01 CX001703) and VA RR\&D (I01 RX003644). TRM is supported by grants from the VA (BLR\&D Merit I01 BX004660), National Institutes of Health (2U54GM115458), U.S. Department of Defense (PR200793), and the Rheumatology Research Foundation. JRC is supported by the National Institute of Arthritis and Musculoskeletal and Skin Diseases (P30AR072583). *The project described utilizes the UNMC Clinical Research Analytics Environment (CRANE). CRANE is supported by funding from the National Institute of General Medical Sciences, U54 GM115458 and the Patient Centered Outcomes Research Institute, PCORI CDRN-1306--04631. The content is solely the responsibility of the authors and does not necessarily represent the official views of the NIH or PCORI. The views expressed in this article are those of the authors and do not necessarily reflect the position or policy of the Department of Veterans Affairs or the United States government. Publisher Copyright: {\textcopyright} 2023},
}