So I have two dataframes containing ";" separated IDs. I want to see which IDs in df2$Protein.IDs are in df1$subunits.UniProt.IDs., and ideally return the matching IDs as a list. I'm not sure how to approach this as I haven't come across anything in previous questions. I'm trying to use strsplit separate the IDs for each entry, then search for matches for each one, with the goal of iterating across the df by a for loop, but I'm a bit stuck on how to make it come together.
df1 <- structure(list(ComplexName = c("Oligosaccharyltransferase complex (Stt3B variant)",
"TRF1-TIN2 complex", "TRF1 telomere length regulation complex",
"Rap1 complex", "TRF2-Rap1 complex III", "TRF-Rap1 complex I, 2MD"
), Organism = c("Mouse", "Mouse", "Mouse", "Mouse", "Mouse",
"Mouse"), subunits.UniProt.IDs. = c("Q91YQ5;Q9DBG6;O54734;P61804;Q8BTV1;Q3TDQ1;Q9CQY5",
"P70371;Q5EE38;Q9QXG9;Q91WC1", "Q6PFX9;P70371;Q9QXG9", "P70371;O35144;P70388;Q5EE38;Q9QXG9;Q91WC1;Q91VL8",
"O35144;Q91VL8", "Q6PFX9;P70371;O35144;Q9QXG9;Q91WC1;Q91VL8")), row.names = 881:886, class = "data.frame")
df2 <- structure(list(Protein.IDs = c("E9QN37;A1L314", "A2A6U3;Q80UG5-2;Q80UG5-3;Q80UG5;A2A6U5;A8Y5D3",
"A2A7A7;Q8CFX1;A2A7A8", "A6PW84;A2A7Q5;Q3V1T4-3;Q3V1T4", "A2ACG7;Q9DBG6",
"F6TBV1;F6WHL0;A2ADH1;Q9CQY5-2;Q9CQY5-3;Q9CQY5", "A2AEM2;Q99LI2",
"A2AFS0;Q8C483;P26638;A2AFS1", "A2AFW6;Q9D050;Q791V5;D6RCZ1",
"Q3UB58;Q3UNJ0;A2AIN5;Q3UZ35;P21855", "A2AIW9;Q9DC61", "A2AL36;E9Q9E7;A2AS42;F6QP53;A2AL36-2",
"A2AL50;Q8C0I1;H3BKN2;A2AL49;H3BIY5", "Q8CBM2;A2AL85;Q8BSY0",
"A2AMH3;A2AMH5;A2AMH4;Q6X893;Q6X893-2;A2AMH8", "A2AMW0;P47757-2;A2AMV7;P47757;F6QJN8;F6YHZ8;F7CAZ6",
"Q3U8S1;A2APM5;A2APM3;A2APM4;E9QKM8;Q80X37;A2APM1;A2APM2;P15379-2;P15379-3;P15379-6;P15379-11;P15379-5;P15379-10;P15379-9;P15379-4;P15379-8;P15379-7;P15379;P15379-12;P15379-13",
"A2AR26;Q3UDF0;A2AR27", "E9Q8N1;E9Q8K5;A2ASS6;A2ASS6-2;A2AT70;F7CR78",
"A2AUR7;Q9D031;Q01730"), Gene.names = c("Mpeg1", "Sep-09", "H6pd",
"P3h1;Lepre1", "Rpn2", "Magt1", "Clcc1", "Sars", "Mtch2", "Cd72",
"Pmpca", "Cntrl", "Agps", "Asph", "Slc44a1", "Capzb", "Cd44",
"Slc2a6", "Ttn", "Rsu1")), row.names = c(NA, 20L), class = "data.frame")
Thanks!