From f5ec8f0d8bc3a99225b58fece87e59001c698bc2 Mon Sep 17 00:00:00 2001
From: Abigail Gold <me@kb6.ee>
Date: Sat, 19 Oct 2019 22:55:53 -0400
Subject: [PATCH] convert qrz lookup to use pure lxml instead of bs4

---
 cogs/qrzcog.py | 38 +++++++++++++++++++++-----------------
 1 file changed, 21 insertions(+), 17 deletions(-)

diff --git a/cogs/qrzcog.py b/cogs/qrzcog.py
index 2e0ddbe..a1b085b 100644
--- a/cogs/qrzcog.py
+++ b/cogs/qrzcog.py
@@ -8,12 +8,13 @@ General Public License, version 2.
 """
 from collections import OrderedDict
 from datetime import datetime
+from io import BytesIO
 
 import discord
 from discord.ext import commands, tasks
 
 import aiohttp
-from bs4 import BeautifulSoup
+from lxml import etree
 
 
 class QRZCog(commands.Cog):
@@ -38,12 +39,14 @@ class QRZCog(commands.Cog):
         async with self.session.get(url) as resp:
             if resp.status != 200:
                 raise ConnectionError(f'Unable to connect to QRZ (HTTP Error {resp.status})')
-            resp_xml = await resp.text()
+            resp_xml = etree.parse(BytesIO(await resp.read())).getroot()
 
-        xml_soup = BeautifulSoup(resp_xml, "xml")
-
-        resp_data = {tag.name: tag.contents[0] for tag in xml_soup.select('QRZDatabase Callsign *')}
-        resp_session = {tag.name: tag.contents[0] for tag in xml_soup.select('QRZDatabase Session *')}
+        resp_xml_data = resp_xml.xpath('/x:QRZDatabase/x:Callsign',
+                                                namespaces={'x':'http://xmldata.qrz.com'})
+        resp_data = {el.tag.split('}')[1]: el.text for el in resp_xml_data[0].getiterator()}
+        resp_xml_session = resp_xml.xpath('/x:QRZDatabase/x:Session',
+                                          namespaces={'x':'http://xmldata.qrz.com'})
+        resp_session = {el.tag.split('}')[1]: el.text for el in resp_xml_session[0].getiterator()}
         if 'Error' in resp_session:
             if 'Session Timeout' in resp_session['Error']:
                 await self.get_session()
@@ -87,15 +90,16 @@ async def qrz_login(user: str, passwd: str, session: aiohttp.ClientSession):
     async with session.get(url) as resp:
         if resp.status != 200:
             raise ConnectionError(f'Unable to connect to QRZ (HTTP Error {resp.status})')
-        resp_xml = await resp.text()
+        resp_xml = etree.parse(BytesIO(await resp.read())).getroot()
 
-    xml_soup = BeautifulSoup(resp_xml, "xml")
-    resp_data = {tag.name: tag.contents[0] for tag in xml_soup.select('QRZDatabase Session *')}
-    if 'Error' in resp_data:
-        raise ConnectionError(resp_data['Error'])
-    if resp_data['SubExp'] == 'non-subscriber':
+    resp_xml_session = resp_xml.xpath('/x:QRZDatabase/x:Session',
+                                      namespaces={'x':'http://xmldata.qrz.com'})
+    resp_session = {el.tag.split('}')[1]: el.text for el in resp_xml_session[0].getiterator()}
+    if 'Error' in resp_session:
+        raise ConnectionError(resp_session['Error'])
+    if resp_session['SubExp'] == 'non-subscriber':
         raise ConnectionError('Invalid QRZ Subscription')
-    return resp_data['Key']
+    return resp_session['Key']
 
 
 async def qrz_test_session(key: str, session: aiohttp.ClientSession):
@@ -103,11 +107,11 @@ async def qrz_test_session(key: str, session: aiohttp.ClientSession):
     async with session.get(url) as resp:
         if resp.status != 200:
             raise ConnectionError(f'Unable to connect to QRZ (HTTP Error {resp.status})')
-        resp_xml = await resp.text()
+        resp_xml = etree.parse(BytesIO(await resp.read())).getroot()
 
-    xml_soup = BeautifulSoup(resp_xml, "xml")
-
-    resp_session = {tag.name: tag.contents[0] for tag in xml_soup.select('QRZDatabase Session *')}
+    resp_xml_session = resp_xml.xpath('/x:QRZDatabase/x:Session',
+                                      namespaces={'x':'http://xmldata.qrz.com'})
+    resp_session = {el.tag.split('}')[1]: el.text for el in resp_xml_session[0].getiterator()}
     if 'Error' in resp_session:
         raise ConnectionError(resp_session['Error'])