Compare commits
No commits in common. "339195fda694e5dee7b2f8a827e32b722e54434b" and "b6daacca2d2aa203316b8668a1ec95c8656eddb7" have entirely different histories.
339195fda6
...
b6daacca2d
|
|
@ -102,8 +102,8 @@ docker-compose -p recipe-test down
|
|||
```
|
||||
|
||||
|
||||
Tests are written in the pytest framework. Currently focused on unit tests and code
|
||||
coverage. Integration tests to come.
|
||||
Tests are written in the pytest framework. Currently focused on unit tests.
|
||||
Integration tests to come.
|
||||
|
||||
To run the tests, use:
|
||||
```sh
|
||||
|
|
|
|||
|
|
@ -10,6 +10,5 @@ dependencies = [
|
|||
"SQLAlchemy==1.4.39",
|
||||
"python-dotenv==0.20.0",
|
||||
"beautifulsoup4==4.11.1",
|
||||
"psycopg2-binary==2.9.3",
|
||||
"requests~=2.30.0"
|
||||
"psycopg2-binary==2.9.3"
|
||||
]
|
||||
|
|
@ -12,7 +12,6 @@ pyparsing==3.0.9
|
|||
pytest==7.1.3
|
||||
pytest-cov==4.0.0
|
||||
python-dotenv==0.20.0
|
||||
requests~=2.30.0
|
||||
soupsieve==2.3.2.post1
|
||||
SQLAlchemy==1.4.39
|
||||
tomli==2.0.1
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import re
|
|||
from sqlalchemy import select, desc, exists, not_, except_
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
import bs4
|
||||
import requests as req
|
||||
from urllib.request import urlopen
|
||||
from urllib.parse import urljoin
|
||||
import logging
|
||||
from argparse import ArgumentParser
|
||||
|
|
@ -73,22 +73,23 @@ def reparse_ingredients(session):
|
|||
|
||||
|
||||
|
||||
def load_page(recipe_url):
|
||||
def load_recipe(recipe_url):
|
||||
try:
|
||||
logging.info(f'Loading Page: {recipe_url}')
|
||||
with req.get(recipe_url) as f:
|
||||
if f.status_code == 404:
|
||||
raise Exception(f"Page does not exist (404): {recipe_url}")
|
||||
logging.info(f'Loading Recipe: {recipe_url}')
|
||||
with urlopen(recipe_url) as f:
|
||||
if f.getcode() == 404:
|
||||
raise Exception(f"Recipe Does not exist: {recipe_url}")
|
||||
return bs4.BeautifulSoup(f.read().decode(), 'html.parser')
|
||||
|
||||
except Exception as e:
|
||||
logging.warning(f"Could not download or parse recipe: {recipe_url}")
|
||||
logging.warning(e)
|
||||
|
||||
return None
|
||||
|
||||
def parse_recipe(session, recipe, site):
|
||||
recipe_url = urljoin(site.base_url, str(recipe.identifier))
|
||||
recipe_page = load_page(recipe_url)
|
||||
recipe_page = load_recipe(recipe_url)
|
||||
if not recipe_page:
|
||||
return None
|
||||
|
||||
|
|
@ -125,7 +126,7 @@ def parse_recipe(session, recipe, site):
|
|||
|
||||
return recipe
|
||||
|
||||
def main(): # pragma: no cover
|
||||
def main():
|
||||
parser = ArgumentParser(description="Scrape a recipe site for recipies")
|
||||
parser.add_argument('site',
|
||||
help='Name of site')
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ from bs4 import BeautifulSoup
|
|||
import pytest
|
||||
|
||||
|
||||
def test_load_page():
|
||||
def test_load_recipe():
|
||||
page = scrape.load_recipe("https://hs.andreistoica.ca:4943")
|
||||
assert type(page) == BeautifulSoup
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue