You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

36 lines
939 B
Python

9 months ago
from PIL import Image
import pytesseract
import re
from loguru import logger
'''
Download the Simplified Chinese (chi_sim) language pack for Tesseract:
wget https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata
mkdir tessdata
mv chi_sim.traineddata tessdata
export TESSDATA_PREFIX=tessdata
'''
# Send log records of level DEBUG and above to 'huanbu.log'. The file is
# rotated once it grows past 100 KB and rotated files are compressed as tar.gz.
logger.add(
    'huanbu.log',
    level='DEBUG',
    rotation='100 KB',
    compression='tar.gz',
)
# Emit one DEBUG record at import time (goes to every configured sink).
logger.debug("That's it, beautiful and simple logging!")
def format_number(number: int, width: int) -> str:
    """Return *number* rendered with leading zeros to at least *width* characters.

    The value is formatted with the ``0{width}`` format spec, so numbers that
    already need more than *width* characters are returned unpadded, and a
    minus sign is kept in front of the padding.

    Args:
        number: The value to format.
        width: Minimum total length of the resulting string.

    Returns:
        The zero-padded string, e.g. ``format_number(7, 3) == '007'``.
    """
    return f'{number:0{width}}'
def extract_text(image_path) -> str:
    """OCR an image with the ``chi_sim`` language data and strip all whitespace.

    Args:
        image_path: Location of the image file; passed straight to
            ``Image.open``.

    Returns:
        The text returned by ``pytesseract.image_to_string`` with every
        whitespace character (spaces, tabs, newlines, ...) removed.

    Any exception raised while opening the image or running the OCR is not
    caught here and propagates to the caller.
    """
    # Use the image as a context manager so its underlying file is closed as
    # soon as OCR finishes instead of lingering until garbage collection;
    # this function is called once per page in a long loop.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image, lang='chi_sim')
    return re.sub(r'\s', '', text)
if __name__ == "__main__":
    # Script entry point: OCR every numbered page image in turn, echoing the
    # recognised text to stdout and recording it in the log.
    image_dir = './huanbu/'
    # Pages are numbered 1 through 330 and stored as zero-padded three-digit
    # file names (001.png, 002.png, ...) directly under image_dir.
    for page in range(1, 331):
        page_path = f'{image_dir}{format_number(page, 3)}.png'
        page_text = extract_text(page_path)
        print(page_text)
        logger.info(page_text)