"""OCR a numbered series of PNG pages with Tesseract and log the text.

Setup for the Simplified Chinese language pack (``chi_sim``)::

    wget https://github.com/tesseract-ocr/tessdata/raw/main/chi_sim.traineddata
    mkdir tessdata
    mv chi_sim.traineddata tessdata
    export TESSDATA_PREFIX=tessdata
"""

import re

from loguru import logger
from PIL import Image
import pytesseract

# Rotating, compressed log file that receives every recognised page.
logger.add('huanbu.log', rotation='100 KB', level='DEBUG', compression='tar.gz')
logger.debug("That's it, beautiful and simple logging!")


def format_number(number: int, width: int) -> str:
    """Return *number* as a string left-padded with zeros to *width* characters."""
    return f'{number:0{width}}'


def extract_text(image_path: str) -> str:
    """Run OCR on the image at *image_path* and return the text without whitespace.

    The image is recognised with the ``chi_sim`` language data, then every
    whitespace character (spaces, tabs, newlines) is removed from the result.

    Any exception raised while opening the image or running Tesseract
    propagates to the caller.
    """
    # Use a context manager so the underlying file handle is released
    # immediately instead of lingering until garbage collection.
    with Image.open(image_path) as image:
        text = pytesseract.image_to_string(image, lang='chi_sim')
    return re.sub(r'\s', '', text)


def main() -> None:
    """OCR ./huanbu/001.png through ./huanbu/330.png, printing and logging each."""
    imgpath = './huanbu/'
    for num in range(1, 331):
        fnum = format_number(num, 3)  # file names are zero-padded to 3 digits
        img = f'{imgpath}{fnum}.png'
        extracted_text = extract_text(img)
        print(extracted_text)
        logger.info(extracted_text)


if __name__ == "__main__":
    main()