根据输入地址,利用已有的地址编码文件,构造处理规则策略识别地址的编码。

lib/address.json 地址编码文件(这个文件太大,博客里放不下,需要的话可以到 gitcode 仓库获取:https://gitcode.com/TomorrowAndTuture/address_code)

{"110000000000": {"province_name": "北京市","city_datas": {"110100000000": {"city_name": "市辖区","district_datas": {"110101000000": {"district_name": "东城区","town_datas": {"110101001000": {"town_name": "东华门街道","village_datas": {"110101001001": "多福巷社区居委会","110101001002": "银闸社区居委会","110101001005": "东厂社区居委会","110101001006": "智德社区居委会",
...

main.py

根据输入的详细地址,返回该地址的地址编码(会尽可能查找到更详尽的编码)

import json
import re
import traceback
from typing import Dict
import logging
import os
from logging import handlers


def _logging(**kwargs):
    """Return a logger that writes to a file rotated every midnight.

    Keyword Args:
        level: Threshold applied to both the logger and its file handler.
            Defaults to ``logging.DEBUG``.
        filename: Path of the log file; it is also used as the logger name.
            Defaults to ``'default.log'``.
        datefmt: ``strftime`` pattern for ``%(asctime)s``.
        format: Record layout passed to ``logging.Formatter``.

    Returns:
        The configured ``logging.Logger``. Calling this again with the same
        ``filename`` returns the same logger without attaching a second
        handler.
    """
    level = kwargs.pop('level', logging.DEBUG)
    filename = kwargs.pop('filename', 'default.log')
    datefmt = kwargs.pop('datefmt', '%Y-%m-%d %H:%M:%S')
    # Bound to ``fmt`` locally so the ``format`` builtin is not shadowed.
    fmt = kwargs.pop('format', '[%(asctime)s,%(msecs)d][%(module)s][%(levelname)s] %(lineno)d - %(message)s')

    log = logging.getLogger(filename)
    log.setLevel(level)
    if log.handlers:
        # Already configured: adding another handler would duplicate every record.
        return log

    # The handler opens the file immediately, so its directory must exist first.
    os.makedirs(os.path.dirname(os.path.abspath(filename)), exist_ok=True)

    th = handlers.TimedRotatingFileHandler(filename=filename, when='MIDNIGHT', backupCount=30, encoding="utf-8")
    # Rotated files get a date suffix; extMatch tells the handler which
    # suffixes count as its own backups when pruning beyond backupCount.
    th.suffix = "%Y%m%d.log"
    th.extMatch = re.compile(r"^\d{4}\d{2}\d{2}(\.\w+)?$", re.ASCII)
    th.setFormatter(logging.Formatter(fmt, datefmt))
    th.setLevel(level)
    log.addHandler(th)
    return log


root_dir = os.path.dirname(os.path.abspath(__file__))
# Data and log directories live next to this file, independent of the
# directory the script is launched from.
lib_dir = os.path.join(root_dir, "lib")
logs_dir = os.path.join(root_dir, "logs")
os.makedirs(logs_dir, exist_ok=True)
os.makedirs(lib_dir, exist_ok=True)
# Open the log inside logs_dir (the directory created above) rather than a
# CWD-relative "./logs", which only exists when launched from root_dir.
logger = _logging(filename=os.path.join(logs_dir, "address.log"))
address_file_path = os.path.join(lib_dir, "address.json")
# province_file_path = os.path.join(lib_dir, "province.json")  # 省份
# city_file_path = os.path.join(lib_dir, "city.json")  # 城市
# district_file_path = os.path.join(lib_dir, "district.json")  # 区县
# town_file_path = os.path.join(lib_dir, "town.json")  # 乡镇
# village_file_path = os.path.join(lib_dir, "village.json")  # villages


class AddressHandler:
    """Resolve a free-text address to the most specific division code available.

    The nested code table is loaded once from ``address_file_path``; flat
    code -> name maps are built for each of the five levels (province, city,
    district, town, village). Codes are handled as strings and compared by
    prefix: 2 characters for the province, 4 for the city, 6 for the
    district and 9 for the town.
    """

    # Ethnic-group names removed from town names before substring matching,
    # since written addresses often omit them.
    _FOLKS = (
        "回族", "满族", "蒙古族", "俄罗斯族", "朝鲜族", "傈僳族", "锡伯族", "达斡尔族", "柯尔克孜族", "鄂伦春族",
        "畲族", "土家族", "侗族", "瑶族", "苗族", "维吾尔族", "白族", "壮族", "仫佬族", "仡佬族", "彝族", "藏族",
        "羌族", "傣族", "纳西族", "水族", "毛南族", "普米族", "哈尼族", "佤族", "拉祜族", "德昂族", "布朗族",
        "基诺族", "阿昌族", "怒族", "东乡族", "土族", "哈萨克族", "塔吉克族",
    )

    # Ordered (suffix, replacement) pairs that reduce an official committee
    # name to the short form people write. Order matters: longer suffixes
    # must be tried before the shorter ones they contain.
    _VILLAGE_SUFFIXES = (
        ("村村民委员会", "村"),
        ("村民委员会", "村"),
        ("村委会", "村"),
        ("村村民居委会", "村"),
        ("社区居委会", "社区"),
        ("居民委员会", ""),
        ("居委会", ""),
        ("委员会", ""),
        ("委会", ""),
    )

    # Loose province / city / district / town / village splitter used when the
    # table-driven walk finds no town. Groups 4 and 5 are town and village.
    _ADDRESS_PATTERN = re.compile(
        r"(.*?省|.*?自治区|.*?市)?(.*?市|.*?自治州)?(.*?区|.*?县)?(.*?镇|.*?乡|.*?街道|.*?街|.*?办事处)?(.*?村|.*?社区|.*?路)?"
    )

    def __init__(self):
        self.address_datas: Dict[str, dict] = {}   # nested table as loaded from JSON
        self.province_dict: Dict[str, str] = {}    # province code -> name
        self.city_dict: Dict[str, str] = {}        # city code -> name
        self.district_dict: Dict[str, str] = {}    # district code -> name
        self.town_dict: Dict[str, str] = {}        # town code -> name
        self.village_dict: Dict[str, str] = {}     # village code -> name
        self.load_datas()

    def load_datas(self):
        """Load the nested code table and flatten it into the per-level maps."""
        logger.info("load address data ...")
        with open(address_file_path, encoding='utf-8') as address_file:
            self.address_datas = json.load(address_file)
        for province_code, province_info in self.address_datas.items():
            self.province_dict[province_code] = province_info["province_name"]
            for city_code, city_info in province_info["city_datas"].items():
                self.city_dict[city_code] = city_info["city_name"]
                for district_code, district_info in city_info["district_datas"].items():
                    self.district_dict[district_code] = district_info["district_name"]
                    for town_code, town_info in district_info["town_datas"].items():
                        self.town_dict[town_code] = town_info["town_name"]
                        # village_datas is already a flat code -> name mapping.
                        self.village_dict.update(town_info["village_datas"])

    def get_province_info(self, address: str):
        """Return ``(code, name)`` of the province the address starts with.

        Administrative suffixes and a few ethnic qualifiers are stripped from
        the province name first. Returns ``('', '')`` when nothing matches.
        """
        for code, name in self.province_dict.items():
            tmp_name = (
                str(name)
                .replace("省", "")
                .replace("市", "")
                .replace("自治区", "")
                .replace("维吾尔", "")
                .replace("壮族", "")
                .replace("回族", "")
            )
            # An empty stem would make startswith() match every address.
            if tmp_name and address.startswith(tmp_name):
                return code, name
        return '', ''

    def get_city_info(self, address: str, province_code: str = ""):
        """Return ``(code, name)`` of the first city whose short name occurs in the address.

        Args:
            address: Address text to search.
            province_code: When given, only cities sharing its first two
                characters are considered.

        Returns:
            ``(code, name)``, or ``('', '')`` when no city matches.
        """
        code_prefix = province_code[:2] if province_code else ""
        for code, name in self.city_dict.items():
            if not code.startswith(code_prefix):
                continue
            tmp_name = str(name).replace("市", "").replace("自治州", "")
            if not tmp_name:
                continue
            # Without a province to narrow the search, a one-character stem
            # (for example a bare "县" entry, if the table has one) would match
            # almost any address, so such names need a province.
            if not code_prefix and len(tmp_name) <= 1:
                continue
            if tmp_name in address:
                return code, name
        return '', ''

    def get_district_info(self, address: str, province_code: str):
        """Return ``(code, name)`` of the first district of the province named in the address.

        Returns ``('', '')`` when no district name occurs in the address.
        """
        code_prefix = province_code[:2]
        # Search the district map (the original searched the province map here,
        # so it could never return a district).
        for code, name in self.district_dict.items():
            if name and name in address and code.startswith(code_prefix):
                return code, name
        return '', ''

    @staticmethod
    def replace_folk(town: str):
        """Return ``town`` with every known ethnic-group name removed."""
        for folk in AddressHandler._FOLKS:
            town = town.replace(folk, "")
        return town

    @staticmethod
    def _simplify_village_name(name: str):
        """Return the short written form of an official village/community name.

        Falls back to the original name when stripping leaves one character
        or fewer, because such a stem is too generic to match on.
        """
        short_name = name
        for suffix, replacement in AddressHandler._VILLAGE_SUFFIXES:
            short_name = short_name.replace(suffix, replacement)
        return short_name if len(short_name) > 1 else name

    def get_district_town_village_info(self, district_datas, tmp_split_after_city_address, city_code):
        """Walk district -> town -> village under one city, matching names in order.

        Args:
            district_datas: ``district_datas`` mapping of the resolved city.
            tmp_split_after_city_address: Address text after the city name.
            city_code: Code of the resolved city; districts must share its
                first four characters.

        Returns:
            ``(district_name, district_code, town_name, town_code,
            village_name, village_code)``; levels that were not found are
            empty strings. The walk returns as soon as a village matches.
        """
        district_code = ""
        district_name = ""
        town_code = ""
        town_name = ""
        village_code = ""
        village_name = ""
        logger.info("递进查询区县、乡镇和村镇信息")
        # NOTE(review): without a village hit the loops keep going, so a later
        # matching district/town overwrites an earlier one — confirm intended.
        for k1, v1 in district_datas.items():
            base_district_name = v1["district_name"]
            if not base_district_name:
                continue
            if not (base_district_name in tmp_split_after_city_address and k1.startswith(city_code[:4])):
                continue
            district_code = k1
            district_name = base_district_name
            # Only the text after the district name is searched for a town.
            tmp_split_after_district_address = tmp_split_after_city_address.split(base_district_name, 1)[-1]
            for k2, v2 in v1.get("town_datas", {}).items():
                base_town_name = v2["town_name"]
                tmp_base_town_name = self.replace_folk(base_town_name)
                if len(tmp_base_town_name) <= 1:
                    tmp_base_town_name = base_town_name
                if not (tmp_base_town_name in tmp_split_after_district_address
                        and k2.startswith(district_code[:6])):
                    continue
                town_code = k2
                town_name = base_town_name
                # Only the text after the town name is searched for a village.
                tmp_split_after_town_address = tmp_split_after_district_address.split(tmp_base_town_name, 1)[-1]
                for k3, v3 in v2.get("village_datas", {}).items():
                    # Compare the village's own name (the original used the
                    # constant str(3), so no village could ever match).
                    base_village_name = str(v3)
                    tmp_base_village_name = self._simplify_village_name(base_village_name)
                    if tmp_base_village_name in tmp_split_after_town_address and k3.startswith(town_code[:9]):
                        village_code = k3
                        village_name = base_village_name
                        return district_name, district_code, town_name, town_code, village_name, village_code
        return district_name, district_code, town_name, town_code, village_name, village_code

    def handle_address(self, address: str):
        """Resolve ``address`` to the deepest division code that can be found.

        Lookup order: province (or city when the province is missing), city
        (or district when the city is missing), then a table-driven walk
        through district, town and village, and finally a regex fallback
        when no town was found.

        Returns:
            ``(status_code, info, address_code)``. ``status_code`` is always
            ``1``; ``info`` is ``"success"`` or the reason the lookup stopped
            early; ``address_code`` is ``"000000000000"`` when not even a
            province or city could be identified.
        """
        status_code = 1
        info = "success"
        address_code = "000000000000"
        logger.info(f"地址:{address}")

        province_code, province_name = self.get_province_info(address)
        if not province_code:
            logger.info("未查询到省份信息,先略过省份查询,优先查询城市信息")
            # Search the address text itself (the original passed the
            # placeholder code, so this fallback could never match).
            city_code, city_name = self.get_city_info(address)
            if city_code:
                province_code = city_code[:2] + '0' * 10
                province_name = self.province_dict.get(province_code)
                logger.info(f"优先查询到城市:{city_name}")
            else:
                info = "省份和城市信息均未查到"
                logger.info(info)
                return status_code, info, address_code
        logger.info(f"解析到省份:{province_name}")
        address_code = province_code

        province_datas = self.address_datas.get(province_code, {})
        city_datas = province_datas.get("city_datas", {})
        city_code, city_name = self.get_city_info(address, province_code)
        if not city_code:
            logger.info("未查询到城市信息,先略过城市查询,优先查询区县信息")
            district_code, district_name = self.get_district_info(address, province_code)
            if district_code:
                city_code = district_code[:4] + '0' * 8
                # The derived code may be absent from the table; keep a string
                # so the name handling below does not fail on None.
                city_name = self.city_dict.get(city_code) or ""
                logger.info(f"优先查询到区县:{district_name}")
            else:
                info = "城市和区县均未查到"
                logger.info(info)
                return status_code, info, address_code
        logger.info(f"解析到城市:{city_name}")
        address_code = city_code

        tmp_city_name = city_name.replace("市", "").replace("自治州", "").replace("地区", "")
        # str.split rejects an empty separator; with no usable city stem the
        # whole address is searched.
        if tmp_city_name:
            tmp_split_after_city_address = address.split(tmp_city_name, 1)[-1]
        else:
            tmp_split_after_city_address = address
        district_datas = city_datas.get(city_code, {}).get("district_datas", {})
        district_name, district_code, town_name, town_code, village_name, village_code = \
            self.get_district_town_village_info(district_datas, tmp_split_after_city_address, city_code)
        logger.info(f"区县:{district_name or None};乡镇:{town_name or None};村镇:{village_name or None}")

        # Regex fallback when the table-driven walk found no town.
        if town_name == "":
            logger.info("乡镇信息未查到,继续使用正则表达式再次匹配查找")
            search_obj = self._ADDRESS_PATTERN.search(address)
            if search_obj:
                match_town_name = search_obj.group(4)
                match_village_name = search_obj.group(5)
                if match_town_name and town_name == "":
                    town_name = match_town_name
                if match_village_name and village_name == "":
                    village_name = match_village_name
                logger.info(f"正则匹配到乡镇和村镇信息:{match_town_name};{match_village_name}")

                # Compare the regex captures against the code table, limited to
                # the narrowest division already resolved.
                if match_town_name:
                    for code, name in self.town_dict.items():
                        if not name:
                            continue
                        if district_code:
                            if code[0:6] != district_code[0:6]:
                                continue
                        elif code[0:4] != city_code[0:4]:
                            continue
                        # Same normalisation as the table-driven walk.
                        tmp_name = self.replace_folk(name)
                        if len(tmp_name) <= 1:
                            tmp_name = name
                        if tmp_name in match_town_name or match_town_name in tmp_name:
                            logger.info(f"正则查找到乡镇信息:{name}")
                            town_code = code
                            town_name = name
                            break
                if match_village_name:
                    for code, name in self.village_dict.items():
                        if not name:
                            continue
                        if town_code:
                            if code[0:9] != town_code[0:9]:
                                continue
                        elif district_code:
                            if code[0:6] != district_code[0:6]:
                                continue
                        elif code[0:4] != city_code[0:4]:
                            continue
                        tmp_name = self._simplify_village_name(name)
                        if tmp_name in match_village_name or match_village_name in tmp_name:
                            logger.info(f"正则查找到村镇信息:{name}")
                            village_code = code
                            village_name = name
                            break

        # Back-fill parent levels from the deeper code that was found.
        if not town_code and village_code:
            town_code = village_code[0:9] + "0" * 3
            town_name = self.town_dict.get(town_code)
            logger.info(f"反向查找到乡镇:{town_name}")
        if not district_code and town_code:
            district_code = town_code[0:6] + "0" * 6
            district_name = self.district_dict.get(district_code)
            logger.info(f"反向查找到区县:{district_name}")

        address_code = village_code or town_code or district_code or city_code or address_code
        logger.info(f"{province_name}: {province_code}, {city_name}: {city_code}, {district_name}: {district_code}, {town_name}: {town_code}, {village_name}: {village_code}")
        return status_code, info, address_code


address_handle = AddressHandler()


def process(address):
    """Resolve ``address`` and return the result as a dict.

    Returns:
        ``{"code", "info", "address_code"}``. On success the values come from
        ``AddressHandler.handle_address``; if it raises, ``code`` is ``-1``,
        ``info`` is ``"fail: <error>"`` and ``address_code`` is ``""``. The
        traceback is written to the log instead of propagating.
    """
    logger.info("========== start process ==========")
    output_data = {
        "code": -1,
        "info": "fail",
        "address_code": "",
    }
    try:
        status_code, info, address_code = address_handle.handle_address(address)
        output_data["code"] = status_code
        output_data["info"] = info
        output_data["address_code"] = address_code
    except Exception as e:
        # Top-level boundary: callers always get a dict back.
        logger.error(traceback.format_exc())
        output_data["code"] = -1
        output_data["info"] = f"fail: {e}"
    finally:
        logger.info(f"output_data: {output_data}")
        logger.info("========== end process ==========")
    return output_data


if __name__ == '__main__':
    address_list = ["四川省遂宁高新区宝升镇插板堰村", "山东省淄博市临淄区齐都镇安合村委会", ""]
    for address in address_list:
        output = process(address)
        print(output)
    # Sample outputs recorded in the original write-up:
    # {'code': 1, 'info': 'success', 'address_code': '510900000000'}
    # {'code': 1, 'info': 'success', 'address_code': '370305100000'}
    # {'code': 1, 'info': '省份和城市信息均未查到', 'address_code': '000000000000'}
    # NOTE(review): these were captured while village names were never
    # compared; the second address may now resolve to a village-level code.
    # Re-record against the data file.

本文来自互联网用户投稿,该文观点仅代表作者本人,不代表本站立场。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。
如若转载,请注明出处:http://www.pswp.cn/web/89713.shtml
繁体地址,请注明出处:http://hk.pswp.cn/web/89713.shtml
英文地址,请注明出处:http://en.pswp.cn/web/89713.shtml

如若内容造成侵权/违法违规/事实不符,请联系英文站点网进行投诉反馈email:809451989@qq.com,一经查实,立即删除!

相关文章

kafka的部署

目录 一、kafka简介 1.1、概述 1.2、消息系统介绍 1.3、点对点消息传递模式 1.4、发布-订阅消息传递模式 二、kafka术语解释 2.1、结构概述 2.2、broker 2.3、topic 2.4、producer 2.5、consumer 2.6、consumer group 2.7、leader 2.8、follower 2.9、partition…

小语种OCR识别技术实现原理

小语种OCR(光学字符识别)技术的实现原理涉及计算机视觉、自然语言处理(NLP)和深度学习等多个领域的融合,其核心目标是让计算机能够准确识别并理解不同语言的印刷或手写文本。以下是其关键技术实现原理的详细解析…

GPT:让机器拥有“创造力”的语言引擎

当ChatGPT写出莎士比亚风格的十四行诗,当GitHub Copilot自动生成编程代码,背后都源于同一项革命性技术——**GPT(Generative Pre-trained Transformer)**。今天,我们将揭开这项“语言魔术”背后的科学原理!…

LeetCode|Day19|14. 最长公共前缀|Python刷题笔记

LeetCode|Day19|14. 最长公共前缀|Python刷题笔记 🗓️ 本文属于【LeetCode 简单题百日计划】系列 👉 点击查看系列总目录 >> 📌 题目简介 题号:14. 最长公共前缀 难度:简单…

安全事件响应分析--基础命令

----万能密码oror1 or # 1or11 1 or 11安全事件响应分析------***windoes***------方法开机启动有无异常文件 【开始】➜【运行】➜【msconfig】文件排查 各个盘下的temp(tmp)相关目录下查看有无异常文件 :Windows产生的 临时文件 可以通过查看日志且通过筛…

基于C#+SQL Server实现(Web)学生选课管理系统

学生选课管理系统的设计与开发一、项目背景学生选课管理系统是一个学校不可缺少的部分,传统的人工管理档案的方式存在着很多的缺点,如:效率低、保密性差等,所以开发一套综合教务系统管理软件很有必要,它应该具有传统的…

垃圾回收(GC)

内存管理策略,在业务进程运行的过程中,由垃圾收集器以类似守护协程的方式在后台运行,按照指定策略回收不再被使用的对象,释放内存空间进行回收 优势: 屏蔽内存回收的细节:屏蔽复杂的内存管理工作…

Datawhale AI夏令营-机器学习

比赛简介 「用户新增预测挑战赛」是由科大讯飞主办的一项数据科学竞赛,旨在通过机器学习方法预测用户是否为新增用户 比赛属于二分类任务,评价指标采用F1分数,分数越高表示模型性能越好。 如果你有一份带标签的表格型数据,只要…

Spring IOC容器在Web环境中是如何启动的(源码级剖析)?

文章目录一、Web 环境中的 Spring MVC 框架二、Web 应用部署描述配置传统配置(web.xml):Java配置类(Servlet 3.0):三、核心启动流程详解1. 启动流程图2. ★容器初始化入口:ContextLoaderListene…

18个优质Qt开源项目汇总

1,Clementine Music Player Clementine Music Player 是一个功能完善、跨平台的开源音乐播放器,非常适合用于学习如何开发媒体类应用,尤其是跨平台桌面应用。它基于 Qt 框架开发,支持多种操作系统,包括 Windows、macO…

计算机视觉:AI 的 “眼睛” 如何看懂世界?

1. 什么是计算机视觉:让机器 “看见” 并 “理解” 的技术1.1 计算机视觉的核心目标计算机视觉(CV)是人工智能的一个重要分支,它让计算机能够 “看懂” 图像和视频 —— 不仅能捕捉像素信息,还能分析内容、提取语义…

华为OD刷题记录

华为OD刷题记录 刷过的题 入门 1、进制 2、NC61 doing 订阅专栏

QT学习教程(二十五)

双缓冲技术(Double Buffering)( 2、公有函数实现)#include <QtGui> #include <cmath> using namespace std; #include "plotter.h"以上代码为文件的开头,在这里把std 的名空间加入到当前的全…

设计模式笔记_结构型_装饰器模式

1.装饰器模式介绍装饰器模式是一种结构型设计模式,允许你动态地给对象添加行为,而无需修改其代码。它的核心思想是将对象放入一个“包装器”中,这个包装器提供了额外的功能,同时保持原有对象的接口不变。想象一下,你有…

day25 力扣90.子集II 力扣46.全排列 力扣47.全排列 II

子集II给你一个整数数组 nums ,找出并返回所有该数组中不同的递增子序列,递增子序列中 至少有两个元素 。你可以按 任意顺序 返回答案。数组中可能含有重复元素,如出现两个整数相等,也可以视作递增序列的一种特殊情况。示例 1…

Solidity 中的`bytes`

在 Solidity 中,bytes 和 bytes32 都是用来保存二进制数据的类型,但它们的长度、使用场景、Gas 成本完全不同。✅ 一句话区分类型一句话总结bytes32定长 32 字节,适合做哈希、地址、标识符等固定长度数据。bytes动态长度字节数组,…

初学者STM32—PWM驱动电机与舵机

一、简介 上一节课主要学习了输出比较和PWM的基本原理和结构,本节课就主要以实践为主通过STM32最小系统板和驱动器控制舵机和直流电机。 上一节课的坐标 初学者STM32—输出比较与PWM-CSDN博客 二、舵机 舵机是一种根据输入PWM信号占空比来控制输出角度的装置 输…

C++中的异常处理机制:try-catch

一、基本概念 异常(Exception):程序执行过程中发生的非正常情况,比如除以零、访问越界、内存不足等。 异常处理(Exception Handling):对异常情况进行捕获、分析,并采取补救措施…

如何从 Windows 11 或 10 远程访问 Ubuntu 24.04 或 22.04 桌面

了解如何使用 RDP(远程桌面协议)从 Windows 11 或 10 远程连接 Ubuntu 24.04 Noble 或 22.04 LTS Jammy JellyFish 桌面的步骤。 Windows 提供了一个便捷的功能,称为远程桌面连接,它使用 RDP 协议来远程连接 PC。当从 Windows 系统建立远程桌面连接时,使用起来非常简单,…

Linux 服务器中,Tab 键自动补全功能失效

在 Linux 服务器中,Tab 键自动补全功能失效通常与 bash-completion 组件缺失或配置异常有关。以下是解决问题的两个关键 YUM 指令及操作步骤:1. 安装 bash-completion 组件 sudo yum install -y bash-completion说明: bash-completion 是提供…