本文适用于 Rail 0.1 版本.
工作:输入Rial文件的路径,识别词元,输出实例列表.
是一边写代码一边写文章的,所以有时候改了原本的代码不一定会说.以思路为中心.
Rail是一种信息分布与细节构成的表示语言。详见参考文档.
关于本文的分析对象，参考逻辑行的类型.

从源文件到词元

这个Python脚本实现了文件的读取和词元拆分.以及缩进个数的识别.

def split_mul_ind(inp):word = ""line = [] result = []indent = 0indent_list = []ind_in = Truefor char in inp:if char =="\n":if word:line.append(word)word = ""if line:result.append(line)indent_list.append(indent)line = []indent = 0ind_in = Trueelif char == " ":if ind_in:indent += 1elif word:line.append(word)word = ""else:ind_in = Falseword += charif word: line.append(word)if line:result.append(line)indent_list.append(indent)return (result, indent_list)with (open("test.txt", 'r') as f):result, indent_list = split_mul_ind(f.read())def check(result, indent_list, n):print(f"[{n+1}] {result[n]}: {indent_list[n]}")

测试文件 test.txt 如下.包含了Rail语言的20种语义.


:
: *
*
* = *
< *
< * = *
< * = * ? *
* < *
* < * ! *
+ *
+ * = *
+ * < *
+ * < * ! *
> *
> * = *
> * < *
> * < * ! *
* >
# *

测试方法如下.

for i in range(19):check(result, indent_list, i)

运行结果如下.

[1] [':']: 0
[2] [':', '*']: 0
[3] ['*']: 0
[4] ['*', '=', '*']: 0
[5] ['<', '*']: 0
[6] ['<', '*', '=', '*']: 0
[7] ['<', '*', '=', '*', '?', '*']: 0
[8] ['*', '<', '*']: 0
[9] ['*', '<', '*', '!', '*']: 0
[10] ['+', '*']: 0
[11] ['+', '*', '=', '*']: 0
[12] ['+', '*', '<', '*']: 0
[13] ['+', '*', '<', '*', '!', '*']: 0
[14] ['>', '*']: 0
[15] ['>', '*', '=', '*']: 0
[16] ['>', '*', '<', '*']: 0
[17] ['>', '*', '<', '*', '!', '*']: 0
[18] ['*', '>']: 0
[19] ['#', '*']: 0

语句结构

除了空格会被直接忽略,可以为每种语句结构实现一个类,即实现19个类.

class PlaceHoder:def __init__(line):line.content = ""class Comment:def __init__(line, p1):line.content = (p1)class Element:def __init__(line, p1):line.content = (p1)class Rail:def __init__(line, p1, p2):line.content = (p1, p2)class ReferenceDefination1:def __init__(line, p1):line.content = (p1)class ReferenceDefination2:def __init__(line, p1, p2):line.content = (p1, p2)class ReferenceDefination3:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class ReferenceImpletement1:def __init__(line, p1, p2):line.content = (p1)class ReferenceImpletement2:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class Category1:def __init__(line, p1):line.content = (p1)class Category2:def __init__(line, p1, p2):line.content = (p1, p2)class Category3:def __init__(line, p1, p2):line.content = (p1, p2)class Category4:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class AggregateDefination1:def __init__(line, p1):line.content = (p1)class AggregateDefination2:def __init__(line, p1, p2):line.content = (p1, p2)class AggregateDefination3:def __init__(line, p1, p2):line.content = (p1, p2)class AggregateDefination4:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class AggregateImpletement1:def __init__(line, p1, p2):line.content = (p1)class Import:def __init__(line, p1):line.content = (p1)

然后呢,要给这些行创建实例.
我看这个长度是固定的,能到6,所以做个分支结构敷衍了事,要改以后该.
先做个骨架.

def get_ins(line_list):lenth = len(line_list)if lenth == 1:...elif lenth == 2:...elif lenth == 3:...elif lenth == 4:...elif lenth == 5:...elif lenth == 6:...

然后照着参考文档一个一个匹配至实例.

def get_ins(line_list):ins = []for line in line_list:lenth = len(line)if lenth == 1:if line[0] == ':':ins.append(PlaceHoder())else:ins.append(Element(line[0]))elif lenth == 2:if line[0] == ':':ins.append(Comment(line[1]))elif line[0] == '<':ins.append(ReferenceDefination1(line[1]))elif line[0] == '>':ins.append(AggregateDefination1(line[1]))elif line[0] == '+':ins.append(Category1(line[1]))elif line[0] == '#':ins.append(Import(line[1]))else:ins.append(AggregateImpletement(line[0]))elif lenth == 3:if line[1] == "=":ins.append(Rail(line[0], line[2]))else:ins.append(ReferenceImpletement1(line[0], line[2]))elif lenth == 4:if line[0] == '+':if line[2] == '=':ins.append(Category2(line[1], line[3]))else:ins.append(Category3(line[1], line[3]))elif line[0] == '>':if line[2] == '=':ins.append(AggregateDefination2(line[1], line[3]))else:ins.append(AggregateDefination3(line[1], line[3]))else:ins.append(ReferenceDefination2(line[1], line[3]))elif lenth == 5:ins.append(ReferenceImpletement2(line[0], line[2], line[4]))elif lenth == 6:if line[0] == '<':ins.append(ReferenceDefination3(line[1], line[3], line[5]))elif line[0] == '+':ins.append(Category4(line[1], line[3], line[5]))else:ins.append(AggregateDefination4(line[1], line[3], line[5]))return ins

测试

使用之前构造的词元列表进行检查.

INS = get_ins(result)
for line in INS:print(line)

测试结果与预期一致.
在这里插入图片描述
本文的最终代码如下.

def split_mul_ind(inp):word = ""line = [] result = []indent = 0indent_list = []ind_in = Truefor char in inp:if char =="\n":if word:line.append(word)word = ""if line:result.append(line)indent_list.append(indent)line = []indent = 0ind_in = Trueelif char == " ":if ind_in:indent += 1elif word:line.append(word)word = ""else:ind_in = Falseword += charif word: line.append(word)if line:result.append(line)indent_list.append(indent)return (result, indent_list)with (open("test.txt", 'r') as f):result, indent_list = split_mul_ind(f.read())def check(result, indent_list, n):print(f"{n+1} | {result[n]}: {indent_list[n]}")for i in range(19):check(result, indent_list, i)def __init__(line):line.content = ""class PlaceHoder:def __init__(line):passclass Comment:def __init__(line, p1):line.content = (p1)class Element:def __init__(line, p1):line.content = (p1)class Rail:def __init__(line, p1, p2):line.content = (p1, p2)class ReferenceDefination1:def __init__(line, p1):line.content = (p1)class ReferenceDefination2:def __init__(line, p1, p2):line.content = (p1, p2)class ReferenceDefination3:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class ReferenceImpletement1:def __init__(line, p1, p2):line.content = (p1, p2)class ReferenceImpletement2:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class Category1:def __init__(line, p1):line.content = (p1)class Category2:def __init__(line, p1, p2):line.content = (p1, p2)class Category3:def __init__(line, p1, p2):line.content = (p1, p2)class Category4:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class AggregateDefination1:def __init__(line, p1):line.content = (p1)class AggregateDefination2:def __init__(line, p1, p2):line.content = (p1, p2)class AggregateDefination3:def __init__(line, p1, p2):line.content = (p1, p2)class AggregateDefination4:def __init__(line, p1, p2, p3):line.content = (p1, p2, p3)class AggregateImpletement:def __init__(line, p1):line.content = (p1)class Import:def __init__(line, p1):line.content = (p1)def get_ins(line_list):ins = []for line in line_list:lenth = len(line)if lenth == 1:if line[0] == ':':ins.append(PlaceHoder())else:ins.append(Element(line[0]))elif lenth == 2:if line[0] == ':':ins.append(Comment(line[1]))elif line[0] == '<':ins.append(ReferenceDefination1(line[1]))elif line[0] == '>':ins.append(AggregateDefination1(line[1]))elif line[0] == '+':ins.append(Category1(line[1]))elif line[0] == '#':ins.append(Import(line[1]))else:ins.append(AggregateImpletement(line[0]))elif lenth == 3:if line[1] == "=":ins.append(Rail(line[0], line[2]))else:ins.append(ReferenceImpletement1(line[0], line[2]))elif lenth == 4:if line[0] == '+':if line[2] == '=':ins.append(Category2(line[1], line[3]))else:ins.append(Category3(line[1], line[3]))elif line[0] == '>':if line[2] == '=':ins.append(AggregateDefination2(line[1], line[3]))else:ins.append(AggregateDefination3(line[1], line[3]))else:ins.append(ReferenceDefination2(line[1], line[3]))elif lenth == 5:ins.append(ReferenceImpletement2(line[0], line[2], line[4]))elif lenth == 6:if line[0] == '<':ins.append(ReferenceDefination3(line[1], line[3], line[5]))elif line[0] == '+':ins.append(Category4(line[1], line[3], line[5]))else:ins.append(AggregateDefination4(line[1], line[3], line[5]))return insINS = get_ins(result)
for line in INS:print(line)