# File metadata (from the hosting page): 263 lines, 5.3 KiB, Python
# Chinese category label -> numeric class ID.
# Entry order is kept exactly as in the original table; the IDs cover 0..118.
iflytek_labels = {
    "打车": 0,
    "美颜": 100,
    "影像剪辑": 101,
    "摄影修图": 102,
    "相机": 103,
    "绘画": 104,
    "二手": 105,
    "电商": 106,
    "团购": 107,
    "外卖": 108,
    "电影票务": 109,
    "社区服务": 10,
    "社区超市": 110,
    "购物咨询": 111,
    "笔记": 112,
    "办公": 113,
    "日程管理": 114,
    "女性": 115,
    "经营": 116,
    "收款": 117,
    "其他": 118,
    "薅羊毛": 11,
    "魔幻": 12,
    "仙侠": 13,
    "卡牌": 14,
    "飞行空战": 15,
    "射击游戏": 16,
    "休闲益智": 17,
    "动作类": 18,
    "体育竞技": 19,
    "地图导航": 1,
    "棋牌中心": 20,
    "经营养成": 21,
    "策略": 22,
    "MOBA": 23,
    "辅助工具": 24,
    "约会社交": 25,
    "即时通讯": 26,
    "工作社交": 27,
    "论坛圈子": 28,
    "婚恋社交": 29,
    "免费WIFI": 2,
    "情侣社交": 30,
    "社交工具": 31,
    "生活社交": 32,
    "微博博客": 33,
    "新闻": 34,
    "漫画": 35,
    "小说": 36,
    "技术": 37,
    "教辅": 38,
    "问答交流": 39,
    "租车": 3,
    "搞笑": 40,
    "杂志": 41,
    "百科": 42,
    "影视娱乐": 43,
    "求职": 44,
    "兼职": 45,
    "视频": 46,
    "短视频": 47,
    "音乐": 48,
    "直播": 49,
    "同城服务": 4,
    "电台": 50,
    "K歌": 51,
    "成人": 52,
    "中小学": 53,
    "职考": 54,
    "公务员": 55,
    "英语": 56,
    "视频教育": 57,
    "高等教育": 58,
    "成人教育": 59,
    "快递物流": 5,
    "艺术": 60,
    "语言(非英语)": 61,
    "旅游资讯": 62,
    "综合预定": 63,
    "民航": 64,
    "铁路": 65,
    "酒店": 66,
    "行程管理": 67,
    "民宿短租": 68,
    "出国": 69,
    "婚庆": 6,
    "工具": 70,
    "亲子儿童": 71,
    "母婴": 72,
    "驾校": 73,
    "违章": 74,
    "汽车咨询": 75,
    "汽车交易": 76,
    "日常养车": 77,
    "行车辅助": 78,
    "租房": 79,
    "家政": 7,
    "买房": 80,
    "装修家居": 81,
    "电子产品": 82,
    "问诊挂号": 83,
    "养生保健": 84,
    "医疗服务": 85,
    "减肥瘦身": 86,
    "美妆美业": 87,
    "菜谱": 88,
    "餐饮店": 89,
    "公共交通": 8,
    "体育咨讯": 90,
    "运动健身": 91,
    "支付": 92,
    "保险": 93,
    "股票": 94,
    "借贷": 95,
    "理财": 96,
    "彩票": 97,
    "记账": 98,
    "银行": 99,
    "政务": 9,
}

# Sanity printout of the table size, emitted at import time.
print("1.length of iflytek_labels:", len(iflytek_labels))

# Map each Chinese label to a two-character tag, so that masking can always
# operate on a fixed number of characters.
# Entry order is kept exactly as in the original table: the sequence IDs
# derived further down in this file come from enumerating this dict.
label_des2tag = {
    '银行': '银行',
    '社区服务': '社区',
    '电商': '电商',
    '支付': '支付',
    '经营养成': '养成',
    '卡牌': '卡牌',
    '借贷': '借贷',
    '驾校': '驾校',
    '理财': '理财',
    '职考': '职考',
    '新闻': '新闻',
    '旅游资讯': '旅游',
    '公共交通': '交通',
    '魔幻': '魔幻',
    '医疗服务': '医疗',
    '影像剪辑': '影像',
    '动作类': '动作',
    '工具': '工具',
    '体育竞技': '体育',
    '小说': '小说',
    '运动健身': '运动',
    '相机': '相机',
    '辅助工具': '辅助',
    '快递物流': '快递',
    '高等教育': '教育',
    '股票': '股票',
    '菜谱': '菜谱',
    '行车辅助': '行车',
    '仙侠': '仙侠',
    '亲子儿童': '亲子',
    '购物咨询': '购物',
    '射击游戏': '射击',
    '漫画': '漫画',
    '中小学': '小学',
    '同城服务': '同城',
    '成人教育': '成人',
    '求职': '求职',
    '电子产品': '电子',
    '艺术': '艺术',
    '薅羊毛': '赚钱',
    '约会社交': '约会',
    '经营': '经营',
    '兼职': '兼职',
    '短视频': '短视',
    '音乐': '音乐',
    '英语': '英语',
    '棋牌中心': '棋牌',
    '摄影修图': '摄影',
    '养生保健': '养生',
    '办公': '办公',
    '政务': '政务',
    '视频': '视频',
    '论坛圈子': '论坛',
    '彩票': '彩票',
    '直播': '直播',
    '其他': '其他',
    '休闲益智': '休闲',
    '策略': '策略',
    '即时通讯': '通讯',
    '汽车交易': '买车',
    '违章': '违章',
    '地图导航': '地图',
    '民航': '民航',
    '电台': '电台',
    '语言(非英语)': '语言',
    '搞笑': '搞笑',
    '婚恋社交': '婚恋',
    '社区超市': '超市',
    '日常养车': '养车',
    '杂志': '杂志',
    '视频教育': '在线',
    '家政': '家政',
    '影视娱乐': '影视',
    '装修家居': '装修',
    '体育咨讯': '资讯',
    '社交工具': '社交',
    '餐饮店': '餐饮',
    '美颜': '美颜',
    '问诊挂号': '挂号',
    '飞行空战': '飞行',
    '综合预定': '预定',
    '电影票务': '票务',
    '笔记': '笔记',
    '买房': '买房',
    '外卖': '外卖',
    '母婴': '母婴',
    '打车': '打车',
    '情侣社交': '情侣',
    '日程管理': '日程',
    '租车': '租车',
    '微博博客': '博客',
    '百科': '百科',
    '绘画': '绘画',
    '铁路': '铁路',
    '生活社交': '生活',
    '租房': '租房',
    '酒店': '酒店',
    '保险': '保险',
    '问答交流': '问答',
    '收款': '收款',
    'MOBA': '竞技',
    'K歌': '唱歌',
    '技术': '技术',
    '减肥瘦身': '减肥',
    '工作社交': '工作',
    '团购': '团购',
    '记账': '记账',
    '女性': '女性',
    '公务员': '公务',
    '二手': '二手',
    '美妆美业': '美妆',
    '汽车咨询': '汽车',
    '行程管理': '行程',
    '免费WIFI': '免费',
    '教辅': '教辅',
    '成人': '两性',
    '出国': '出国',
    '婚庆': '婚庆',
    '民宿短租': '民宿',
}

# Disabled reduced versions of the two tables (presumably kept for quick
# small-scale experiments -- confirm before re-enabling):
# label_des2tag={'银行':'银行','社区服务':'社区','电商':'电商'}
# iflytek_labels={"打车":0,"美颜":100,"影像剪辑":101,"摄影修图":102}

# Inverse lookup: two-character tag -> original Chinese label.
# NOTE: inverting a dict silently keeps only the last label for any tag that
# appears more than once; the two lengths printed below differ if that happens.
label_des2tag_reverse = {tag: label for label, tag in label_des2tag.items()}
print("length of :", len(label_des2tag), ";length of label_des2tag_reverse:", len(label_des2tag_reverse))

# print("2.length of label_des2tag:",len(label_des2tag))

# List of the two-character tags, in the insertion order of label_des2tag.
label_twoword_list = list(label_des2tag.values())
print("label_twoword_list:\n", label_twoword_list)

# Map each regular label (recovered through its two-character tag) to the
# position of that tag in label_twoword_list.
label2seqId_dict = {
    label_des2tag_reverse[tag]: seq_id
    for seq_id, tag in enumerate(label_twoword_list)
}
print("label2seqId_dict:\n", label2seqId_dict)