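# Label lookup tables: iflytek_labels (Chinese label -> numeric ID) and
# label_des2tag (Chinese label -> two-character tag), plus mappings derived
# from them.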
# Chinese label name -> numeric class ID.
# The IDs are not listed in numeric order; the listing order is kept as-is
# because dict iteration follows insertion order.
iflytek_labels = {
    "打车": 0,
    "美颜": 100,
    "影像剪辑": 101,
    "摄影修图": 102,
    "相机": 103,
    "绘画": 104,
    "二手": 105,
    "电商": 106,
    "团购": 107,
    "外卖": 108,
    "电影票务": 109,
    "社区服务": 10,
    "社区超市": 110,
    "购物咨询": 111,
    "笔记": 112,
    "办公": 113,
    "日程管理": 114,
    "女性": 115,
    "经营": 116,
    "收款": 117,
    "其他": 118,
    "薅羊毛": 11,
    "魔幻": 12,
    "仙侠": 13,
    "卡牌": 14,
    "飞行空战": 15,
    "射击游戏": 16,
    "休闲益智": 17,
    "动作类": 18,
    "体育竞技": 19,
    "地图导航": 1,
    "棋牌中心": 20,
    "经营养成": 21,
    "策略": 22,
    "MOBA": 23,
    "辅助工具": 24,
    "约会社交": 25,
    "即时通讯": 26,
    "工作社交": 27,
    "论坛圈子": 28,
    "婚恋社交": 29,
    "免费WIFI": 2,
    "情侣社交": 30,
    "社交工具": 31,
    "生活社交": 32,
    "微博博客": 33,
    "新闻": 34,
    "漫画": 35,
    "小说": 36,
    "技术": 37,
    "教辅": 38,
    "问答交流": 39,
    "租车": 3,
    "搞笑": 40,
    "杂志": 41,
    "百科": 42,
    "影视娱乐": 43,
    "求职": 44,
    "兼职": 45,
    "视频": 46,
    "短视频": 47,
    "音乐": 48,
    "直播": 49,
    "同城服务": 4,
    "电台": 50,
    "K歌": 51,
    "成人": 52,
    "中小学": 53,
    "职考": 54,
    "公务员": 55,
    "英语": 56,
    "视频教育": 57,
    "高等教育": 58,
    "成人教育": 59,
    "快递物流": 5,
    "艺术": 60,
    "语言(非英语)": 61,
    "旅游资讯": 62,
    "综合预定": 63,
    "民航": 64,
    "铁路": 65,
    "酒店": 66,
    "行程管理": 67,
    "民宿短租": 68,
    "出国": 69,
    "婚庆": 6,
    "工具": 70,
    "亲子儿童": 71,
    "母婴": 72,
    "驾校": 73,
    "违章": 74,
    "汽车咨询": 75,
    "汽车交易": 76,
    "日常养车": 77,
    "行车辅助": 78,
    "租房": 79,
    "家政": 7,
    "买房": 80,
    "装修家居": 81,
    "电子产品": 82,
    "问诊挂号": 83,
    "养生保健": 84,
    "医疗服务": 85,
    "减肥瘦身": 86,
    "美妆美业": 87,
    "菜谱": 88,
    "餐饮店": 89,
    "公共交通": 8,
    "体育咨讯": 90,
    "运动健身": 91,
    "支付": 92,
    "保险": 93,
    "股票": 94,
    "借贷": 95,
    "理财": 96,
    "彩票": 97,
    "记账": 98,
    "银行": 99,
    "政务": 9,
}

# Report the table size when the script runs.
print("1.length of iflytek_labels:", len(iflytek_labels))

# Maps each Chinese label to a two-character tag, so that mask operations can
# work with a fixed number of characters per label.
# Entry order is significant: sequence IDs are derived from it further down.
label_des2tag = {
    "银行": "银行",
    "社区服务": "社区",
    "电商": "电商",
    "支付": "支付",
    "经营养成": "养成",
    "卡牌": "卡牌",
    "借贷": "借贷",
    "驾校": "驾校",
    "理财": "理财",
    "职考": "职考",
    "新闻": "新闻",
    "旅游资讯": "旅游",
    "公共交通": "交通",
    "魔幻": "魔幻",
    "医疗服务": "医疗",
    "影像剪辑": "影像",
    "动作类": "动作",
    "工具": "工具",
    "体育竞技": "体育",
    "小说": "小说",
    "运动健身": "运动",
    "相机": "相机",
    "辅助工具": "辅助",
    "快递物流": "快递",
    "高等教育": "教育",
    "股票": "股票",
    "菜谱": "菜谱",
    "行车辅助": "行车",
    "仙侠": "仙侠",
    "亲子儿童": "亲子",
    "购物咨询": "购物",
    "射击游戏": "射击",
    "漫画": "漫画",
    "中小学": "小学",
    "同城服务": "同城",
    "成人教育": "成人",
    "求职": "求职",
    "电子产品": "电子",
    "艺术": "艺术",
    "薅羊毛": "赚钱",
    "约会社交": "约会",
    "经营": "经营",
    "兼职": "兼职",
    "短视频": "短视",
    "音乐": "音乐",
    "英语": "英语",
    "棋牌中心": "棋牌",
    "摄影修图": "摄影",
    "养生保健": "养生",
    "办公": "办公",
    "政务": "政务",
    "视频": "视频",
    "论坛圈子": "论坛",
    "彩票": "彩票",
    "直播": "直播",
    "其他": "其他",
    "休闲益智": "休闲",
    "策略": "策略",
    "即时通讯": "通讯",
    "汽车交易": "买车",
    "违章": "违章",
    "地图导航": "地图",
    "民航": "民航",
    "电台": "电台",
    "语言(非英语)": "语言",
    "搞笑": "搞笑",
    "婚恋社交": "婚恋",
    "社区超市": "超市",
    "日常养车": "养车",
    "杂志": "杂志",
    "视频教育": "在线",
    "家政": "家政",
    "影视娱乐": "影视",
    "装修家居": "装修",
    "体育咨讯": "资讯",
    "社交工具": "社交",
    "餐饮店": "餐饮",
    "美颜": "美颜",
    "问诊挂号": "挂号",
    "飞行空战": "飞行",
    "综合预定": "预定",
    "电影票务": "票务",
    "笔记": "笔记",
    "买房": "买房",
    "外卖": "外卖",
    "母婴": "母婴",
    "打车": "打车",
    "情侣社交": "情侣",
    "日程管理": "日程",
    "租车": "租车",
    "微博博客": "博客",
    "百科": "百科",
    "绘画": "绘画",
    "铁路": "铁路",
    "生活社交": "生活",
    "租房": "租房",
    "酒店": "酒店",
    "保险": "保险",
    "问答交流": "问答",
    "收款": "收款",
    "MOBA": "竞技",
    "K歌": "唱歌",
    "技术": "技术",
    "减肥瘦身": "减肥",
    "工作社交": "工作",
    "团购": "团购",
    "记账": "记账",
    "女性": "女性",
    "公务员": "公务",
    "二手": "二手",
    "美妆美业": "美妆",
    "汽车咨询": "汽车",
    "行程管理": "行程",
    "免费WIFI": "免费",
    "教辅": "教辅",
    "成人": "两性",
    "出国": "出国",
    "婚庆": "婚庆",
    "民宿短租": "民宿",
}

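# NOTE(review): commented-out, reduced versions of the two tables above
# (presumably kept for quick debugging on a handful of labels; confirm).
# label_des2tag={'银行':'银行','社区服务':'社区','电商':'电商'}
# iflytek_labels={"打车":0,"美颜":100,"影像剪辑":101,"摄影修图":102}
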
# Inverse lookup: two-character tag -> full Chinese label. Should two labels
# ever share a tag, the entry appearing later in label_des2tag wins.
label_des2tag_reverse = dict(zip(label_des2tag.values(), label_des2tag.keys()))
print(
    "length of :",
    len(label_des2tag),
    ";length of label_des2tag_reverse:",
    len(label_des2tag_reverse),
)

# List of the two-character tags, in the insertion order of label_des2tag.
label_twoword_list = list(label_des2tag.values())
print("label_twoword_list:\n", label_twoword_list)

# Full label -> position (sequence ID) of its two-character tag in
# label_twoword_list, resolved through the inverse lookup.
label2seqId_dict = {
    label_des2tag_reverse[tag]: position
    for position, tag in enumerate(label_twoword_list)
}
print("label2seqId_dict:\n", label2seqId_dict)