用户画像含义
用户画像的含义:用户画像(persona)的概念最早由交互设计之父 Alan Cooper 提出:“Personas are a concrete representation of target users.” 它是指真实用户的虚拟代表,是建立在一系列属性数据之上的目标用户模型。随着互联网的发展,现在我们说的用户画像又包含了新的内涵——通常用户画像是根据用户人口学特征、网络浏览内容、网络社交活动和消费行为等信息而抽象出的一个标签化的用户模型。构建用户画像的核心工作,主要是利用存储在服务器上的海量日志和数据库里的大量数据进行分析和挖掘,给用户贴“标签”,而“标签”是能表示用户某一维度特征的标识。
通过一个 demo 来实现简单的用户画像功能(识别用户的性别和年龄段)
数据源包括
1 app 客户群体信息,包含使用此app客户的年龄分布、性别比例等。
2 用户使用不同app的日志
通过用户使用不同 app 的时间来推断用户的性别和年龄段。
配置文件
UserDraw.properties 表示如何解析用户使用app 日志
# 用户画像配置文件:
################################
# 字段分隔符号
Separator=\\|
# 日期
Date=11
# 手机号
MDN=0
# appID
appID=15
# 计数
count=1
# 使用时长
ProcedureTime=12
UserDrawConfig.java
将properties 文件解析为 config 类
package com.river.userdraw;

import cn.hutool.setting.Setting;
import lombok.Data;
import lombok.ToString;

import java.io.Serializable;

/**
 * Column layout of one app-usage log record, loaded from {@code UserDraw.properties}.
 *
 * <p>Original author's note: the class must be serializable, otherwise an error occurs.
 *
 * @author riverfan
 */
@Data
@ToString
public class UserDrawConfig implements Serializable {

    /** Value of the {@code Separator} key. */
    public String Separator;

    /** Value of the {@code Date} key: column index of the date field. */
    public Integer dateIndex;

    /** Value of the {@code MDN} key: column index of the phone-number field. */
    public Integer mdnIndex;

    /** Value of the {@code appID} key: column index of the app id field. */
    public Integer appIDIndex;

    /** Value of the {@code count} key: column index of the count field. */
    public Integer countIndex;

    /**
     * Value of the {@code ProcedureTime} key: column index of the usage-duration field.
     * Kept as {@code Long} so the generated accessor signature stays unchanged.
     */
    public Long procedureTimeIndex;

    /**
     * Reads {@code UserDraw.properties} and builds a new configuration object.
     *
     * @return a freshly populated configuration (a new instance on every call)
     * @throws NumberFormatException if a numeric key is missing or not a number
     */
    public static UserDrawConfig getInstance() {
        UserDrawConfig config = new UserDrawConfig();
        Setting source = new Setting("UserDraw.properties");

        config.Separator = source.get("Separator");
        config.dateIndex = intValueOf(source, "Date");
        config.mdnIndex = intValueOf(source, "MDN");
        config.appIDIndex = intValueOf(source, "appID");
        config.countIndex = intValueOf(source, "count");
        config.procedureTimeIndex = Long.valueOf(source.get("ProcedureTime"));
        return config;
    }

    /** Parses the setting stored under {@code key} as an {@code Integer}. */
    private static Integer intValueOf(Setting source, String key) {
        return Integer.valueOf(source.get(key));
    }
}
UserDrawVo.java
用户的画像信息(算法比较简单)
package com.river.userdraw;

import lombok.Data;
import lombok.NoArgsConstructor;
import lombok.ToString;

import java.io.Serializable;

/**
 * Result of the user portrait: gender and age-bracket weights for one user.
 *
 * @author riverfan
 */
@Data
@NoArgsConstructor
@ToString
public class UserDrawVo implements Serializable {

    private String statTimeDay;
    private String MDN;
    private double male;
    private double female;
    private double age1;
    private double age2;
    private double age3;
    private double age4;
    private double age5;

    /**
     * Creates the portrait data from a usage record and one app's weights.
     *
     * <p>The raw age values are stored first and then normalised by
     * {@link #initAge}; the gender values are normalised by {@link #initSex}.
     * When a group of weights sums to zero it is left un-normalised.
     *
     * @param userInfoVo usage record supplying the day and the phone number
     * @param male       male weight
     * @param female     female weight
     * @param age1       weight of age bracket 1
     * @param age2       weight of age bracket 2
     * @param age3       weight of age bracket 3
     * @param age4       weight of age bracket 4
     * @param age5       weight of age bracket 5
     */
    public UserDrawVo(UserInfoVo userInfoVo, float male, float female,
                      float age1, float age2, float age3, float age4, float age5) {
        this.statTimeDay = userInfoVo.getNow();
        this.MDN = userInfoVo.getMobile();
        this.age1 = age1;
        this.age2 = age2;
        this.age3 = age3;
        this.age4 = age4;
        this.age5 = age5;
        // Initialise (normalise) the stored weights.
        initAge(age1, age2, age3, age4, age5);
        initSex(male, female);
    }

    /**
     * Gender fusion: merges another pair of gender weights, scaled by
     * {@code times}, into the current values and re-normalises them.
     * Does nothing when the combined total is zero.
     *
     * @param male2   male weight to merge in
     * @param female2 female weight to merge in
     * @param times   multiplier applied to the incoming weights
     */
    public void protraitSex(double male2, double female2, long times) {
        final double total = this.male + this.female + (male2 + female2) * times;
        if (total == 0) {
            return;
        }
        final double mergedMale = (this.male + male2 * times) / total;
        final double mergedFemale = (this.female + female2 * times) / total;
        this.male = mergedMale;
        this.female = mergedFemale;
    }

    /**
     * Age-bracket fusion: merges another set of age weights, scaled by
     * {@code times}, into the current values and re-normalises them.
     * Does nothing when the combined total is zero.
     *
     * @param pAge1 weight of age bracket 1 to merge in
     * @param pAge2 weight of age bracket 2 to merge in
     * @param pAge3 weight of age bracket 3 to merge in
     * @param pAge4 weight of age bracket 4 to merge in
     * @param pAge5 weight of age bracket 5 to merge in
     * @param times multiplier applied to the incoming weights
     */
    public void protraitAge(double pAge1, double pAge2, double pAge3, double pAge4, double pAge5, long times) {
        final double total = (age1 + age2 + age3 + age4 + age5)
                + (pAge1 + pAge2 + pAge3 + pAge4 + pAge5) * times;
        if (total == 0) {
            return;
        }
        this.age1 = (pAge1 * times + age1) / total;
        this.age2 = (pAge2 * times + age2) / total;
        this.age3 = (pAge3 * times + age3) / total;
        this.age4 = (pAge4 * times + age4) / total;
        this.age5 = (pAge5 * times + age5) / total;
    }

    /**
     * Initialises the male/female probabilities by normalising the two weights.
     * Leaves the fields untouched when the weights sum to zero.
     *
     * @param male   male weight
     * @param female female weight
     */
    public void initSex(float male, float female) {
        final float total = male + female;
        if (total == 0) {
            return;
        }
        this.male = male / total;
        this.female = female / total;
    }

    /**
     * Initialises the age-bracket probabilities by normalising the five weights.
     * Leaves the fields untouched when the weights sum to zero.
     *
     * @param pAge1 weight of age bracket 1
     * @param pAge2 weight of age bracket 2
     * @param pAge3 weight of age bracket 3
     * @param pAge4 weight of age bracket 4
     * @param pAge5 weight of age bracket 5
     */
    public void initAge(float pAge1, float pAge2, float pAge3, float pAge4, float pAge5) {
        final float total = pAge1 + pAge2 + pAge3 + pAge4 + pAge5;
        if (total == 0) {
            return;
        }
        this.age1 = pAge1 / total;
        this.age2 = pAge2 / total;
        this.age3 = pAge3 / total;
        this.age4 = pAge4 / total;
        this.age5 = pAge5 / total;
    }
}
UserInfoVo.java
用户使用app的信息
package com.river.userdraw;

import lombok.Data;
import lombok.ToString;

import java.io.Serializable;

/**
 * One user's usage of one app, as extracted from the usage log.
 *
 * @author riverfan
 */
@Data
@ToString
public class UserInfoVo implements Serializable {

    /** Day of the record as text. */
    private String now;

    /** Phone number (MDN) identifying the user. */
    private String mobile;

    /** Identifier of the app that was used. */
    private String appId;

    /** Number of usage records counted for this user and app. */
    private Long num;

    /** Usage-time value, summed when records for the same user and app are merged. */
    private Long userTime;
}
AppUserWeightVo.java
app的用户信息(性别和年龄段)
package com.river.userdraw;

import lombok.AllArgsConstructor;
import lombok.Data;

/**
 * Audience weights of one app: gender weights and five age-bracket weights.
 *
 * <p>The all-args constructor takes the fields in declaration order.
 *
 * @author riverfan
 */
@Data
@AllArgsConstructor
public class AppUserWeightVo {

    /** Identifier of the app. */
    private String appId;

    /** Second column of the app table (the app name in the sample data). */
    private String favourite;

    /** Male weight of the app's audience. */
    private Float male;

    /** Female weight of the app's audience. */
    private Float female;

    /** Weight of age bracket 1. */
    private Float age1;

    /** Weight of age bracket 2. */
    private Float age2;

    /** Weight of age bracket 3. */
    private Float age3;

    /** Weight of age bracket 4. */
    private Float age4;

    /** Weight of age bracket 5. */
    private Float age5;
}
UserDrawDemon.java
main function
package com.river.userdraw;

import com.google.common.base.Charsets;
import com.google.common.base.Splitter;
import com.google.common.collect.Maps;
import com.google.common.io.Files;
import org.apache.commons.lang.StringUtils;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import scala.Tuple2;

import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.*;
import java.util.stream.Collectors;

/**
 * Demo job that derives a simple user portrait (gender and age-bracket
 * weights) from app-usage logs and a table of per-app audience weights.
 *
 * @author riverfan
 */
public class UserDrawDemon {

    /** Splits both input files on the {@code |} field separator. */
    private static final Splitter SPLITTER = Splitter.on('|');

    /** Usage-log path used when no first program argument is given. */
    private static final String DEFAULT_USER_DATA_FILE = "/Users/riverfan/mytest/spark/userDraw/userdata.txt";

    /** App-table path used when no second program argument is given. */
    private static final String DEFAULT_APP_TAB_FILE = "/Users/riverfan/mytest/spark/userDraw/appTab.txt";

    /**
     * Runs the demo and prints one portrait per user.
     *
     * @param args optional: {@code args[0]} usage-log path, {@code args[1]} app-table path;
     *             the built-in default paths are used for any argument that is absent
     * @throws IOException if the app table cannot be read
     */
    public static void main(String[] args) throws IOException {
        UserDrawConfig userDrawConfig = UserDrawConfig.getInstance();
        System.out.println(userDrawConfig);

        String userdataFile = args.length > 0 ? args[0] : DEFAULT_USER_DATA_FILE;
        File appTabFile = new File(args.length > 1 ? args[1] : DEFAULT_APP_TAB_FILE);

        // The app table is small, so it is read directly on the driver.
        // It is loaded before Spark starts so a bad path cannot leak a context.
        Map<String, AppUserWeightVo> appMap = loadAppWeights(appTabFile);
        Map<String, UserDrawVo> userDrawMap = Maps.newHashMap();

        SparkConf conf = new SparkConf();
        conf.setAppName("UserDrawDemon");
        // Set the master property.
        conf.setMaster("local[2]");

        // try-with-resources stops the context even when the job fails.
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            sc.textFile(userdataFile)
                    // A lambda (not SPLITTER::splitToList) keeps the non-serializable
                    // Splitter out of the closure; only the config is captured.
                    .map(line -> parseRecord(line, userDrawConfig))
                    // Drop malformed records and records without an app id.
                    .filter(usage -> usage != null && StringUtils.isNotBlank(usage.getAppId()))
                    // Key is MDN + appID; the delimiter keeps distinct pairs from
                    // concatenating to the same string.
                    .mapToPair(usage -> new Tuple2<>(usage.getMobile() + "|" + usage.getAppId(), usage))
                    .reduceByKey((left, right) -> {
                        left.setNum(left.getNum() + right.getNum());
                        left.setUserTime(left.getUserTime() + right.getUserTime());
                        return left;
                    })
                    .collectAsMap()
                    .forEach((key, usage) -> mergeUsage(userDrawMap, appMap, usage));
        }

        // Print the results.
        userDrawMap.forEach((k, v) -> System.out.println(k + " " + v + " " + Thread.currentThread()));
    }

    /**
     * Loads the app table: one {@code |}-separated row per app holding the app id,
     * a label, the male and female weights and five age-bracket weights.
     * Blank lines are ignored.
     *
     * @param appTabFile file to read, decoded as UTF-8
     * @return weights keyed by app id
     * @throws IOException if the file cannot be read
     */
    private static Map<String, AppUserWeightVo> loadAppWeights(File appTabFile) throws IOException {
        return Files.readLines(appTabFile, Charsets.UTF_8)
                .stream()
                .filter(StringUtils::isNotBlank)
                .map(row -> {
                    List<String> cols = SPLITTER.splitToList(row);
                    return new AppUserWeightVo(
                            cols.get(0),
                            cols.get(1),
                            Float.parseFloat(cols.get(2)),
                            Float.parseFloat(cols.get(3)),
                            Float.parseFloat(cols.get(4)),
                            Float.parseFloat(cols.get(5)),
                            Float.parseFloat(cols.get(6)),
                            Float.parseFloat(cols.get(7)),
                            Float.parseFloat(cols.get(8)));
                })
                .collect(Collectors.toMap(AppUserWeightVo::getAppId, weight -> weight));
    }

    /**
     * Parses one usage-log line into a {@link UserInfoVo} with a count of one.
     *
     * @param line   raw log line
     * @param config column indexes of the fields to extract
     * @return the parsed record, or {@code null} when the line has fewer columns
     *         than the configuration requires
     * @throws NumberFormatException if the date or a non-blank usage time is not numeric
     */
    private static UserInfoVo parseRecord(String line, UserDrawConfig config) {
        List<String> fields = SPLITTER.splitToList(line);
        int timeIndex = config.getProcedureTimeIndex().intValue();
        int lastNeeded = Math.max(
                Math.max(config.getDateIndex(), config.getMdnIndex()),
                Math.max(config.getAppIDIndex(), timeIndex));
        if (fields.size() <= lastNeeded) {
            return null;
        }

        long epochMillis = Long.parseLong(fields.get(config.getDateIndex()));
        String usageTime = fields.get(timeIndex);

        UserInfoVo usage = new UserInfoVo();
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per record.
        usage.setNow(new SimpleDateFormat("yyyyMMdd").format(new Date(epochMillis)));
        usage.setMobile(fields.get(config.getMdnIndex()));
        usage.setAppId(fields.get(config.getAppIDIndex()));
        usage.setNum(1L);
        // Take the usage time from the record itself, not the configured column
        // index; a blank field counts as zero.
        usage.setUserTime(StringUtils.isBlank(usageTime) ? 0L : Long.parseLong(usageTime.trim()));
        return usage;
    }

    /**
     * Folds one aggregated (user, app) usage record into the user's portrait.
     * Records whose app is not in the app table are ignored. The first record
     * seen for a user initialises the portrait from the app's weights; later
     * records are merged in, weighted by their usage count.
     *
     * @param userDrawMap portraits keyed by phone number; updated in place
     * @param appMap      app weights keyed by app id
     * @param usage       aggregated usage of one app by one user
     */
    private static void mergeUsage(Map<String, UserDrawVo> userDrawMap,
                                   Map<String, AppUserWeightVo> appMap,
                                   UserInfoVo usage) {
        AppUserWeightVo weight = appMap.get(usage.getAppId());
        if (weight == null) {
            // Apps outside the table are not counted.
            return;
        }

        UserDrawVo portrait = userDrawMap.get(usage.getMobile());
        if (portrait == null) {
            // First record for this user: initialise the portrait.
            userDrawMap.put(usage.getMobile(), new UserDrawVo(
                    usage,
                    weight.getMale(),
                    weight.getFemale(),
                    weight.getAge1(),
                    weight.getAge2(),
                    weight.getAge3(),
                    weight.getAge4(),
                    weight.getAge5()));
            return;
        }

        // Gender weights.
        portrait.protraitSex(weight.getMale(), weight.getFemale(), usage.getNum());
        // Age-bracket weights.
        portrait.protraitAge(
                weight.getAge1(),
                weight.getAge2(),
                weight.getAge3(),
                weight.getAge4(),
                weight.getAge5(),
                usage.getNum());
    }
}
这里贴出部分数据
appTab.txt
10001|QQ|0.001|0.001|0|0.2|0.3|0.2|0.3
10002|飞信|0.001|0.001|0|0.2|0.3|0.2|0.3
10003|MSN|0.001|0.001|0|0.2|0.3|0.2|0.3
10004|阿里旺旺|0.001|0.001|0|0.2|0.3|0.2|0.3
10005|微信|0.001|0.001|0|0.2|0.3|0.2|0.3
10006|陌陌|0.001|0.001|0|0.2|0.3|0.2|0.3
10007|米聊|0.001|0.001|0|0.2|0.3|0.2|0.3
10008|啪啪|0.001|0.001|0|0.2|0.3|0.2|0.3
10009|飞聊|0.001|0.001|0|0.2|0.3|0.2|0.3
10010|来往|0.001|0.001|0|0.2|0.3|0.2|0.3
10011|连我|0.001|0.001|0|0.2|0.3|0.2|0.3
10012|有你|0.001|0.001|0|0.2|0.3|0.2|0.3
10013|Kakao Talk|0.001|0.001|0|0.2|0.3|0.2|0.3
10014|Whatsapp|0.001|0.001|0|0.2|0.3|0.2|0.3
10015|比邻|0.001|0.001|0|0.2|0.3|0.2|0.3
20016|新浪读书|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20017|潇湘书院|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20018|红袖添香|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20019|纵横中文网|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20020|掌上书院|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20021|和阅读|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20022|掌阅iReader|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20023|QQ阅读|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20024|百阅|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20025|塔读小说|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20026|Flipboard|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20027|zaker|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20028|viva畅读|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20029|鲜果|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20030|安卓读书|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20031|云中书城|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20032|百度多酷书城|0.001|0.001|0.1|0.3|0.3|0.2|0.1
20033|阅读星|0.001|0.001|0.1|0.3|0.3|0.2|0.1
userdata.txt
NXxY3tn5XsuFcyzEw8qP8g==|1|100.88.255.208|100.88.184.53|2152|2152|204409856|61650535|18320|81244939|CMNET.MNC002.MCC460.GPRS|1471017931598|23|1|33|330400|1|0|10.32.244.101||43785|0|111.20.242.48||80|724|3718|2|tb.himg.baidu.com|/sys/portraitn/item/e5047a6875313233343531322005||bdtb for Android 7.6.2|image/jpeg|||2912|0||||0|0|255|4006|40060044|500-1000|||2055|116.09607|43.93931|290||4|| +/rmMLtMV+s+gXTDoOaoxQ==|1|100.88.255.71|100.88.189.2|2152|2152|591531008|94392194|18332|102254743|CMNET.MNC002.MCC460.GPRS|1471017991701|0|1|22|220499|9|2|10.227.101.43||46881|0|221.179.178.55||80|941|0|2|api.k.sohu.com|/api/search/v6/hotwords.go?p1=NjA3ODAyMDQwNzQ2OTE5MTIyMw%3D%3D&gid=02ffff1106111119083a8e0ea593b8499ea08bb678c010&pid=-1&apiVersion=35||SohuNews/5.6.0 BuildCode/106||||0|0||||2|0|255|4004|40040016|500-1000|||2728|118.40116|44.72021|100||2|| pRxqXdxHty8oF2NGI/1tNg==|1|100.88.255.208|100.88.190.53|2152|2152|119344128|50899859|18330|82579980|CMNET.MNC002.MCC460.GPRS|1471017987354|80|1|05|050057|9|0|10.30.127.160||50041|0|112.17.1.185||80|642|736|2|pdata.video.iqiyi.com|/k||HCDNClient 10.1.1.22|text/html|||204|0||||0|0|255|4006|40060006|1000-2000|||438|115.98773|42.24422|||4|| H5pNI1xm1wt3Pyf9E9jY5A==|1|100.88.255.201|100.88.185.60|2152|2152|205720576|15470633|18322|81278732|cmnet.MNC002.MCC460.GPRS|1471017857513|115|1|||0|0|10.243.34.147||39828|0|120.55.238.158||80|1116|444|2|service.inke.com|/push/uploadreg?lc=3000000000006914&cv=IK3.0.10_Android&cc=TG36014&ua=OPPON5117&uid=93689652&sid=20SOWbfAka5AoaL5kOUFEM71i2KgOs5eQIIUvdvV3ZIri2lq364i3&devi=864181026876857&imsi=460023493374680&imei=864181026876857&icc=89860024057959334680&conn=3G&vv=1.0.3-2016060211417.android&aid=ea3e31e1e8ab9355&osversion=android_18&proto=4&smid=DurbqHt8FwJFZeL87THICCZ/nKLie60/XuxmCuf0hpN7ndKXFoEPPsVBm+xkH5KI/kL1ZTcEPnkj8Vud5EbIyyVA&type=1&cid=f7b9b9c6693f122591f2cad40c934ff3|||application/json; 
charset=UTF-8||aliyungf_tc=AQAAAIU2jCR4RwwA+A9o33sXrNFMz38r|56|0||||0|0|255|4006|40060022|1000-2000||||116.07379|43.93334|200||4|| TiasbUQxuUzZjXSFuW770w==|1|100.88.255.200|100.88.190.122|2152|2152|189795328|165711827|18330|102303641|CMNET.MNC007.MCC460.GPRS|1471017993459|3338|1|01|010005|5|0|10.108.49.172||55815|0|112.29.151.173||80|3877|44982|2|mmbiz.qpic.cn|/mmbiz/iabDCicNdsqqo3Xv0p35QHbRzeILGW2cpkBfylwspiaTQBRj5LLEyiaobibELKicIsEez2nKlTaaZhrNXUqIoCZdia5mA/640?wx_fmt=jpeg&tp=webp&wxfrom=5&wx_lazy=1||Mozilla/5.0 (Linux; Android 6.0.1; SM-G9280 Build/MMB29K) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/37.0.0.0 Mobile MQQBrowser/6.2 TBS/036555 Safari/537.36 MicroMessenger/6.3.23.840 NetType/cmnet Language/zh_CN|image/webp|http://mp.weixin.qq.com/s?__biz=MzA5MTU0NzU0Mg==&mid=2651333827&idx=1&sn=cd21c8aec05fd5bb224ce09ec0c7d829&scene=1&srcid=0811sEb||41944|0||||0|2|255|4002|40020166|4000-5000|||191|115.99189|42.23195|551||4||qxNdolFZp8i/nmaZPf+ZGw==|1|100.88.255.73|100.88.188.18|2152|2152|212405248|111460711|18322|81235991|CMNET.MNC002.MCC460.GPRS|1471017977218|33797|1|15|151740|8|0|10.229.97.204||48616|0|111.56.13.202||80|772|984|2|trace-ldns.ksyun.com|/getlocaldns||Dalvik/2.1.0 (Linux; U; Android 5.0.2; Letv X501 Build/DAXCNCU5501304131S)||||0|0||||0|0|255||||||5576|116.05452|43.93629|471||4|| GtfeQucYxTd0i6xIiFdw8w==|1|100.88.255.201|100.88.191.114|2152|2152|86510597|24685822|18331|102119319|CMNET.MNC002.MCC460.GPRS|1471017996695|101|1|01|010005|5|0|10.107.114.81||47650|0|111.30.135.151||8080|830|1589|2|dns.weixin.qq.com|/cgi-bin/micromsg-bin/newgetdns?uin=2740428538&clientversion=637736507&scene=0&net=2&md5=02d8691b08787fbbb9fd3ba88c887619&devicetype=android-21&lan=zh_CN&sigver=2||MicroMessenger Client||||1117|0||||0|0|255|4004|40040028|1000-2000|||195|116.38459|42.07039|220||4|| 
A0pF8zX4LzPPKdkGzCmc0Q==|1|100.88.255.208|100.88.189.238|2152|2152|43518976|81048915|18327|81350411|CMNET.MNC000.MCC460.GPRS|1471017987306|130|1|||2|0|10.145.75.52||33196|0|42.62.119.101||80|1221|451|2|l.fastapi.net|/imp?e=wk7iY42ajGFix6XD6L8UbArH_efTnuBbC38KNTiu7ItACJ3CV-8z3Y8y4qyaIh31PmM.4.69cf3a09&pid=1010231.98-25.1pof3pf.3c7i7.2.obt0qp.0180&sid=1010231||Mozilla/5.0 (Linux; U; Android 4.3; zh-cn; R6007 Build/JLS36C) AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 Mobile Safari/534.30 IqiyiApp/iqiyi IqiyiVersion/7.5.1|text/plain; charset=utf-8|http://static.atm.youku.com/idea/201511/1130/97900/640100.html|cid=1eb90739dbd965599ebb57f49c0cc352|0|0||||0|0|255|4006|40060028|1000-2000||||115.27138|41.89878|100||4||wApjYCPRGpDb2XPhiEzzlA==|1|100.88.255.209|100.88.189.106|2152|2152|228854784|150762283|18326|81297165|CMNET.MNC002.MCC460.GPRS|1471018008089|56|1|05|050054|1|0|10.33.156.190||57476|0|221.181.202.196||80|645|605|2|antileech.cde.letv.com|/time?timestamp=1471018003||SuperNode Downloader/0.9.83|text/html|||22|0||||0|0|255|4017|40170003|1000-2000|||424|117.60531|44.57987|250||4|| KGbfFXK0Hu6LIjhQjhkM0g==|1|100.88.255.201|100.88.187.176|2152|2152|240978944|37534175|18325|82640139|CMNET.MNC000.MCC460.GPRS|1471017996321|470|1|32|320291|0|0|10.244.24.42||52263|0|111.13.46.241||80|1247|12998|2|car3.autoimg.cn|/cardfs/product/g20/M10/2C/26/t_autohomecar__wKgFWVarOeqAb3XkAAa3cO-Bhh4412.jpg||Dalvik/1.6.0 (Linux; U; Android 4.4.2; CHM-TL00H 
Build/HonorCHM-TL00H)|image/jpeg|http://www.autohome.com.cn||11700|0||||0|0|255|4003|40030023|500-1000|||1358|118.65904|44.95267|100||3|||1|100.88.255.209|100.88.190.133|2152|2152|1182720|156768725|18326|102303129|CMNET.MNC000.MCC460.GPRS|1471017987255|186|1|06|060494|8|0|10.147.216.5||36772|0|61.236.251.38||80|1689|764|2|ws.stream.kg.qq.com|/vcloud1021.tc.qq.com/1021_d888552cc1104af7ae1a43d0e9a40153.f1110.m4a?vkey=017AFF7BC246EF6DC0B05AE8070E8B3D8F3E64F7F66CD6D5465529C4B4A3DB2BC52C0AC3DAA1A4C6&sha=98b29195f7ef2c7dc96f5464930f973e1b92215b&ocid=123456&fromtag=1407&sdtfrom=v1407||Mozilla/5.0 (Linux; Android 4.4.4; OPPO R7 Build/KTU84P) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/37.0.0.0 Mobile MQQBrowser/6.2 TBS/036555 Safari/537.36 MicroMessenger/6.3.22.821 NetType/cmnet Language/zh_CN||http://kg.qq.com/share.html?s=7eTJJwb1BRSZ&from=timeline&isappinstalled=0|network=1; ugc=0DdrQhhj21pm; downtime=3336; playtime=8; sd_user|0|0||||0|2|255|4006|40060029|1000-2000|||2718|117.60377|44.59532|||0||EshDtEikJHum4g1jv/Gl+Q==|1|100.88.255.209|100.88.187.109|2152|2152|207686656|128506337|18333|82631691|CMNET.MNC002.MCC460.GPRS|1471017977472|99|1|||3|0|10.146.16.116||57305|0|111.56.13.161||80|637|8204|2|tva1.sinaimg.cn|/crop.0.0.290.290.180/770170f2jw8edmz6ukhxcj208208274h.jpg||Weibo/763 CFNetwork/758.2.8 Darwin/15.0.0|image/jpeg|||6964|0||||0|0|255|4001|40010007|5000以上||||113.83938|42.24013|471||3|| gCTO9LWDJsWIpRw6J0ER6A==|1|100.88.255.199|100.88.188.49|2152|2152|63114240|52723851|18320|81259021|CMNET.MNC002.MCC460.GPRS|1471017876808|106|1|||0|0|10.244.12.72||41765|0|183.232.231.44||80|1344|886|2|loc.map.baidu.com|/sdk.php||Apache-HttpClient/UNAVAILABLE (java 1.4)|application/x-www-form-urlencoded; charset=utf-8|||583|0||||0|0|255|4168|41680001|500以内||||116.07533|43.97105|200||0|| 
EELOzFVL3oJ4hVS32YRFDw==|1|100.88.255.73|100.88.185.45|2152|2152|8850432|40398154|18322|81326604|CMNET.MNC002.MCC460.GPRS|1471017971808|3241|1|01|010006|0|0|10.230.21.130||48288|0|111.56.32.71||80|1156|7539|2|img.momocdn.com|/album/9B/42/9B42943D-C7EE-8E86-CB9D-93700699FFE820160607_S.jpg||MomoChat/6.11 Android/826 (vivo Y33; Android 5.0; Gapps 1; zh_CN; 27; vivo)|image/jpeg||SESSIONID=F83ABA4B-596B-31ED-288E-F564D7346107|6067|0||||0|0|255|4007|40070035|500-1000|||223|116.03031|43.91917|471||3|| SM+E6+k/6tXptSjOZHkOHg==|1|100.88.255.71|100.88.188.121|2152|2152|34737152|102766724|1
作者:良人与我
链接:https://www.jianshu.com/p/c776866eab9e