以下微博内容匹配相关内容,参考自:
http://www.jikexueyuan.com/course/1540.html
http://blog.csdn.net/a62321780/article/details/52824295
最近看了一个极客视频和几篇博客。有提到微博内容匹配的。
其中,匹配规则用的是下面这种正则方法
// Combined pattern: each sub-pattern (AT, TOPIC, URL, EMOJI - defined elsewhere, not shown here)
// gets its own capturing group, and the groups are joined as alternatives.
String ALL = "(" + AT + ")" + "|" + "(" + TOPIC + ")" + "|" + "(" + URL + ")" + "|" + "(" + EMOJI + ")";
// Wrap the source text in a SpannableString.
SpannableString spannableString = new SpannableString(source);
// Compile the combined pattern and create a matcher over the source text.
Pattern pattern = Pattern.compile(ALL);
Matcher matcher = pattern.matcher(source);
我之前没用过这种方法。这里,针对视频、博客中的方法和我自己的方法做个对比。
注:匹配规则用的视频和博客中提到的。模拟请求网络数据加载的地方,视频和博客中没有提到,因为不知道微博源码,我只能自己想办法模拟加载到网络数据。我自认为没有微博开发大神的境界和技术。所以,以下代码,数据加载和展示,是自己想办法实现的,不代表真正的微博实现方法。因为是自己想的方法,所以,结果仅供参考!!!
首先,我先说一下实际情况(需求):给出一段文字(后台返回的),匹配其中的话题、网址链接、表情,甚至一些股票代码,如:000001代表平安银行。
因为数据是用户编辑后发出来的,很多是不确定的。如:股票代码。唯一能通用的匹配规则,就是表情、网址链接,以及话题(因为话题是#…#这样的格式)
代码如下,我这里就不多做解释了,注释里有。最后有测试结果
KeyBean
package com.chen.demo;
/**
 * Mutable holder that pairs a keyword with the payload attached to it.
 */
public class KeyBean {

    /** The keyword. */
    private String key;

    /**
     * Payload of the keyword. It is not shown to the user, e.g. the URL of the
     * stock page that belongs to a stock code.
     */
    private String content;

    /**
     * Stores the keyword.
     *
     * @param key the keyword
     */
    public void setKey(String key) {
        this.key = key;
    }

    /**
     * @return the keyword, or {@code null} if none has been set
     */
    public String getKey() {
        return this.key;
    }

    /**
     * Stores the payload of the keyword.
     *
     * @param content the payload
     */
    public void setContent(String content) {
        this.content = content;
    }

    /**
     * @return the payload, or {@code null} if none has been set
     */
    public String getContent() {
        return this.content;
    }
}
CHEN
package com.chen.demo;
import android.content.Context;
/**
 * Shared regular expressions and helpers used when decorating post text.
 */
public class CHEN {

    /**
     * Tail shared by every alternative of {@link #urlRegex}: a host made of
     * letters, digits, dots and dashes, a 2-4 letter final label, an optional
     * port and an optional path.
     */
    private static final String HOST_PORT_PATH =
            "[a-zA-Z0-9\\.\\-]+\\.([a-zA-Z]{2,4})(:\\d+)?(/[a-zA-Z0-9\\.\\-~!@#$%^&*+?:_/=<>,]*)?";

    /**
     * Text that a URL is replaced with before the post is displayed.
     */
    public static String REPLACEMENT_STRING = "#点击链接";

    /**
     * Emoji pattern: "[", a literal backslash, one or more CJK (U+4E00-U+9FA5)
     * or word characters, then "]".
     */
    public static String emojiRegex = "\\[\\\\[\u4e00-\u9fa5\\w]+\\]";

    /**
     * Topic pattern: "#", one or more CJK (U+4E00-U+9FA5) or word characters, "#".
     */
    public static String topicRegex = "#[\u4e00-\u9fa5\\w]+#";

    /**
     * URL pattern with three alternatives: a URL starting with http://,
     * https:// or ftp://; text starting with "www" plus one arbitrary
     * character (the dot is not escaped); and a bare host name.
     */
    public static String urlRegex =
            "((http[s]{0,1}|ftp)://" + HOST_PORT_PATH + ")"
                    + "|(www." + HOST_PORT_PATH + ")"
                    + "|(" + HOST_PORT_PATH + ")";

    /**
     * Base pattern with one capturing group each, in this order, for the link
     * placeholder, an emoji and a topic.
     */
    public static String Regex = anyOf(REPLACEMENT_STRING, emojiRegex, topicRegex);

    /**
     * Converts a size in sp to pixels using the display's scaled density,
     * rounding to the nearest integer.
     *
     * @param spValue size in sp
     * @param context context whose display metrics are read
     * @return the size in pixels
     */
    public static int sp2px(float spValue, Context context) {
        final float scaledDensity = context.getResources().getDisplayMetrics().scaledDensity;
        return (int) (scaledDensity * spValue + 0.5f);
    }

    /**
     * Wraps every alternative in its own capturing group and joins the groups with "|".
     */
    private static String anyOf(String... alternatives) {
        StringBuilder pattern = new StringBuilder();
        for (String alternative : alternatives) {
            if (pattern.length() > 0) {
                pattern.append('|');
            }
            pattern.append('(').append(alternative).append(')');
        }
        return pattern.toString();
    }
}
MyTextView
package com.chen.demo;
import android.content.Context;
import android.graphics.drawable.Drawable;
import android.support.annotation.Nullable;
import android.text.Spannable;
import android.text.SpannableString;
import android.text.Spanned;
import android.text.TextPaint;
import android.text.TextUtils;
import android.text.method.LinkMovementMethod;
import android.text.style.ClickableSpan;
import android.text.style.ImageSpan;
import android.util.AttributeSet;
import android.util.Log;
import android.view.View;
import android.widget.TextView;
import android.widget.Toast;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * TextView that decorates a post: URLs are replaced by a clickable
 * placeholder, emoji markers are covered by an image, and topics and keywords
 * become clickable. Every click shows a Toast.
 *
 * <p>Two alternative entry points are offered: {@link #showContent} works
 * from one combined regular expression, {@link #showData} searches for each
 * item separately.
 */
public class MyTextView extends TextView {

    /** Text color applied to every clickable span. */
    private static final int LINK_COLOR = 0xff2097D9;

    /** Edge length, in sp, of the inline emoji and link images. */
    private static final int ICON_SIZE_SP = 25;

    /** Characters removed from the end of a matched URL (ASCII and full-width forms). */
    private static final String TRAILING_PUNCTUATION = ",.;!?\uFF0C\u3002\uFF1B\uFF01\uFF1F";

    private Context context = null;

    /** Original URLs found by the last {@link #showContent} call, in order of appearance. */
    private ArrayList<String> urlList;

    public MyTextView(Context context) {
        super(context);
        this.context = context;
    }

    public MyTextView(Context context, @Nullable AttributeSet attrs) {
        super(context, attrs);
        this.context = context;
    }

    public MyTextView(Context context, @Nullable AttributeSet attrs, int defStyleAttr) {
        super(context, attrs, defStyleAttr);
        this.context = context;
    }

    /**
     * Displays {@code str} decorated according to one combined regular expression.
     *
     * <p>Group 1 of {@code regexStr} is treated as the link placeholder, group 2
     * as an emoji and group 3 as a topic; every further group is treated as a
     * keyword. Groups the expression does not define are ignored.
     *
     * <p>Clicking a link shows the text matched by group 1 (the placeholder),
     * not the URL it replaced; the replaced URLs are only kept in {@link #urlList}.
     *
     * @param str      raw post text; {@code null} is treated as empty
     * @param map      lookup tables: key "topic" maps a topic to its click text,
     *                 key "key" maps a keyword to its click text; may be {@code null}
     * @param regexStr combined pattern, usually {@code CHEN.Regex} plus keyword groups
     */
    public void showContent(String str, HashMap<String, HashMap<String, String>> map, String regexStr) {
        Log.e("MyTextView", "showContent");
        long startTime = System.currentTimeMillis();

        final HashMap<String, String> topicMap = map == null ? null : map.get("topic");
        final HashMap<String, String> keyMap = map == null ? null : map.get("key");
        Log.e("regexStr", regexStr);

        // Swap every URL for the placeholder text before the main matching pass.
        urlList = new ArrayList<String>();
        String content = replaceUrls(str, urlList);

        SpannableString spannableString = new SpannableString(content);
        Matcher matcher = Pattern.compile(regexStr).matcher(spannableString);
        // Ask the matcher for the real number of groups; counting '|' in the
        // pattern text is wrong as soon as an alternative contains '|' or '('.
        int count = matcher.groupCount();
        Log.e("count", count + "");

        if (matcher.find()) {
            setMovementMethod(LinkMovementMethod.getInstance());
            matcher.reset();
        }

        while (matcher.find()) {
            final String url = groupOrNull(matcher, 1);
            final String emoji = groupOrNull(matcher, 2);
            final String topic = groupOrNull(matcher, 3);

            // Link placeholder: clickable text whose first character is covered by the link icon.
            if (url != null) {
                int start = matcher.start(1);
                spannableString.setSpan(toastSpan(url), start, matcher.end(1),
                        Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
                addIcon(spannableString, R.mipmap.web_link, start, start + 1);
            }

            // Emoji: the whole marker is covered by the image.
            if (emoji != null) {
                Log.e("emoji", emoji);
                addIcon(spannableString, R.mipmap.emoji_weixiao, matcher.start(2), matcher.end(2));
            }

            // Topic: click text is looked up in the topic table.
            if (topic != null) {
                Log.e("topic", topic);
                spannableString.setSpan(lookupSpan(topicMap, topic), matcher.start(3), matcher.end(3),
                        Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
            }

            // Any group beyond the three defaults is a keyword.
            for (int i = 4; i <= count; i++) {
                final String extraData = matcher.group(i);
                if (extraData != null) {
                    Log.e("extraData==" + i, extraData);
                    spannableString.setSpan(lookupSpan(keyMap, extraData), matcher.start(i), matcher.end(i),
                            Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
                }
            }
        }

        setText(spannableString);
        Log.e("showContent use time", System.currentTimeMillis() - startTime + "");
    }

    /**
     * Displays {@code str} decorated by searching for each item separately.
     *
     * <p>Any text enclosed in square brackets is covered by the emoji image,
     * each link placeholder shows the URL it replaced when clicked, and each
     * occurrence of a bean's key shows that bean's content when clicked.
     *
     * @param str         raw post text; {@code null} is treated as empty
     * @param keyBeanList keywords and topics to make clickable; may be {@code null}
     */
    public void showData(String str, ArrayList<KeyBean> keyBeanList) {
        Log.e("MyTextView", "showData");
        long startTime = System.currentTimeMillis();

        ArrayList<String> urls = new ArrayList<String>();
        final String content = replaceUrls(str, urls);
        SpannableString spannableString = new SpannableString(content);

        // Emoji markers.
        Matcher emojiMatcher = Pattern.compile("\\[(.+?)\\]").matcher(content);
        while (emojiMatcher.find()) {
            addIcon(spannableString, R.mipmap.emoji_weixiao, emojiMatcher.start(), emojiMatcher.end());
        }

        // Links: the n-th placeholder in the text belongs to the n-th collected URL.
        final String placeholder = CHEN.REPLACEMENT_STRING;
        int searchFrom = 0;
        for (String originalUrl : urls) {
            int at = content.indexOf(placeholder, searchFrom);
            if (at < 0) {
                // No placeholder left; stop instead of creating spans at invalid offsets.
                break;
            }
            spannableString.setSpan(toastSpan(originalUrl), at, at + placeholder.length(),
                    Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
            // Link icon drawn over the first character of the placeholder.
            addIcon(spannableString, R.mipmap.web_link, at, at + 1);
            setMovementMethod(LinkMovementMethod.getInstance());
            searchFrom = at + placeholder.length();
        }

        // Keywords and topics.
        if (keyBeanList != null) {
            for (KeyBean bean : keyBeanList) {
                if (bean == null) {
                    continue;
                }
                final String data = bean.getKey();
                if (data == null || data.length() == 0) {
                    // An empty key matches everywhere without advancing, which would never terminate.
                    continue;
                }
                final String beanContent = bean.getContent();
                int from = 0;
                int at;
                while ((at = content.indexOf(data, from)) >= 0) {
                    spannableString.setSpan(toastSpan(beanContent), at, at + data.length(),
                            Spanned.SPAN_EXCLUSIVE_EXCLUSIVE);
                    setMovementMethod(LinkMovementMethod.getInstance());
                    from = at + data.length();
                }
            }
        }

        setText(spannableString);
        Log.e("showData use time", System.currentTimeMillis() - startTime + "");
    }

    /**
     * Replaces every URL that carries a scheme with the placeholder text.
     *
     * <p>Replacement is done by match position, so a URL that is a prefix of
     * another one cannot corrupt it. Punctuation stripped from the end of a
     * URL stays in the text after the placeholder.
     *
     * @param source    raw text; {@code null} yields an empty string
     * @param foundUrls receives the replaced URLs in order of appearance
     * @return the text with the URLs replaced
     */
    private static String replaceUrls(String source, ArrayList<String> foundUrls) {
        if (source == null) {
            return "";
        }
        Matcher m = Pattern.compile(CHEN.urlRegex).matcher(source);
        StringBuilder out = new StringBuilder(source.length());
        int copiedUpTo = 0;
        while (m.find()) {
            String urlStr = m.group();
            if (!hasScheme(urlStr)) {
                continue;
            }
            urlStr = stripTrailingPunctuation(urlStr);
            foundUrls.add(urlStr);
            out.append(source, copiedUpTo, m.start());
            out.append(CHEN.REPLACEMENT_STRING);
            copiedUpTo = m.start() + urlStr.length();
        }
        out.append(source, copiedUpTo, source.length());
        return out.toString();
    }

    /** Tells whether the matched text contains an http, https or ftp scheme. */
    private static boolean hasScheme(String text) {
        return text.contains("http://") || text.contains("https://") || text.contains("ftp://");
    }

    /** Removes trailing punctuation characters from a matched URL. */
    private static String stripTrailingPunctuation(String text) {
        int end = text.length();
        while (end > 0 && TRAILING_PUNCTUATION.indexOf(text.charAt(end - 1)) >= 0) {
            end--;
        }
        return text.substring(0, end);
    }

    /** Returns the text of a group, or {@code null} when the pattern has no such group or it did not match. */
    private static String groupOrNull(Matcher matcher, int group) {
        return group <= matcher.groupCount() ? matcher.group(group) : null;
    }

    /**
     * Covers the given range with a square image; the text is left as is when
     * the drawable cannot be loaded.
     */
    private void addIcon(SpannableString target, int drawableId, int start, int end) {
        try {
            Drawable drawable = context.getResources().getDrawable(drawableId);
            int size = CHEN.sp2px(ICON_SIZE_SP, context);
            drawable.setBounds(0, 0, size, size);
            target.setSpan(new ImageSpan(drawable), start, end, Spannable.SPAN_EXCLUSIVE_EXCLUSIVE);
        } catch (Exception e) {
            // Best effort: without the image the underlying text stays visible.
            Log.e("MyTextView", "image span skipped", e);
        }
    }

    /** Creates a colored, non-underlined span that shows a fixed message when clicked. */
    private ClickableSpan toastSpan(final String message) {
        return new ClickableSpan() {
            @Override
            public void updateDrawState(TextPaint ds) {
                super.updateDrawState(ds);
                ds.setColor(LINK_COLOR);
                ds.setUnderlineText(false);
            }

            @Override
            public void onClick(View widget) {
                Toast.makeText(context, message, Toast.LENGTH_SHORT).show();
            }
        };
    }

    /**
     * Creates a colored, non-underlined span whose message is looked up in
     * {@code table} at click time; a {@code null} table yields a {@code null} message.
     */
    private ClickableSpan lookupSpan(final HashMap<String, String> table, final String key) {
        return new ClickableSpan() {
            @Override
            public void updateDrawState(TextPaint ds) {
                super.updateDrawState(ds);
                ds.setColor(LINK_COLOR);
                ds.setUnderlineText(false);
            }

            @Override
            public void onClick(View widget) {
                String message = table == null ? null : table.get(key);
                Toast.makeText(context, message, Toast.LENGTH_SHORT).show();
            }
        };
    }
}
activity_main
<?xml version="1.0" encoding="utf-8"?>
<!-- Demo screen: a full-size RelativeLayout that hosts a single MyTextView. -->
<RelativeLayout
xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:layout_height="match_parent"
>
<!-- Custom text view aligned to the bottom of the parent and centered horizontally,
     with 25sp text on a translucent red background. -->
<com.chen.demo.MyTextView
android:id="@+id/tv"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_alignParentBottom="true"
android:layout_centerHorizontal="true"
android:layout_marginBottom="30dp"
android:background="#55ff0000"
android:padding="5dp"
android:textSize="25sp"/>
</RelativeLayout>
MainActivity
package com.chen.demo;
import android.app.Activity;
import android.os.Bundle;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Demo screen that feeds hard-coded sample data (standing in for a backend
 * response) to {@link MyTextView}.
 */
public class MainActivity extends Activity {

    private MyTextView tv;
    private HashMap<String, HashMap<String, String>> mapMap;
    private HashMap<String, String> topicMap;
    private HashMap<String, String> keyMap;
    private ArrayList<KeyBean> keyBeanList;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        tv = findViewById(R.id.tv);

        // ---- Approach 1: one combined regular expression ----
        mapMap = new HashMap<>();
        String str = "哈平安银行哈http://www.baidu.com哈万科哈[\\微笑]哈平安银行哈#明星##游戏#哈万科哈http://www.jikexueyuan.com/哈哈[\\微笑]哈哈#明星#";

        // Topic table: topic text -> text shown when the topic is clicked.
        topicMap = new HashMap<>();
        topicMap.put("#明星#", "去到明星话题");
        topicMap.put("#游戏#", "去到游戏话题");
        mapMap.put("topic", topicMap);

        // Keyword table: keyword -> text shown when the keyword is clicked.
        keyMap = new HashMap<>();
        keyMap.put("平安银行", "000001");
        keyMap.put("万科", "000002");
        mapMap.put("key", keyMap);

        // The keywords are not covered by the base pattern, so each one is
        // appended to it as an extra capturing group.
        String regex = CHEN.Regex + "|(平安银行)|(万科)";
        tv.showContent(str, mapMap, regex);

        // ---- Approach 2: one bean per topic or keyword ----
        keyBeanList = new ArrayList<>();
        keyBeanList.add(newKeyBean("#明星#", "去到明星话题"));
        keyBeanList.add(newKeyBean("#游戏#", "去到游戏话题"));
        keyBeanList.add(newKeyBean("平安银行", "000001"));
        keyBeanList.add(newKeyBean("万科", "000002"));
        // tv.showData(str, keyBeanList);
    }

    /** Creates a bean holding the given key and content. */
    private static KeyBean newKeyBean(String key, String content) {
        KeyBean bean = new KeyBean();
        bean.setKey(key);
        bean.setContent(content);
        return bean;
    }
}
说明:
1、文字中出现的表情和网页回形针小图片,需要把25sp转换,是因为文字大小是25sp的。要和文字大小对应
2、MyTextView中,showContent方法对应的是微博内容匹配的正则方法,showData用的是我之前一直用的方法。
3、MainActivity中,HashMap和ArrayList对数据的封装,都是模拟从后台拿到需要匹配的数据后进行的封装。还是上面提到的,我不知道微博源码,只能用自己想到的方法
4、如果复制了上面的代码运行起来,会发现,showContent展示数据后,点击“点击链接”关键字,不会提示网址内容,但是showData方法可以。这是因为,我没有对showContent情况下的链接点击做额外处理。因为,到目前为止,我不建议用第一种方法的方式处理链接的匹配。理由是,用户发了链接,在手机上真正展示的时候,都会变成“点击链接”这样的文字。每个链接展示的样式是一样的,当用户在一个文本中发了多条链接后,通过while循环,没办法给对应的位置加对应的链接内容。用第二种方法(showData中的)处理,for循环下,可以给每个区域加自己对应的点击事件。
5、我把上面MainActivity中的展示内容,复制粘贴了9次,即,用10倍数据测试的。测试结果:(以下结果,是用上面的代码测出来的,时间单位为毫秒;每次的时间,都是后台清理APP,杀死进程后,重新启动的情况下得到的时间)
测试手机:NEM-TL00H,EMUI系统4.1.2,Android版本6.0
10-30 15:16:46.497 14863-14863/com.chen.demo E/showContent use time: 13
10-30 15:17:06.484 15277-15277/com.chen.demo E/showContent use time: 13
10-30 15:17:19.642 15428-15428/? E/showContent use time: 13
10-30 15:17:43.331 15628-15628/? E/showContent use time: 13
10-30 15:17:58.190 15789-15789/? E/showContent use time: 13
10-30 15:18:35.988 16218-16218/com.chen.demo E/showData use time: 11
10-30 15:18:53.825 16397-16397/? E/showData use time: 9
10-30 15:19:14.607 16580-16580/? E/showData use time: 9
10-30 15:19:26.864 16728-16728/? E/showData use time: 10
10-30 15:19:38.076 16867-16867/? E/showData use time: 10