微博短链接算法php版本实现代码
作者:
将长网址md5生成32位签名串,分为4段, 每段8个字节
思路:
1)将长网址md5生成32位签名串,分为4段, 每段8个字节;
2)对这四段循环处理, 取8个字节, 将他看成16进制串与0x3fffffff(30位1)与操作, 即超过30位的忽略处理;
3)这30位分成6段, 每5位的数字作为字母表的索引取得特定字符, 依次进行获得6位字符串;
4)总的md5串可以获得4个6位串; 取里面的任意一个就可作为这个长url的短url地址;
下面是PHP代码:
function shorturl($url='', $prefix='', $suffix='') {
$base = array (
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
'y', 'z', '0', '1', '2', '3', '4', '5');
$hex = md5($prefix.$url.$suffix);
$hexLen = strlen($hex);
$subHexLen = $hexLen / 8;
$output = array();
for ($i = 0; $i < $subHexLen; $i++) {
$subHex = substr ($hex, $i * 8, 8);
$int = 0x3FFFFFFF & (1 * ('0x'.$subHex));
$out = '';
for ($j = 0; $j < 6; $j++) {
$val = 0x0000001F & $int;
$out .= $base[$val];
$int = $int >> 5;
}
$output[] = $out;
}
return $output;
}
$urls = shorturl('https://www.jb51.net/');
var_dump($urls);
结果
array(4) {
[0]=>
string(6) "alms1l"
[1]=>
string(6) "2ipmby"
[2]=>
string(6) "avo1hu"
[3]=>
string(6) "fdlban"
}
另外一个版本:
function shorturl($url='', $prefix='', $suffix='') {
$base = array(
"a","b","c","d","e","f","g","h",
"i","j","k","l","m","n","o","p",
"q","r","s","t","u","v","w","x",
"y","z","0","1","2","3","4","5",
"6","7","8","9","A","B","C","D",
"E","F","G","H","I","J","K","L",
"M","N","O","P","Q","R","S","T",
"U","V","W","X","Y","Z");
$hex = md5($prefix.$url.$suffix);
$hexLen = strlen($hex);
$subHexLen = $hexLen / 8;
$output = array();
for ($i = 0; $i < $subHexLen; $i++) {
$subHex = substr ($hex, $i * 8, 8);
$int = 0x3FFFFFFF & (1 * ('0x'.$subHex));
$out = '';
for ($j = 0; $j < 6; $j++) {
$val = 0x0000003D & $int;
$out .= $base[$val];
$int = $int >> 5;
}
$output[] = $out;
}
return $output;
}
结果:
array(4) {
[0] =>
string(6) "6jmMVj"
[1] =>
string(6) "2EnIby"
[2] =>
string(6) "6vIVfu"
[3] =>
string(6) "B7Fb6n"
}
但是升级版本碰撞率反而更高了,不知道为什么。
测试碰撞的测试代码:
$result = array();
$repeats= array();
$loop = 20000;
for($i=0;$i<$loop;$i++){
$url = 'https://www.jb51.net/?id='.$i;
$shorta = shorturl($url);
$short = $shorta[0];
if(in_array($short, $result)){
$repeats[] = $short;
}
$result[] = $short;
}
$result = array();
for($i=0;$i<$loop;$i++){
$url = 'https://www.jb51.net/?id='.$i;
$shorta = shorturl($url);
$short = $shorta[0];
if(in_array($short, $repeats)){
$result[$short][] = $url;
}
}
var_dump($repeats);
var_dump($result);
结果:
array(8) {
[0] =>
string(6) "3eQBzq"
[1] =>
string(6) "uQFnay"
[2] =>
string(6) "qEZbIv"
[3] =>
string(6) "fMneYf"
[4] =>
string(6) "FJj6Fr"
[5] =>
string(6) "3Eviym"
[6] =>
string(6) "j2mmuy"
[7] =>
string(6) "jyQfIv"
}
array(8) {
'jyQfIv' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=1640"
[1] =>
string(27) "https://www.jb51.net/?id=18661"
}
'fMneYf' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=2072"
[1] =>
string(26) "https://www.jb51.net/?id=8480"
}
'3eQBzq' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=4145"
[1] =>
string(26) "https://www.jb51.net/?id=4273"
}
'j2mmuy' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7131"
[1] =>
string(27) "https://www.jb51.net/?id=17898"
}
'qEZbIv' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7320"
[1] =>
string(26) "https://www.jb51.net/?id=8134"
}
'uQFnay' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7347"
[1] =>
string(26) "https://www.jb51.net/?id=7962"
}
'FJj6Fr' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=8628"
[1] =>
string(26) "https://www.jb51.net/?id=9031"
}
'3Eviym' =>
array(2) {
[0] =>
string(27) "https://www.jb51.net/?id=11175"
[1] =>
string(27) "https://www.jb51.net/?id=14437"
}
}
1)将长网址md5生成32位签名串,分为4段, 每段8个字节;
2)对这四段循环处理, 取8个字节, 将他看成16进制串与0x3fffffff(30位1)与操作, 即超过30位的忽略处理;
3)这30位分成6段, 每5位的数字作为字母表的索引取得特定字符, 依次进行获得6位字符串;
4)总的md5串可以获得4个6位串; 取里面的任意一个就可作为这个长url的短url地址;
下面是PHP代码:
复制代码 代码如下:
function shorturl($url='', $prefix='', $suffix='') {
$base = array (
'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h',
'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p',
'q', 'r', 's', 't', 'u', 'v', 'w', 'x',
'y', 'z', '0', '1', '2', '3', '4', '5');
$hex = md5($prefix.$url.$suffix);
$hexLen = strlen($hex);
$subHexLen = $hexLen / 8;
$output = array();
for ($i = 0; $i < $subHexLen; $i++) {
$subHex = substr ($hex, $i * 8, 8);
$int = 0x3FFFFFFF & (1 * ('0x'.$subHex));
$out = '';
for ($j = 0; $j < 6; $j++) {
$val = 0x0000001F & $int;
$out .= $base[$val];
$int = $int >> 5;
}
$output[] = $out;
}
return $output;
}
$urls = shorturl('https://www.jb51.net/');
var_dump($urls);
结果
复制代码 代码如下:
array(4) {
[0]=>
string(6) "alms1l"
[1]=>
string(6) "2ipmby"
[2]=>
string(6) "avo1hu"
[3]=>
string(6) "fdlban"
}
另外一个版本:
复制代码 代码如下:
function shorturl($url='', $prefix='', $suffix='') {
$base = array(
"a","b","c","d","e","f","g","h",
"i","j","k","l","m","n","o","p",
"q","r","s","t","u","v","w","x",
"y","z","0","1","2","3","4","5",
"6","7","8","9","A","B","C","D",
"E","F","G","H","I","J","K","L",
"M","N","O","P","Q","R","S","T",
"U","V","W","X","Y","Z");
$hex = md5($prefix.$url.$suffix);
$hexLen = strlen($hex);
$subHexLen = $hexLen / 8;
$output = array();
for ($i = 0; $i < $subHexLen; $i++) {
$subHex = substr ($hex, $i * 8, 8);
$int = 0x3FFFFFFF & (1 * ('0x'.$subHex));
$out = '';
for ($j = 0; $j < 6; $j++) {
$val = 0x0000003D & $int;
$out .= $base[$val];
$int = $int >> 5;
}
$output[] = $out;
}
return $output;
}
结果:
复制代码 代码如下:
array(4) {
[0] =>
string(6) "6jmMVj"
[1] =>
string(6) "2EnIby"
[2] =>
string(6) "6vIVfu"
[3] =>
string(6) "B7Fb6n"
}
但是升级版本碰撞率反而更高了,不知道为什么。
测试碰撞的测试代码:
复制代码 代码如下:
$result = array();
$repeats= array();
$loop = 20000;
for($i=0;$i<$loop;$i++){
$url = 'https://www.jb51.net/?id='.$i;
$shorta = shorturl($url);
$short = $shorta[0];
if(in_array($short, $result)){
$repeats[] = $short;
}
$result[] = $short;
}
$result = array();
for($i=0;$i<$loop;$i++){
$url = 'https://www.jb51.net/?id='.$i;
$shorta = shorturl($url);
$short = $shorta[0];
if(in_array($short, $repeats)){
$result[$short][] = $url;
}
}
var_dump($repeats);
var_dump($result);
结果:
复制代码 代码如下:
array(8) {
[0] =>
string(6) "3eQBzq"
[1] =>
string(6) "uQFnay"
[2] =>
string(6) "qEZbIv"
[3] =>
string(6) "fMneYf"
[4] =>
string(6) "FJj6Fr"
[5] =>
string(6) "3Eviym"
[6] =>
string(6) "j2mmuy"
[7] =>
string(6) "jyQfIv"
}
array(8) {
'jyQfIv' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=1640"
[1] =>
string(27) "https://www.jb51.net/?id=18661"
}
'fMneYf' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=2072"
[1] =>
string(26) "https://www.jb51.net/?id=8480"
}
'3eQBzq' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=4145"
[1] =>
string(26) "https://www.jb51.net/?id=4273"
}
'j2mmuy' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7131"
[1] =>
string(27) "https://www.jb51.net/?id=17898"
}
'qEZbIv' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7320"
[1] =>
string(26) "https://www.jb51.net/?id=8134"
}
'uQFnay' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=7347"
[1] =>
string(26) "https://www.jb51.net/?id=7962"
}
'FJj6Fr' =>
array(2) {
[0] =>
string(26) "https://www.jb51.net/?id=8628"
[1] =>
string(26) "https://www.jb51.net/?id=9031"
}
'3Eviym' =>
array(2) {
[0] =>
string(27) "https://www.jb51.net/?id=11175"
[1] =>
string(27) "https://www.jb51.net/?id=14437"
}
}