爬虫实战 - 色情网站的字体加密解密

image-20220429164519624

image-20220429164601508

这种所谓的“字体加密”,其实只是把每个字符的编码与 128 做了一次异或;异或是可逆的,所以再执行一次页面里的 I 函数就可以还原出原文:

/**
 * XOR every UTF-16 code unit of a string with 128.
 *
 * XOR is its own inverse, so the same function both obfuscates and
 * restores text: I(I(s)) === s.
 *
 * @param {string} r - The string to transform.
 * @returns {string} A string of the same length with bit 7 of every code
 *     unit flipped; an empty input yields an empty string.
 */
function I(r){
    var n="";
    // Declare the counter locally: an undeclared `i` becomes an implicit
    // global and throws a ReferenceError in strict mode / ES modules.
    for(var i=0;i<r.length;++i){
        n+=String.fromCharCode(128^r.charCodeAt(i));
    }
    return n
}
// Sample obfuscated string used to demonstrate I().
// Declared explicitly so the assignment does not create an implicit global
// (which is a ReferenceError in strict mode).
var str = "驘頜傼溅縯类缎姳人跚鐱縦瀮卋嗪嗪癴搭フ氡恳劰邼妪綧氡兠事岱岄丆";
console.log(str)       // the obfuscated form
console.log(I(str));   // the same text after XOR-ing each code unit with 128

再来一个 Python 实现:

import requests
import re

def decodeStr(str, key=128):
    """Return *str* with every character's code point XOR-ed with *key*.

    XOR is its own inverse, so calling the function twice with the same key
    gives back the original text.

    Args:
        str: Text to transform. The name shadows the builtin ``str``; it is
            kept unchanged so existing keyword callers keep working.
        key: Integer XOR-ed into each code point. Defaults to 128, the
            constant this function originally hard-coded.

    Returns:
        The transformed string, the same length as the input. An empty
        input yields an empty string.

    Raises:
        ValueError: If ``key ^ ord(char)`` is not a valid code point for
            ``chr``. This cannot happen with the default key, which only
            flips bit 7.
    """
    # Join once instead of growing a string with += inside the loop.
    return ''.join(chr(key ^ ord(char)) for char in str)

def decodeHtml(html):
    """Decode every ``I("...")`` payload embedded in *html*.

    Each string literal passed to an ``I("...")`` call is run through
    ``decodeStr`` and substituted in place; the surrounding ``I("`` and
    ``")`` text is left as it was.

    Args:
        html: Page source to process.

    Returns:
        A copy of *html* with the payload of each ``I("...")`` call decoded.
        Text outside those calls is returned unchanged.
    """
    def _decode_call(match):
        # Rebuild the call around the decoded payload so only the captured
        # text changes.
        return 'I("' + decodeStr(match.group(1)) + '")'

    # Substituting at the match position avoids str.replace(..., 1), which
    # rewrites the first occurrence of the payload text anywhere in the page
    # and can therefore hit text outside the I("...") call. The raw string
    # also avoids the invalid "\(" escape warning.
    return re.sub(r'I\("(.*?)"\)', _decode_call, html)

# Fetch the page, decode its bytes to text, then decode the embedded
# I("...") payloads and print the result.
url = 'http://188491.com/play/8604.html'
headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
# requests has no default timeout; without one an unresponsive server blocks
# the script indefinitely.
result = requests.get(url=url, headers=headers, timeout=10)
# Decode the raw bytes once with the detected charset. The previous
# text.encode(result.encoding) round trip raises TypeError when
# result.encoding is None and is lossy when the header charset is wrong.
test2 = result.content.decode(
    result.apparent_encoding or result.encoding or 'utf-8',
    errors='replace',
)
test2 = decodeHtml(test2)
print(test2)
#

image-20220429164732476