首页 > 脚本专栏 > python > python删除相似度高的图片

教你怎么用python删除相似度高的图片

2021-05-08 09:52:48 作者：DJames23

这篇文章主要介绍了如何用python删除相似度高的图片，文中有非常详细的代码示例，对正在学习python的小伙伴们有很好的帮助，需要的朋友可以参考下

1. 前言

因为输入是视频，切完帧之后都是连续图片，所以我的目录结构如下：

（原文此处为目录结构截图，结构如下：）

frame_output/
├── 1/    （第一段视频切帧后的图片）
└── 2/    （第二段视频切帧后的图片）

其中frame_output是视频切帧后的保存路径，1和2两个文件夹分别存放两段视频切帧后的图片。

2. 切帧代码如下：

#encoding:utf-8
import os
import sys
import cv2

# Extract every third frame of each video found in ``video_path`` and save
# the frames as JPEG files under ``frame_output/<video number>/``.
video_path = '/home/pythonfile/video/'  # absolute path; this directory holds the videos
# frame_output (created next to this script) is where the extracted frames go
out_frame_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'frame_output')
if not os.path.exists(out_frame_path):
    os.makedirs(out_frame_path)
print('out_frame_path', out_frame_path)
# os.listdir() returns names in arbitrary order; sort them so the numbering
# of the output folders (1, 2, ...) is the same on every run.
list1 = sorted(os.listdir(video_path))
print('list', list1)
files = [os.path.join(video_path, name) for name in list1]
print('files',files)
for k,file in enumerate(files):
    # One output folder per video, numbered from 1.
    frame_dir = os.path.join(out_frame_path, '%d'%(k+1))
    if not os.path.exists(frame_dir):
        os.makedirs(frame_dir)
    cap = cv2.VideoCapture(file)
    j = 0  # 1-based index of the frame that was just read
    print('start prossing NO.%d video' % (k + 1))
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # No more frames could be read from this capture.
                break
            j += 1
            # Save one frame out of every three.
            if j % 3 == 0:
                cv2.imwrite(os.path.join(frame_dir, '%d.jpg'%j), frame)
    finally:
        # Release the capture even if reading or writing raised.
        cap.release()
    print('prossed NO.%d video'%(k+1))

3. 删除相似度高的图片

# coding: utf-8
import os
import cv2
# from skimage.measure import compare_ssim
# from skimage.metrics import _structural_similarity
from skimage.metrics import structural_similarity as ssim

def delete(filename1):
    """Delete the file at path ``filename1`` by calling ``os.remove``."""
    os.remove(filename1)


def list_all_files(root):
    """Collect the files below ``root``, mirroring the directory nesting.

    Every regular file directly inside ``root`` contributes its joined path
    (``os.path.join(root, name)``). Every sub-directory contributes ONE nested
    list, produced by a recursive call on that sub-directory. Entries that are
    neither a directory nor a regular file are skipped.

    Args:
        root: Path of the directory to scan.

    Returns:
        A list whose items are path strings (files) or nested lists
        (sub-directories), ordered by entry name.
    """
    files = []
    # os.listdir() makes no promise about ordering, so sort the names to get
    # a reproducible result.
    for name in sorted(os.listdir(root)):
        # isdir()/isfile() need the joined path, not the bare entry name.
        element = os.path.join(root, name)
        if os.path.isdir(element):
            # Keep each sub-directory's files grouped in their own list.
            files.append(list_all_files(element))
        elif os.path.isfile(element):
            files.append(element)
    return files


def ssim_compare(img_files):
    """Flag images that are nearly identical to the image just before them.

    Walks ``img_files`` as consecutive pairs ``(img_files[i], img_files[i+1])``
    and computes the structural similarity (SSIM) of each pair. When the value
    is above 0.9 the SECOND file of the pair is appended to the module-level
    list ``imgs_n`` and counted. Nothing is deleted here.

    Relies on module-level names ``cv2``, ``ssim`` and ``imgs_n``.

    Args:
        img_files: Ordered list of image paths.

    Returns:
        int: number of files flagged (appended to ``imgs_n``). Lists with
        fewer than two entries have no pairs and return 0.
    """
    count = 0
    # Pairing with zip never reads past the end of the list, so an empty or
    # single-element list simply produces no comparisons.
    for current, following in zip(img_files, img_files[1:]):
        if not os.path.exists(current):
            print('not exist', current)
            break
        img = cv2.imread(current)
        img1 = cv2.imread(following)
        if img is None or img1 is None:
            # cv2.imread signals a missing/unreadable file by returning None
            # instead of raising; SSIM cannot be computed for such a pair.
            print('unreadable', current, following)
            continue
        # Structural similarity of the two frames, averaged over channels.
        # NOTE(review): `multichannel` matches the scikit-image release this
        # script was written for; newer releases appear to use `channel_axis`
        # instead - confirm against the installed version.
        ssim_value = ssim(img,img1,multichannel=True)
        if ssim_value > 0.9:
            count += 1
            imgs_n.append(following)
            print('big_ssim:',current, following, ssim_value)
    return count


if __name__ == '__main__':
    # Root directory that contains one sub-directory of frames per video.
    path = '/home/dj/pythonfile/frame_output/'

    img_path = path
    # Filled by ssim_compare() with the paths of the images to delete.
    imgs_n = []

    # One nested list of paths per sub-directory of `path`.
    all_files = list_all_files(path)
    print('1',len(all_files))

    for files in all_files:
        if not isinstance(files, list):
            # A plain file sitting directly under `path` is returned as a
            # string, not as a per-directory list; there is nothing to compare.
            continue
        # Filter BEFORE sorting so that only '<number>.jpg' names reach int().
        img_files = [f for f in files
                     if isinstance(f, str) and f.endswith('.jpg')]
        if not img_files:
            continue
        # Sort numerically by frame number (the file name without extension);
        # a plain string sort would put '10.jpg' before '9.jpg'.
        img_files.sort(
            key=lambda x: int(os.path.splitext(os.path.basename(x))[0]))
        count = ssim_compare(img_files)
        print(os.path.dirname(img_files[0]),"路径下删除的图片数量为：",count)
    # Deletion happens only after every directory has been compared.
    for image in imgs_n:
        delete(image)

4. 导入skimage.measure import compare_ssim出错的解决方法：

将

from skimage.measure import compare_ssim

改为

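from skimage.metrics import structural_similarity as ssim

（scikit-image 0.16 起，compare_ssim 已更名为 skimage.metrics.structural_similarity，建议像第3节的代码那样直接导入这个公开函数，而不是导入私有模块 _structural_similarity。）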

5. structural_similarity.py的源码

from warnings import warn
import numpy as np
from scipy.ndimage import uniform_filter, gaussian_filter

from ..util.dtype import dtype_range
from ..util.arraycrop import crop
from .._shared.utils import warn, check_shape_equality

__all__ = ['structural_similarity']


def structural_similarity(im1, im2,
                          *,
                          win_size=None, gradient=False, data_range=None,
                          multichannel=False, gaussian_weights=False,
                          full=False, **kwargs):
    """
    Compute the mean structural similarity index between two images.

    Parameters
    ----------
    im1, im2 : ndarray
        Images. Any dimensionality with same shape.
    win_size : int or None, optional
        The side-length of the sliding window used in comparison. Must be an
        odd value. If `gaussian_weights` is True, this is ignored and the
        window size will depend on `sigma`.
    gradient : bool, optional
        If True, also return the gradient with respect to im2.
    data_range : float, optional
        The data range of the input image (distance between minimum and
        maximum possible values). By default, this is estimated from the image
        data-type.
    multichannel : bool, optional
        If True, treat the last dimension of the array as channels. Similarity
        calculations are done independently for each channel then averaged.
    gaussian_weights : bool, optional
        If True, each patch has its mean and variance spatially weighted by a
        normalized Gaussian kernel of width sigma=1.5.
    full : bool, optional
        If True, also return the full structural similarity image.

    Other Parameters
    ----------------
    use_sample_covariance : bool
        If True, normalize covariances by N-1 rather than, N where N is the
        number of pixels within the sliding window.
    K1 : float
        Algorithm parameter, K1 (small constant, see [1]_).
    K2 : float
        Algorithm parameter, K2 (small constant, see [1]_).
    sigma : float
        Standard deviation for the Gaussian when `gaussian_weights` is True.

    Returns
    -------
    mssim : float
        The mean structural similarity index over the image.
    grad : ndarray
        The gradient of the structural similarity between im1 and im2 [2]_.
        This is only returned if `gradient` is set to True.
    S : ndarray
        The full SSIM image.  This is only returned if `full` is set to True.

    Notes
    -----
    To match the implementation of Wang et. al. [1]_, set `gaussian_weights`
    to True, `sigma` to 1.5, and `use_sample_covariance` to False.

    .. versionchanged:: 0.16
        This function was renamed from ``skimage.measure.compare_ssim`` to
        ``skimage.metrics.structural_similarity``.

    References
    ----------
    .. [1] Wang, Z., Bovik, A. C., Sheikh, H. R., & Simoncelli, E. P.
       (2004). Image quality assessment: From error visibility to
       structural similarity. IEEE Transactions on Image Processing,
       13, 600-612.
       https://ece.uwaterloo.ca/~z70wang/publications/ssim.pdf,
       :DOI:`10.1109/TIP.2003.819861`

    .. [2] Avanaki, A. N. (2009). Exact global histogram specification
       optimized for structural similarity. Optical Review, 16, 613-621.
       :arxiv:`0901.0065`
       :DOI:`10.1007/s10043-009-0119-z`

    """
    check_shape_equality(im1, im2)

    if multichannel:
        # loop over channels
        # Each channel is scored by a recursive call with multichannel
        # disabled; the per-channel means are averaged at the end.
        args = dict(win_size=win_size,
                    gradient=gradient,
                    data_range=data_range,
                    multichannel=False,
                    gaussian_weights=gaussian_weights,
                    full=full)
        args.update(kwargs)
        nch = im1.shape[-1]
        mssim = np.empty(nch)
        # Per-channel gradient / SSIM maps are only allocated when requested.
        if gradient:
            G = np.empty(im1.shape)
        if full:
            S = np.empty(im1.shape)
        for ch in range(nch):
            ch_result = structural_similarity(im1[..., ch],
                                              im2[..., ch], **args)
            # The shape of ch_result depends on the gradient/full flags.
            if gradient and full:
                mssim[..., ch], G[..., ch], S[..., ch] = ch_result
            elif gradient:
                mssim[..., ch], G[..., ch] = ch_result
            elif full:
                mssim[..., ch], S[..., ch] = ch_result
            else:
                mssim[..., ch] = ch_result
        mssim = mssim.mean()
        if gradient and full:
            return mssim, G, S
        elif gradient:
            return mssim, G
        elif full:
            return mssim, S
        else:
            return mssim

    # Optional algorithm parameters arrive through **kwargs; defaults are
    # applied here.
    K1 = kwargs.pop('K1', 0.01)
    K2 = kwargs.pop('K2', 0.03)
    sigma = kwargs.pop('sigma', 1.5)
    # NOTE: only negative values are rejected, so 0 passes these checks even
    # though the messages say "must be positive".
    if K1 < 0:
        raise ValueError("K1 must be positive")
    if K2 < 0:
        raise ValueError("K2 must be positive")
    if sigma < 0:
        raise ValueError("sigma must be positive")
    use_sample_covariance = kwargs.pop('use_sample_covariance', True)

    if gaussian_weights:
        # Set to give an 11-tap filter with the default sigma of 1.5 to match
        # Wang et. al. 2004.
        truncate = 3.5

    if win_size is None:
        if gaussian_weights:
            # set win_size used by crop to match the filter size
            r = int(truncate * sigma + 0.5)  # radius as in ndimage
            win_size = 2 * r + 1
        else:
            win_size = 7   # backwards compatibility

    # Every image dimension must be at least win_size long.
    if np.any((np.asarray(im1.shape) - win_size) < 0):
        raise ValueError(
            "win_size exceeds image extent.  If the input is a multichannel "
            "(color) image, set multichannel=True.")

    if not (win_size % 2 == 1):
        raise ValueError('Window size must be odd.')

    if data_range is None:
        # Fall back to the value range that dtype_range lists for im1's dtype.
        if im1.dtype != im2.dtype:
            warn("Inputs have mismatched dtype.  Setting data_range based on "
                 "im1.dtype.", stacklevel=2)
        dmin, dmax = dtype_range[im1.dtype.type]
        data_range = dmax - dmin

    ndim = im1.ndim

    # Choose the local-averaging filter used for all means and (co)variances.
    if gaussian_weights:
        filter_func = gaussian_filter
        filter_args = {'sigma': sigma, 'truncate': truncate}
    else:
        filter_func = uniform_filter
        filter_args = {'size': win_size}

    # ndimage filters need floating point data
    im1 = im1.astype(np.float64)
    im2 = im2.astype(np.float64)

    # Number of pixels inside one sliding window.
    NP = win_size ** ndim

    # filter has already normalized by NP
    if use_sample_covariance:
        cov_norm = NP / (NP - 1)  # sample covariance
    else:
        cov_norm = 1.0  # population covariance to match Wang et. al. 2004

    # compute (weighted) means
    ux = filter_func(im1, **filter_args)
    uy = filter_func(im2, **filter_args)

    # compute (weighted) variances and covariances
    uxx = filter_func(im1 * im1, **filter_args)
    uyy = filter_func(im2 * im2, **filter_args)
    uxy = filter_func(im1 * im2, **filter_args)
    vx = cov_norm * (uxx - ux * ux)
    vy = cov_norm * (uyy - uy * uy)
    vxy = cov_norm * (uxy - ux * uy)

    # Stabilizing constants derived from K1, K2 and the data range.
    R = data_range
    C1 = (K1 * R) ** 2
    C2 = (K2 * R) ** 2

    # A1 * A2 is the numerator and B1 * B2 the denominator of the SSIM map S.
    A1, A2, B1, B2 = ((2 * ux * uy + C1,
                       2 * vxy + C2,
                       ux ** 2 + uy ** 2 + C1,
                       vx + vy + C2))
    D = B1 * B2
    S = (A1 * A2) / D

    # to avoid edge effects will ignore filter radius strip around edges
    pad = (win_size - 1) // 2

    # compute (weighted) mean of ssim
    mssim = crop(S, pad).mean()

    if gradient:
        # The following is Eqs. 7-8 of Avanaki 2009.
        grad = filter_func(A1 / D, **filter_args) * im1
        grad += filter_func(-S / B2, **filter_args) * im2
        grad += filter_func((ux * (A2 - A1) - uy * (B2 - B1) * S) / D,
                            **filter_args)
        grad *= (2 / im1.size)

        if full:
            return mssim, grad, S
        else:
            return mssim, grad
    else:
        if full:
            return mssim, S
        else:
            return mssim

到此，这篇关于教你怎么用python删除相似度高的图片的文章就介绍到这了。更多相关python删除相似度高的图片的内容，请搜索脚本之家以前的文章或继续浏览下面的相关文章，希望大家以后多多支持脚本之家！