plugin controller change

This commit is contained in:
Manuel Fritsch 2021-01-30 22:29:33 +01:00
parent 53a82b4249
commit aeabde8c4a
887 changed files with 34635 additions and 87014 deletions

BIN
.DS_Store vendored

Binary file not shown.

View file

@ -1 +1 @@
__version__ = "4.0.0.12" __version__ = "4.0.0.13"

View file

@ -23,16 +23,16 @@ class CBPiKettleLogic(metaclass=ABCMeta):
self.cbpi.log.log_data(self.id, value) self.cbpi.log.log_data(self.id, value)
async def run(self): async def run(self):
self.state = True
while self.running: while self.running:
print("RUNNING KETTLE") print("RUNNING KETTLE")
await asyncio.sleep(1) await asyncio.sleep(1)
self.state = False
def get_state(self): def get_state(self):
return dict(running=self.running)
return dict(state=self.state)
async def start(self): async def start(self):
self.running = True self.running = True
async def stop(self): async def stop(self):

View file

@ -7,19 +7,15 @@ import re
import requests import requests
import yaml import yaml
from cbpi.utils.utils import load_config from cbpi.utils.utils import load_config
from zipfile import ZipFile
from cbpi.craftbeerpi import CraftBeerPi from cbpi.craftbeerpi import CraftBeerPi
import os import os
import pathlib import pathlib
import shutil import shutil
import yaml
import click
def create_plugin_file(): from jinja2 import Template
import os.path
if os.path.exists(os.path.join(".", 'config', "plugin_list.txt")) is False:
srcfile = os.path.join(os.path.dirname(__file__), "config", "plugin_list.txt")
destfile = os.path.join(".", 'config')
shutil.copy(srcfile, destfile)
print("Plugin Folder created")
def create_config_file(): def create_config_file():
import os.path import os.path
@ -74,9 +70,7 @@ def clear_db():
os.remove(os.path.join(".", "craftbeerpi.db")) os.remove(os.path.join(".", "craftbeerpi.db"))
print("database Cleared") print("database Cleared")
def check_for_setup(): def check_for_setup():
if os.path.exists(os.path.join(".", "config", "config.yaml")) is False: if os.path.exists(os.path.join(".", "config", "config.yaml")) is False:
print("***************************************************") print("***************************************************")
print("CraftBeerPi Config File not found: %s" % os.path.join(".", "config", "config.yaml")) print("CraftBeerPi Config File not found: %s" % os.path.join(".", "config", "config.yaml"))
@ -87,137 +81,164 @@ def check_for_setup():
return True return True
def list_plugins(): def plugins_add(package_name):
print("***************************************************")
print("CraftBeerPi 4.x Plugin List")
print("***************************************************")
print("")
plugins_yaml = "https://raw.githubusercontent.com/Manuel83/craftbeerpi-plugins/master/plugins_v4.yaml"
r = requests.get(plugins_yaml)
data = yaml.load(r.content, Loader=yaml.FullLoader)
for name, value in data.items():
print(name)
print("")
print("***************************************************")
def add(package_name):
if package_name is None: if package_name is None:
print("Missing Plugin Name: cbpi add --name=") print("Pleaes provide a plugin Name")
return
try:
with open(os.path.join(".", 'config', "config.yaml"), 'rt') as f:
data = yaml.load(f, Loader=yaml.FullLoader)
if package_name in data["plugins"]:
print("")
print("Plugin {} already active".format(package_name))
print("")
return
data["plugins"].append(package_name)
with open(os.path.join(".", 'config', "config.yaml"), 'w') as outfile:
yaml.dump(data, outfile, default_flow_style=False)
print("")
print("Plugin {} activated".format(package_name))
print("")
except Exception as e:
print(e)
pass
def plugin_remove(package_name):
    """Deactivate a plugin by removing it from the ``plugins`` list in
    ``./config/config.yaml``.

    Only an entry that is exactly equal to ``package_name`` is removed, so
    deactivating ``cbpi4`` does not also drop ``cbpi4-ui``.

    :param package_name: name of the plugin package to deactivate; when it is
        ``None`` a hint is printed and nothing is changed.
    :return: always ``None``; the outcome is reported on stdout.
    """
    if package_name is None:
        print("Pleaes provide a plugin Name")
        return

    config_path = os.path.join(".", 'config', "config.yaml")
    try:
        with open(config_path, 'rt') as f:
            data = yaml.load(f, Loader=yaml.FullLoader)

        # A missing or empty ``plugins`` key is treated as "no active plugins".
        active_plugins = data.get("plugins") or []
        if package_name not in active_plugins:
            print("")
            print("Plugin {} not active".format(package_name))
            print("")
            return

        # Exact comparison: a substring test would remove unrelated plugins
        # whose names merely contain ``package_name``.
        data["plugins"] = [p for p in active_plugins if p != package_name]

        with open(config_path, 'w') as outfile:
            yaml.dump(data, outfile, default_flow_style=False)

        print("")
        print("Plugin {} deactivated".format(package_name))
        print("")
    except Exception as e:
        # Best-effort CLI helper: report the problem instead of crashing.
        print(e)
def plugins_list():
    """Print the plugins listed under ``plugins`` in ``./config/config.yaml``.

    Any error raised while reading or parsing the file is printed instead of
    propagated; the closing separator line is always written.
    """
    separator = "--------------------------------------"
    print(separator)
    print("List of active pluigins")
    config_path = os.path.join(".", 'config', "config.yaml")
    try:
        with open(config_path, 'rt') as config_file:
            config = yaml.load(config_file, Loader=yaml.FullLoader)
            for plugin_name in config["plugins"]:
                print("- {}".format(plugin_name))
    except Exception as error:
        print(error)
    print(separator)
def plugin_create(name):
if os.path.exists(os.path.join(".", name)) is True:
print("Cant create Plugin. Folder {} already exists ".format(name))
return return
data = subprocess.check_output([sys.executable, "-m", "pip", "install", package_name]) url = 'https://github.com/Manuel83/craftbeerpi4-plugin-template/archive/main.zip'
data = data.decode('UTF-8') r = requests.get(url)
with open('temp.zip', 'wb') as f:
f.write(r.content)
patter_already_installed = "Requirement already satisfied: %s" % package_name with ZipFile('temp.zip', 'r') as repo_zip:
pattern = "Successfully installed %s-([-0-9a-zA-Z._]*)" % package_name repo_zip.extractall()
match_already_installed = re.search(patter_already_installed, data)
match_installed = re.search(pattern, data)
if match_already_installed is not None:
print("Plugin already installed")
return False
if match_installed is None:
print(data)
print("Faild to install plugin")
return False
version = match_installed.groups()[0]
plugins = load_config("./config/plugin_list.txt")
if plugins is None:
plugins = {}
now = datetime.datetime.now()
plugins[package_name] = dict(version=version, installation_date=now.strftime("%Y-%m-%d %H:%M:%S"))
with open('./config/plugin_list.txt', 'w') as outfile:
yaml.dump(plugins, outfile, default_flow_style=False)
print("Plugin %s added" % package_name)
return True
def remove(package_name): os.rename("./craftbeerpi4-plugin-template-main", os.path.join(".", name))
if package_name is None: os.rename(os.path.join(".", name, "src"), os.path.join(".", name, name))
print("Missing Plugin Name: cbpi add --name=")
return
data = subprocess.check_output([sys.executable, "-m", "pip", "uninstall", "-y", package_name])
data = data.decode('UTF-8')
pattern = "Successfully uninstalled %s-([-0-9a-zA-Z._]*)" % package_name import jinja2
match_uninstalled = re.search(pattern, data)
if match_uninstalled is None: templateLoader = jinja2.FileSystemLoader(searchpath=os.path.join(".", name))
templateEnv = jinja2.Environment(loader=templateLoader)
TEMPLATE_FILE = "setup.py"
template = templateEnv.get_template(TEMPLATE_FILE)
outputText = template.render(name=name)
print("Faild to uninstall plugin") with open(os.path.join(".", name, "setup.py"), "w") as fh:
return False fh.write(outputText)
plugins = load_config("./config/plugin_list.txt") TEMPLATE_FILE = "MANIFEST.in"
if plugins is None: template = templateEnv.get_template(TEMPLATE_FILE)
plugins = {} outputText = template.render(name=name)
with open(os.path.join(".", name, "MANIFEST.in"), "w") as fh:
fh.write(outputText)
if package_name not in plugins: TEMPLATE_FILE = os.path.join("/", name , "config.yaml")
return False template = templateEnv.get_template(TEMPLATE_FILE)
outputText = template.render(name=name)
del plugins[package_name] with open(os.path.join(".", name, name, "config.yaml"), "w") as fh:
with open('./config/plugin_list.txt', 'w') as outfile: fh.write(outputText)
yaml.dump(plugins, outfile, default_flow_style=False) print("")
print("")
print("Plugin {} created! See https://craftbeerpi.gitbook.io/craftbeerpi4/development how to run your plugin ".format(name))
print("")
print("Happy Development! Cheers")
print("")
print("")
print("Plugin %s removed" % package_name)
return True
@click.group()
def main(): def main():
level =logging.INFO
parser = argparse.ArgumentParser(description='Welcome to CraftBeerPi 4')
parser.add_argument("action", type=str, help="start,stop,restart,setup,plugins")
parser.add_argument('--debug', dest='debug', action='store_true')
parser.add_argument("--name", type=str, help="Plugin name")
args = parser.parse_args()
if args.debug is True:
level =logging.DEBUG
else:
level =logging.INFO
#logging.basicConfig(level=logging.INFO, filename='./logs/app.log', filemode='a', format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s') logging.basicConfig(level=level, format='%(asctime)s - %(levelname)s - %(name)s - %(message)s')
pass
if args.action == "setup":
print("Setting up CBPi") @click.command()
create_home_folder_structure() def setup():
create_plugin_file() '''Create Config folder'''
create_config_file() print("Setting up CraftBeerPi")
copy_splash() create_home_folder_structure()
create_config_file()
@click.command()
def start():
if check_for_setup() is False:
return return
print("START")
cbpi = CraftBeerPi()
cbpi.start()
if args.action == "cleardb": @click.command()
clear_db() def plugins():
return '''List active plugins'''
plugins_list()
if args.action == "plugins": return
list_plugins()
return
if args.action == "add":
add(args.name)
return
if args.action == "remove":
remove(args.name)
return
if args.action == "start":
if check_for_setup() is False:
return
cbpi = CraftBeerPi()
cbpi.start()
return
parser.print_help()
@click.command()
@click.argument('name')
def add(name):
'''Activate Plugin'''
plugins_add(name)
@click.command()
@click.argument('name')
def remove(name):
'''Deactivate Plugin'''
plugin_remove(name)
@click.command()
@click.argument('name')
def create(name):
    '''Create New Plugin'''
    # The docstring above is shown by click as this command's help text.
    plugin_create(name)
main.add_command(setup)
main.add_command(start)
main.add_command(plugins)
main.add_command(add)
main.add_command(remove)
main.add_command(create)

View file

@ -9,3 +9,6 @@ port: 8000
username: cbpi username: cbpi
password: 123 password: 123
plugins:
- cbpi4-ui

View file

@ -37,6 +37,6 @@ class ActorController(BasicController):
instance = data.get("instance") instance = data.get("instance")
state = state=instance.get_state() state = state=instance.get_state()
except Exception as e: except Exception as e:
logging.error("Faild to crate actor dict {} ".format(e)) logging.error("Faild to create actor dict {} ".format(e))
state = dict() state = dict()
return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", [])) return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", []))

View file

@ -31,13 +31,12 @@ class BasicController:
logging.info("{} Load ".format(self.name)) logging.info("{} Load ".format(self.name))
with open(self.path) as json_file: with open(self.path) as json_file:
data = json.load(json_file) data = json.load(json_file)
self.data = data["data"] self.data = data["data"]
if self.autostart is True: if self.autostart is True:
for d in self.data: for d in self.data:
logging.info("{} Starting ".format(self.name)) logging.info("{} Starting ".format(self.name))
await self.start(d.get("id")) await self.start(d.get("id"))
await self.push_udpate()
async def save(self): async def save(self):
logging.info("{} Save ".format(self.name)) logging.info("{} Save ".format(self.name))
@ -76,6 +75,7 @@ class BasicController:
instance = item.get("instance") instance = item.get("instance")
await instance.stop() await instance.stop()
await instance.task await instance.task
await self.push_udpate()
except Exception as e: except Exception as e:
logging.error("{} Cant stop {} - {}".format(self.name, id, e)) logging.error("{} Cant stop {} - {}".format(self.name, id, e))
@ -84,20 +84,17 @@ class BasicController:
try: try:
item = self.find_by_id(id) item = self.find_by_id(id)
instance = item.get("instance") instance = item.get("instance")
if instance is not None and instance.running is True: if instance is not None and instance.running is True:
logging.warning("{} already running {}".format(self.name, id)) logging.warning("{} already running {}".format(self.name, id))
return return
type = item["type"] type = item["type"]
clazz = self.types[type]["class"] clazz = self.types[type]["class"]
item["instance"] = clazz(self.cbpi, item["id"], {}) item["instance"] = clazz(self.cbpi, item["id"], {})
await item["instance"].start() await item["instance"].start()
item["instance"].task = self._loop.create_task(item["instance"].run()) item["instance"].task = self._loop.create_task(item["instance"].run())
logging.info("Sensor started {}".format(id)) logging.info("{} started {}".format(self.name, id))
except Exception as e: except Exception as e:
logging.error("{} Cant start {} - {}".format(self.name, id, e)) logging.error("{} Cant start {} - {}".format(self.name, id, e))

View file

@ -13,6 +13,7 @@ class KettleController(BasicController):
item = self.find_by_id(id) item = self.find_by_id(id)
instance = item.get("instance") instance = item.get("instance")
await instance.start() await instance.start()
await self.push_udpate()
except Exception as e: except Exception as e:
logging.error("Faild to switch on KettleLogic {} {}".format(id, e)) logging.error("Faild to switch on KettleLogic {} {}".format(id, e))
@ -21,6 +22,19 @@ class KettleController(BasicController):
item = self.find_by_id(id) item = self.find_by_id(id)
instance = item.get("instance") instance = item.get("instance")
await instance.stop() await instance.stop()
await self.push_udpate()
except Exception as e:
logging.error("Faild to switch on KettleLogic {} {}".format(id, e))
async def toggle(self, id):
try:
item = self.find_by_id(id)
instance = item.get("instance")
if instance is None or instance.running == False:
await self.start(id)
else:
await instance.stop()
await self.push_udpate()
except Exception as e: except Exception as e:
logging.error("Faild to switch on KettleLogic {} {}".format(id, e)) logging.error("Faild to switch on KettleLogic {} {}".format(id, e))
@ -35,7 +49,7 @@ class KettleController(BasicController):
def create_dict(self, data): def create_dict(self, data):
try: try:
instance = data.get("instance") instance = data.get("instance")
state = dict(state=instance.get_state()) state = instance.get_state()
except Exception as e: except Exception as e:
logging.error("Faild to create KettleLogic dict {} ".format(e)) logging.error("Faild to create KettleLogic dict {} ".format(e))
state = dict() state = dict()

View file

@ -18,72 +18,8 @@ class PluginController():
def __init__(self, cbpi): def __init__(self, cbpi):
self.cbpi = cbpi self.cbpi = cbpi
self.plugins = load_config("./config/plugin_list.txt")
if self.plugins is None:
self.plugins = {}
async def load_plugin_list(self):
async with aiohttp.ClientSession() as session:
async with session.get('http://localhost:2202/list') as resp:
if (resp.status == 200):
data = yaml.load(await resp.text())
self.plugins = data
return data
def installed_plugins(self):
return self.plugins
async def install(self, package_name):
async def install(cbpi, plugins, package_name):
data = subprocess.check_output(
[sys.executable, "-m", "pip", "install", package_name])
data = data.decode('UTF-8')
if package_name not in self.plugins:
now = datetime.datetime.now()
self.plugins[package_name] = dict(
version="1.0", installation_date=now.strftime("%Y-%m-%d %H:%M:%S"))
with open('./config/plugin_list.txt', 'w') as outfile:
yaml.dump(self.plugins, outfile, default_flow_style=False)
if data.startswith('Requirement already satisfied'):
self.cbpi.notify(
key="p", message="Plugin already installed ", type="warning")
else:
self.cbpi.notify(
key="p", message="Plugin installed ", type="success")
async with aiohttp.ClientSession() as session:
async with session.get('http://localhost:2202/get/%s' % package_name) as resp:
if (resp.status == 200):
data = await resp.json()
await self.cbpi.job.start_job(install(self.cbpi, self.plugins, data["package_name"]), data["package_name"], "plugins_install")
return True
else:
self.cbpi.notify(
key="p", message="Failed to install Plugin %s " % package_name, type="danger")
return False
async def uninstall(self, package_name):
async def uninstall(cbpi, plugins, package_name):
print("try to uninstall", package_name)
try:
data = subprocess.check_output(
[sys.executable, "-m", "pip", "uninstall", "-y", package_name])
data = data.decode('UTF-8')
if data.startswith("Successfully uninstalled"):
cbpi.notify(key="p", message="Plugin %s Uninstalled" %
package_name, type="success")
else:
cbpi.notify(key="p", message=data, type="success")
except Exception as e:
print(e)
if package_name in self.plugins:
print("Uninstall", self.plugins[package_name])
await self.cbpi.job.start_job(uninstall(self.cbpi, self.plugins, package_name), package_name, "plugins_uninstall")
def load_plugins(self): def load_plugins(self):
this_directory = os.path.dirname(__file__) this_directory = os.path.dirname(__file__)
@ -110,18 +46,21 @@ class PluginController():
def load_plugins_from_evn(self): def load_plugins_from_evn(self):
for p in self.plugins:
logger.debug("Load Plugin %s" % p) for p in self.cbpi.static_config.get("plugins",[]):
try: try:
logger.info("Try to load plugin: %s " % p) logger.info("Try to load plugin: %s " % p)
self.modules[p] = import_module(p) self.modules[p] = import_module(p)
self.modules[p].setup(self.cbpi) self.modules[p].setup(self.cbpi)
#logger.info("Plugin %s loaded successfully" % p) logger.info("Plugin %s loaded successfully" % p)
except Exception as e: except Exception as e:
logger.error("FAILED to load plugin %s " % p) logger.error("FAILED to load plugin %s " % p)
logger.error(e) logger.error(e)
def register(self, name, clazz) -> None: def register(self, name, clazz) -> None:
''' '''
Register a new actor type Register a new actor type
@ -171,9 +110,7 @@ class PluginController():
parameters.append(self._parse_property_object(p)) parameters.append(self._parse_property_object(p))
result["properties"] = parameters result["properties"] = parameters
for method_name, method in cls.__dict__.items(): for method_name, method in cls.__dict__.items():
if hasattr(method, "action"): if hasattr(method, "action"):
key = method.__getattribute__("key") key = method.__getattribute__("key")
parameters = [] parameters = []
for p in method.__getattribute__("parameters"): for p in method.__getattribute__("parameters"):

View file

@ -11,7 +11,7 @@ class SensorController(BasicController):
instance = data.get("instance") instance = data.get("instance")
state = state=instance.get_state() state = state=instance.get_state()
except Exception as e: except Exception as e:
logging.error("Faild to crate actor dict {} ".format(e)) logging.error("Faild to create sensor dict {} ".format(e))
state = dict() state = dict()
return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", [])) return dict(name=data.get("name"), id=data.get("id"), type=data.get("type"), state=state,props=data.get("props", []))

View file

@ -222,7 +222,7 @@ class StepController:
return next((i for i, item in enumerate(self.profile) if item["id"] == id), None) return next((i for i, item in enumerate(self.profile) if item["id"] == id), None)
async def push_udpate(self): async def push_udpate(self):
await self.cbpi.bus.fire("step/update", data=list(map(lambda x: self.create_dict(x), self.profile))) self.cbpi.ws.send(dict(topic="step_update", data=list(map(lambda x: self.create_dict(x), self.profile))))
async def start_step(self,step): async def start_step(self,step):
logging.info("Start Step") logging.info("Start Step")

View file

@ -20,11 +20,7 @@ except Exception:
import RPi.GPIO as GPIO import RPi.GPIO as GPIO
@parameters([Property.Number(label="Param1", configurable=True), @parameters([])
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
class CustomActor(CBPiActor): class CustomActor(CBPiActor):
my_name = "" my_name = ""
@ -37,7 +33,6 @@ class CustomActor(CBPiActor):
def init(self): def init(self):
print("INIT") print("INIT")
self.state = False self.state = False
pass pass

View file

@ -2,11 +2,7 @@ import asyncio
from cbpi.api import * from cbpi.api import *
@parameters([Property.Number(label="Param1", configurable=True), @parameters([])
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
class CustomLogic(CBPiKettleLogic): class CustomLogic(CBPiKettleLogic):
pass pass

View file

@ -7,11 +7,7 @@ from aiohttp import web
from cbpi.api import * from cbpi.api import *
@parameters([Property.Number(label="Param1", configurable=True), @parameters([])
Property.Text(label="Param2", configurable=True, default_value="HALLO"),
Property.Select(label="Param3", options=[1,2,4]),
Property.Sensor(label="Param4"),
Property.Actor(label="Param5")])
class CustomSensor(CBPiSensor): class CustomSensor(CBPiSensor):
def __init__(self, cbpi, id, props): def __init__(self, cbpi, id, props):
@ -36,7 +32,7 @@ class CustomSensor(CBPiSensor):
while self.running is True: while self.running is True:
self.value = random.randint(0,50) self.value = random.randint(0,50)
self.push_update(self.value) self.push_update(self.value)
await asyncio.sleep(1) await asyncio.sleep(10)
def get_state(self): def get_state(self):
return dict(value=self.value) return dict(value=self.value)

View file

@ -182,6 +182,31 @@ class KettleHttpEndpoints():
await self.controller.off(id) await self.controller.off(id)
return web.Response(status=204) return web.Response(status=204)
@request_mapping(path="/{id}/toggle", method="POST", auth_required=auth)
async def http_toggle(self, request) -> web.Response:
    """
    ---
    description: Toggle kettle logic on or off
    tags:
    - Kettle
    parameters:
    - name: "id"
      in: "path"
      description: "Kettle ID"
      required: true
      type: "string"
    responses:
        "204":
            description: successful operation
        "405":
            description: invalid HTTP Method
    """
    # ``auth_required=auth`` matches the neighbouring ``/{id}/action`` handler
    # instead of hard-coding this one endpoint as unauthenticated.
    id = request.match_info['id']
    # The controller decides whether to start or stop the kettle logic.
    await self.controller.toggle(id)
    return web.Response(status=204)
@request_mapping(path="/{id}/action", method="POST", auth_required=auth) @request_mapping(path="/{id}/action", method="POST", auth_required=auth)
async def http_action(self, request) -> web.Response: async def http_action(self, request) -> web.Response:
@ -233,11 +258,20 @@ class KettleHttpEndpoints():
required: true required: true
type: "integer" type: "integer"
format: "int64" format: "int64"
- in: body
name: body
description: Update Temp
required: true
schema:
type: object
properties:
temp:
type: integer
responses: responses:
"204": "204":
description: successful operation description: successful operation
""" """
id = request.match_info['id'] id = request.match_info['id']
#data = await request.json() data = await request.json()
await self.controller.set_target_temp(id,999) await self.controller.set_target_temp(id,data.get("temp"))
return web.Response(status=204) return web.Response(status=204)

View file

@ -15,7 +15,7 @@
"id": "Aifjxmw4QdPfU3XbR6iyis", "id": "Aifjxmw4QdPfU3XbR6iyis",
"name": "Pump1", "name": "Pump1",
"props": {}, "props": {},
"state": false, "state": true,
"type": "CustomActor" "type": "CustomActor"
}, },
{ {
@ -24,6 +24,34 @@
"props": {}, "props": {},
"state": false, "state": false,
"type": "CustomActor" "type": "CustomActor"
},
{
"id": "NjammuygecdvMpoGYc3rXt",
"name": "Heater Boil",
"props": {},
"state": false,
"type": "CustomActor"
},
{
"id": "j4PnSfuWRhgZDgrQScLN7e",
"name": "Vent1",
"props": {},
"state": true,
"type": "CustomActor"
},
{
"id": "ZGJqoybWv3eWrEeGJLopFs",
"name": "Water In",
"props": {},
"state": false,
"type": "CustomActor"
},
{
"id": "NfYJEWbTXPUSUQzS83dfAn",
"name": "Vent Out",
"props": {},
"state": false,
"type": "CustomActor"
} }
] ]
} }

View file

@ -1,64 +1,425 @@
{ {
"elements": [ "elements": [
{ {
"id": "6c670263-7b19-426c-8769-19aac8ebb381", "id": "1ad5cec3-0f10-4910-b5ba-b4a96207d0ca",
"name": "CustomSVG", "name": "Kettle",
"props": { "props": {
"name": "tank", "heigth": "150",
"width": "200" "width": "100"
}, },
"type": "CustomSVG", "type": "Kettle",
"x": 295, "x": 225,
"y": 45 "y": 160
}, },
{ {
"id": "cbe859ca-b8e8-433f-952c-938a2f8a309b", "id": "ba621aee-a733-4238-b892-0f39100a5d21",
"name": "Kettle",
"props": {
"heigth": "150",
"width": "100"
},
"type": "Kettle",
"x": 530,
"y": 160
},
{
"id": "b61f57d9-e9ce-42b5-97df-3b2d7deaf18c",
"name": "Kettle",
"props": {
"heigth": "150",
"width": "100"
},
"type": "Kettle",
"x": 780,
"y": 160
},
{
"id": "f2facefa-5808-4f63-93e7-fd8c3343aa2f",
"name": "Pump1",
"props": {
"actor": "Aifjxmw4QdPfU3XbR6iyis"
},
"type": "ActorButton",
"x": 410,
"y": 380
},
{
"id": "6996220e-b314-4c23-82c5-2d0873bcd1bc",
"name": "KettleControl",
"props": {
"kettle": "oHxKz3z5RjbsxfSz6KUgov",
"orientation": "vertical"
},
"type": "KettleControl",
"x": 165,
"y": 205
},
{
"id": "91547101-86e5-405c-84e4-295d3565adfb",
"name": "Vent",
"props": {
"actor": "j4PnSfuWRhgZDgrQScLN7e"
},
"type": "ActorButton",
"x": 550,
"y": 380
},
{
"id": "a7ec6424-0df5-489e-85a6-5b36d039079b",
"name": "Pump2",
"props": {
"actor": "HX2bKdobuANehPggYcynnj"
},
"type": "ActorButton",
"x": 680,
"y": 380
},
{
"id": "39bb1a5b-294e-47e6-b472-699ef05aa780",
"name": "KettleControl",
"props": {
"kettle": "a7bWex85Z9Td4atwgazpXW",
"orientation": "vertical"
},
"type": "KettleControl",
"x": 720,
"y": 205
},
{
"id": "310054aa-729b-45b2-a3a3-2c73196a2444",
"name": "HLT",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 235,
"y": 165
},
{
"id": "72a66e4f-f7ce-4ac2-9956-c581590bfb3d",
"name": "MashTun",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 540,
"y": 165
},
{
"id": "62f58450-5ce6-45bf-b178-0dde9225ab52",
"name": "Boil",
"props": {
"color": "#fff",
"size": "15"
},
"type": "Text",
"x": 820,
"y": 165
},
{
"id": "e2b351fa-b66e-416a-a6d6-887ee41b3d7e",
"name": "Water",
"props": {
"actor": "ZGJqoybWv3eWrEeGJLopFs"
},
"type": "ActorButton",
"x": 45,
"y": 160
},
{
"id": "9f3f87d4-3c2a-4dcc-9740-8f7efcc553bf",
"name": "Sensor Data",
"props": {
"color": "#fff",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "30",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 255,
"y": 185
},
{
"id": "8df86373-7ed9-4d49-9d29-3b80e67989ab",
"name": "Sensor Data",
"props": {
"color": "#fff",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "30",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 810,
"y": 185
},
{
"id": "16a0e88b-09fb-4f32-9d9a-b82d02c48190",
"name": "TargetTemp",
"props": {
"color": "#fff",
"kettle": "oHxKz3z5RjbsxfSz6KUgov",
"size": "12",
"unit": "\u00b0"
},
"type": "TargetTemp",
"x": 260,
"y": 225
},
{
"id": "2204b231-ca45-4773-a110-0e4b19dfab89",
"name": "TargetTemp",
"props": {
"color": "#fff",
"kettle": "a7bWex85Z9Td4atwgazpXW",
"size": "12",
"unit": "\u00b0"
},
"type": "TargetTemp",
"x": 820,
"y": 225
},
{
"id": "8f3c656c-16b7-4f81-9d6d-8219e90e87d0",
"name": "CustomSVG", "name": "CustomSVG",
"props": { "props": {
"name": "tank", "name": "cbpi_svg",
"width": "100" "width": "50"
}, },
"type": "CustomSVG", "type": "CustomSVG",
"x": 555, "x": 555,
"y": 55 "y": 240
}, },
{ {
"id": "1f1d5ee6-1ccc-409b-a240-c81d50b71627", "id": "2a8b37f8-c0af-4592-9771-2e6500ef4299",
"name": "CustomSVG", "name": "CustomSVG",
"props": { "props": {
"name": "kettle", "name": "cbpi_svg",
"width": "100" "width": "50"
}, },
"type": "CustomSVG", "type": "CustomSVG",
"x": 795, "x": 245,
"y": 90 "y": 240
},
{
"id": "16ec8526-7f2c-4973-bf97-4ab3363e6ca1",
"name": "CustomSVG",
"props": {
"name": "cbpi_svg",
"width": "50"
},
"type": "CustomSVG",
"x": 805,
"y": 240
},
{
"id": "4fecbb43-53be-4d4a-b24d-2d980777afbe",
"name": "CraftBeerPi Brewery",
"props": {
"color": "#fff",
"size": "40"
},
"type": "Text",
"x": 45,
"y": 65
},
{
"id": "4996dd17-b047-4d27-8598-0563dfd444ab",
"name": "Steps",
"props": {
"width": "200"
},
"type": "Steps",
"x": 35,
"y": 315
},
{
"id": "44014b52-4bf0-4136-88a7-3cb9f1882962",
"name": "Out",
"props": {
"actor": "NfYJEWbTXPUSUQzS83dfAn"
},
"type": "ActorButton",
"x": 985,
"y": 265
},
{
"id": "d4a56a0e-f410-47c1-879a-ff41c6422a6e",
"name": "Sensor Data",
"props": {
"color": "red",
"sensor": "8ohkXvFA9UrkHLsxQL38wu",
"size": "40",
"unit": "\u00b0"
},
"type": "Sensor",
"x": 555,
"y": 180
} }
], ],
"pathes": [ "pathes": [
{ {
"condition": [
"ZGJqoybWv3eWrEeGJLopFs"
],
"coordinates": [ "coordinates": [
[ [
305, 225,
75 180
], ],
[ [
160, 115,
190 180
],
[
245,
460
],
[
525,
395
],
[
560,
75
] ]
], ],
"id": "d22d65d2-c4db-4553-856a-e9239a79e136" "id": "731806be-b2cb-4706-8dd1-00bfc7daa818"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
480,
400
],
[
550,
400
]
],
"id": "39c646bc-3655-433d-a989-aa25a4a1d3ab"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
320,
285
],
[
360,
285
],
[
360,
400
],
[
410,
400
]
],
"id": "3fd4d742-a9b4-4d6f-ab75-9fcfed4f5104"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
535,
175
],
[
390,
175
],
[
390,
215
],
[
325,
215
]
],
"id": "91f38257-788c-4255-99cf-f454c69a7d93"
},
{
"condition": [
"Aifjxmw4QdPfU3XbR6iyis",
"j4PnSfuWRhgZDgrQScLN7e"
],
"coordinates": [
[
580,
380
],
[
580,
305
]
],
"id": "0f9ffe1d-0b0c-4a0e-9dbf-3931ded3d050"
},
{
"coordinates": [
[
615,
400
],
[
680,
400
]
],
"id": "fbbd511d-b51c-43a3-95e7-1608f21fdb33"
},
{
"coordinates": [
[
780,
180
],
[
710,
180
],
[
710,
380
]
],
"id": "e4f7b27e-a0db-48e8-82e2-7a07f1a61dc5"
},
{
"condition": [
"NfYJEWbTXPUSUQzS83dfAn"
],
"coordinates": [
[
985,
285
],
[
880,
285
]
],
"id": "0dc28018-7282-4a43-98e6-c1dd198c93d5"
},
{
"condition": [
"NfYJEWbTXPUSUQzS83dfAn"
],
"coordinates": [
[
1015,
375
],
[
1015,
300
]
],
"id": "6ca9c0f9-d4a6-45cf-bfdd-b7f6740c4bc1"
} }
] ]
} }

View file

@ -1,14 +1,13 @@
name: CraftBeerPi name: CraftBeerPi
version: 4.0 version: 4.0
index_url: /cbpi_ui/static/index.html index_url: /cbpi_ui/static/index.html
plugins:
- cbpi4-ui
port: 8080 port: 8080
# login data # login data
username: cbpi username: cbpi
password: 123 password: 123
ws_push_all: true ws_push_all: true

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 30 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

View file

@ -1,81 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
<svg version="1.1" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" x="0" y="0" width="150" height="220" viewBox="0, 0, 150, 220">
<defs>
<linearGradient id="Gradient_1" gradientUnits="userSpaceOnUse" x1="3.5" y1="110.5" x2="147.5" y2="110.5">
<stop offset="0" stop-color="#323232"/>
<stop offset="0.357" stop-color="#FFFFFF"/>
<stop offset="0.571" stop-color="#919191"/>
<stop offset="1" stop-color="#4A4A4A"/>
</linearGradient>
<linearGradient id="Gradient_2" gradientUnits="userSpaceOnUse" x1="73.868" y1="3.277" x2="77.132" y2="217.723">
<stop offset="0" stop-color="#5D5D5D"/>
<stop offset="1" stop-color="#000000" stop-opacity="0.959"/>
</linearGradient>
<linearGradient id="Gradient_3" gradientUnits="userSpaceOnUse" x1="3.5" y1="101.083" x2="147.5" y2="101.083">
<stop offset="0" stop-color="#323232"/>
<stop offset="0.357" stop-color="#FFFFFF"/>
<stop offset="0.571" stop-color="#919191"/>
<stop offset="1" stop-color="#4A4A4A"/>
</linearGradient>
<linearGradient id="Gradient_4" gradientUnits="userSpaceOnUse" x1="2.75" y1="110.5" x2="148.25" y2="110.5">
<stop offset="0" stop-color="#232323"/>
<stop offset="0.357" stop-color="#5B5B5B"/>
<stop offset="0.571" stop-color="#474747"/>
<stop offset="1" stop-color="#282828"/>
</linearGradient>
<linearGradient id="Gradient_5" gradientUnits="userSpaceOnUse" x1="219.5" y1="110" x2="223.5" y2="110">
<stop offset="0" stop-color="#232323"/>
<stop offset="0.357" stop-color="#5B5B5B"/>
<stop offset="0.571" stop-color="#474747"/>
<stop offset="1" stop-color="#282828"/>
</linearGradient>
</defs>
<g id="Ebene_1" display="none">
<g display="none">
<path d="M135.5,3 C141.774,3.18 146.086,7.113 147.348,13.254 L147.5,13.254 L147.5,156.127 L111.5,185.434 C102.435,192.824 93.37,200.214 84.3,207.598 L84.3,218 L66.7,218 L66.7,207.328 C57.672,199.985 48.594,192.701 39.5,185.434 L3.5,156.127 L3.5,13.254 L3.652,13.254 C4.623,7.127 9.57,3.297 15.5,3 L135.5,3 z" fill="url(#Gradient_1)"/>
<path d="M135.5,3 C141.774,3.18 146.086,7.113 147.348,13.254 L147.5,13.254 L147.5,156.127 L111.5,185.434 C102.435,192.824 93.37,200.214 84.3,207.598 L84.3,218 L66.7,218 L66.7,207.328 C57.672,199.985 48.594,192.701 39.5,185.434 L3.5,156.127 L3.5,13.254 L3.652,13.254 C4.623,7.127 9.57,3.297 15.5,3 L135.5,3 z" fill-opacity="0" stroke="#272727" stroke-width="1"/>
</g>
</g>
<g id="Ebene_4"/>
<g id="Ebene_3">
<g display="none">
<g display="none">
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="url(#Gradient_2)"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
</g>
<path d="M75.5,189.637 C41.08,189.637 13.177,182.258 13.177,173.156 C13.177,164.053 41.08,156.674 75.5,156.674 C109.92,156.674 137.823,164.053 137.823,173.156 C137.823,182.258 109.92,189.637 75.5,189.637 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,189.637 C41.08,189.637 13.177,182.258 13.177,173.156 C13.177,164.053 41.08,156.674 75.5,156.674 C109.92,156.674 137.823,164.053 137.823,173.156 C137.823,182.258 109.92,189.637 75.5,189.637 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
<path d="M75.5,177.357 C41.08,177.357 13.177,169.978 13.177,160.875 C13.177,151.772 41.08,144.393 75.5,144.393 C109.92,144.393 137.822,151.772 137.822,160.875 C137.822,169.978 109.92,177.357 75.5,177.357 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,177.357 C41.08,177.357 13.177,169.978 13.177,160.875 C13.177,151.772 41.08,144.393 75.5,144.393 C109.92,144.393 137.823,151.772 137.823,160.875 C137.823,169.978 109.92,177.357 75.5,177.357 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
<path d="M75.5,165.076 C41.08,165.076 13.177,157.697 13.177,148.594 C13.177,139.492 41.08,132.113 75.5,132.113 C109.92,132.113 137.823,139.492 137.823,148.594 C137.823,157.697 109.92,165.076 75.5,165.076 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="10"/>
<path d="M75.5,165.076 C41.08,165.076 13.177,157.697 13.177,148.594 C13.177,139.492 41.08,132.113 75.5,132.113 C109.92,132.113 137.823,139.492 137.823,148.594 C137.823,157.697 109.92,165.076 75.5,165.076 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="8"/>
</g>
<g>
<path d="M2.25,159.208 C2.25,163.834 34.821,167.583 75,167.583 C115.179,167.583 147.75,163.834 147.75,159.208 L147.75,208.875 C147.75,213.5 115.179,217.25 75,217.25 C34.821,217.25 2.25,213.5 2.25,208.875 L2.25,159.208 z" fill="#3B2CD5"/>
<path d="M75,167.333 C34.821,167.333 2.25,163.584 2.25,158.958 C2.25,154.333 34.821,150.583 75,150.583 C115.179,150.583 147.75,154.333 147.75,158.958 C147.75,163.584 115.179,167.333 75,167.333 z" fill="#2193FF"/>
</g>
<path d="M75.5,20 C35.321,20 2.75,16.25 2.75,11.625 C2.75,7 35.321,3.25 75.5,3.25 C115.679,3.25 148.25,7 148.25,11.625 C148.25,16.25 115.679,20 75.5,20 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M75.5,217.75 C35.321,217.75 2.75,214 2.75,209.375 C2.75,204.75 35.321,201 75.5,201 C115.679,201 148.25,204.75 148.25,209.375 C148.25,214 115.679,217.75 75.5,217.75 z" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M2.75,208.604 L2.75,12.396" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
<path d="M148.25,209.375 L148.25,11.625" fill-opacity="0" stroke="#CDCDCD" stroke-width="1"/>
</g>
<g id="Ebene_2">
<g display="none">
<path d="M75.5,3.333 C115.265,3.333 147.5,14.414 147.5,28.083 L147.5,174.083 C147.5,187.752 115.264,198.833 75.5,198.833 C35.736,198.833 3.5,187.752 3.5,174.083 L3.5,28.083 C3.5,14.414 35.736,3.333 75.5,3.333 z" fill="url(#Gradient_3)"/>
<path d="M75.5,3.333 C115.265,3.333 147.5,14.414 147.5,28.083 L147.5,174.083 C147.5,187.752 115.264,198.833 75.5,198.833 C35.736,198.833 3.5,187.752 3.5,174.083 L3.5,28.083 C3.5,14.414 35.736,3.333 75.5,3.333 z" fill-opacity="0" stroke="#272727" stroke-width="1"/>
</g>
<g display="none">
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="#919191"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
<g>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill="url(#Gradient_4)"/>
<path d="M2.75,3.25 L148.25,3.25 L148.25,217.75 L2.75,217.75 L2.75,3.25 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
<g>
<path d="M219.5,108.5 L223.5,108.5 L223.5,111.5 L219.5,111.5 L219.5,108.5 z" fill="url(#Gradient_5)"/>
<path d="M219.5,108.5 L223.5,108.5 L223.5,111.5 L219.5,111.5 L219.5,108.5 z" fill-opacity="0" stroke="#000000" stroke-width="1"/>
</g>
</g>
</svg>

Before

Width:  |  Height:  |  Size: 6.9 KiB

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.2 KiB

View file

@ -4,18 +4,20 @@
"agitator": "", "agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr", "heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "oHxKz3z5RjbsxfSz6KUgov", "id": "oHxKz3z5RjbsxfSz6KUgov",
"name": "Test1111111", "name": "MashTun",
"props": {}, "props": {},
"sensor": "", "sensor": "8ohkXvFA9UrkHLsxQL38wu",
"state": {}, "state": {
"target_temp": null, "running": false
},
"target_temp": 52,
"type": "CustomKettleLogic" "type": "CustomKettleLogic"
}, },
{ {
"agitator": "", "agitator": "",
"heater": "", "heater": "",
"id": "WxAkesrkqiHH3Gywc4fMci", "id": "WxAkesrkqiHH3Gywc4fMci",
"name": "Test", "name": "HLT",
"props": { "props": {
"Param2": "13", "Param2": "13",
"Param3": 1, "Param3": 1,
@ -25,43 +27,20 @@
"sensor": "", "sensor": "",
"state": {}, "state": {},
"target_temp": null, "target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "gc9Bwp38jtyxkVWH5oYRNZ",
"name": "Test",
"props": {
"Param3": 1,
"Param5": "8BLRqagLicCdEBDdc77Sgr"
},
"sensor": "",
"state": {},
"target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "",
"id": "ZfF2N2UnEHtgExNgZJyF5i",
"name": "Test",
"props": {},
"sensor": "",
"state": {},
"target_temp": null,
"type": "CustomKettleLogic"
},
{
"agitator": "",
"heater": "8BLRqagLicCdEBDdc77Sgr",
"id": "oTivUB7LueLeUWoZAnLhwp",
"name": "",
"props": {},
"sensor": "",
"state": {},
"target_temp": null,
"type": "" "type": ""
},
{
"agitator": "",
"heater": "NjammuygecdvMpoGYc3rXt",
"id": "a7bWex85Z9Td4atwgazpXW",
"name": "Boil",
"props": {},
"sensor": "",
"state": {
"running": false
},
"target_temp": 55,
"type": "CustomKettleLogic"
} }
] ]
} }

View file

@ -2,10 +2,10 @@
"data": [ "data": [
{ {
"id": "8ohkXvFA9UrkHLsxQL38wu", "id": "8ohkXvFA9UrkHLsxQL38wu",
"name": "Test1112222", "name": "Sensor1",
"props": {}, "props": {},
"state": { "state": {
"value": 49 "value": 0
}, },
"type": "CustomSensor" "type": "CustomSensor"
} }

View file

@ -1,15 +1,43 @@
{ {
"basic": { "basic": {
"name": "" "name": "PALE ALE"
}, },
"profile": [ "profile": [
{ {
"id": "6mdUtsrBaWeDvKgUXJiLqu", "id": "T2y34Mbex9KjNWXhzfCRby",
"name": "Test", "name": "MashIn",
"props": { "props": {
"Param1": 123, "Param1": 123,
"Param2": "HALLO", "Param2": "HALLO",
"Param3": 1 "Param3": 1,
"count": 1,
"wohoo": 0
},
"status": "P",
"type": "CustomStep2"
},
{
"id": "RjS8Zb2GGpUtNsqHsES3yF",
"name": "Step2",
"props": {
"Param1": 123,
"Param2": "HALLO",
"Param3": 1,
"count": 0,
"wohoo": 0
},
"status": "I",
"type": "CustomStep2"
},
{
"id": "WkZG4fDNxZdtZ7uoTsSHhR",
"name": "Mash Step 1",
"props": {
"Param1": 123,
"Param2": "HALLO",
"Param3": 1,
"count": 0,
"wohoo": 0
}, },
"status": "I", "status": "I",
"type": "CustomStep2" "type": "CustomStep2"

View file

@ -28,9 +28,10 @@ setup(name='cbpi',
"voluptuous==0.12.1", "voluptuous==0.12.1",
"pyfiglet==0.8.post1", "pyfiglet==0.8.post1",
'pandas==1.1.5', 'pandas==1.1.5',
'click==7.1.2',
'shortuuid==1.0.1', 'shortuuid==1.0.1',
'tabulate==0.8.7', 'tabulate==0.8.7',
'cbpi4-ui==0.0.2', 'cbpi4-ui==0.0.3',
], ],
dependency_links=[ dependency_links=[
'https://testpypi.python.org/pypi' 'https://testpypi.python.org/pypi'

BIN
temp.zip Normal file

Binary file not shown.

View file

@ -1,2 +1,3 @@
/Users/manuelfritsch/Documents/git/cbpi4-ui-plugin /Users/manuelfritsch/Documents/git/cbpi4-ui-plugin
/Users/manuelfritsch/Documents/git/cbpi4-ui /Users/manuelfritsch/Documents/git/cbpi4-ui
/Users/manuelfritsch/Documents/git/myplugin/plugin1

View file

@ -1,56 +0,0 @@
About the Copyright Holders
===========================
* Copyright (c) 2008-2011 AQR Capital Management, LLC
AQR Capital Management began pandas development in 2008. Development was
led by Wes McKinney. AQR released the source under this license in 2009.
* Copyright (c) 2011-2012, Lambda Foundry, Inc.
Wes is now an employee of Lambda Foundry, and remains the pandas project
lead.
* Copyright (c) 2011-2012, PyData Development Team
The PyData Development Team is the collection of developers of the PyData
project. This includes all of the PyData sub-projects, including pandas. The
core team that coordinates development on GitHub can be found here:
https://github.com/pydata.
Full credits for pandas contributors can be found in the documentation.
Our Copyright Policy
====================
PyData uses a shared copyright model. Each contributor maintains copyright
over their contributions to PyData. However, it is important to note that
these contributions are typically only changes to the repositories. Thus,
the PyData source code, in its entirety, is not the copyright of any single
person or institution. Instead, it is the collective copyright of the
entire PyData Development Team. If individual contributors want to maintain
a record of what changes/contributions they have specific copyright on,
they should indicate their copyright in the commit message of the change
when they commit the change to one of the PyData repositories.
With this in mind, the following banner should be used in any source code
file to indicate the copyright and license terms:
```
#-----------------------------------------------------------------------------
# Copyright (c) 2012, PyData Development Team
# All rights reserved.
#
# Distributed under the terms of the BSD Simplified License.
#
# The full license is in the LICENSE file, distributed with this software.
#-----------------------------------------------------------------------------
```
Other licenses can be found in the LICENSES directory.
License
=======
pandas is distributed under a 3-clause ("Simplified" or "New") BSD
license. Parts of NumPy, SciPy, numpydoc, bottleneck, which all have
BSD-compatible licenses, are included. Their licenses follow the pandas
license.

View file

@ -1,31 +0,0 @@
BSD 3-Clause License
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
All rights reserved.
Copyright (c) 2011-2020, Open source contributors.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
* Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View file

@ -1,95 +0,0 @@
Metadata-Version: 2.1
Name: pandas
Version: 1.2.0
Summary: Powerful data structures for data analysis, time series, and statistics
Home-page: https://pandas.pydata.org
Maintainer: The PyData Development Team
Maintainer-email: pydata@googlegroups.com
License: BSD
Project-URL: Bug Tracker, https://github.com/pandas-dev/pandas/issues
Project-URL: Documentation, https://pandas.pydata.org/pandas-docs/stable/
Project-URL: Source Code, https://github.com/pandas-dev/pandas
Platform: any
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Operating System :: OS Independent
Classifier: Intended Audience :: Science/Research
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.7
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Cython
Classifier: Topic :: Scientific/Engineering
Requires-Python: >=3.7.1
Requires-Dist: python-dateutil (>=2.7.3)
Requires-Dist: pytz (>=2017.3)
Requires-Dist: numpy (>=1.16.5)
Provides-Extra: test
Requires-Dist: pytest (>=5.0.1) ; extra == 'test'
Requires-Dist: pytest-xdist ; extra == 'test'
Requires-Dist: hypothesis (>=3.58) ; extra == 'test'
**pandas** is a Python package that provides fast, flexible, and expressive data
structures designed to make working with structured (tabular, multidimensional,
potentially heterogeneous) and time series data both easy and intuitive. It
aims to be the fundamental high-level building block for doing practical,
**real world** data analysis in Python. Additionally, it has the broader goal
of becoming **the most powerful and flexible open source data analysis /
manipulation tool available in any language**. It is already well on its way
toward this goal.
pandas is well suited for many different kinds of data:
- Tabular data with heterogeneously-typed columns, as in an SQL table or
Excel spreadsheet
- Ordered and unordered (not necessarily fixed-frequency) time series data.
- Arbitrary matrix data (homogeneously typed or heterogeneous) with row and
column labels
- Any other form of observational / statistical data sets. The data actually
need not be labeled at all to be placed into a pandas data structure
The two primary data structures of pandas, Series (1-dimensional) and DataFrame
(2-dimensional), handle the vast majority of typical use cases in finance,
statistics, social science, and many areas of engineering. For R users,
DataFrame provides everything that R's ``data.frame`` provides and much
more. pandas is built on top of `NumPy <https://www.numpy.org>`__ and is
intended to integrate well within a scientific computing environment with many
other 3rd party libraries.
Here are just a few of the things that pandas does well:
- Easy handling of **missing data** (represented as NaN) in floating point as
well as non-floating point data
- Size mutability: columns can be **inserted and deleted** from DataFrame and
higher dimensional objects
- Automatic and explicit **data alignment**: objects can be explicitly
aligned to a set of labels, or the user can simply ignore the labels and
let `Series`, `DataFrame`, etc. automatically align the data for you in
computations
- Powerful, flexible **group by** functionality to perform
split-apply-combine operations on data sets, for both aggregating and
transforming data
- Make it **easy to convert** ragged, differently-indexed data in other
Python and NumPy data structures into DataFrame objects
- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
of large data sets
- Intuitive **merging** and **joining** data sets
- Flexible **reshaping** and pivoting of data sets
- **Hierarchical** labeling of axes (possible to have multiple labels per
tick)
- Robust IO tools for loading data from **flat files** (CSV and delimited),
Excel files, databases, and saving / loading data from the ultrafast **HDF5
format**
- **Time series**-specific functionality: date range generation and frequency
conversion, moving window statistics, date shifting and lagging.
Many of these principles are here to address the shortcomings frequently
experienced using other languages / scientific research environments. For data
scientists, working with data is typically divided into multiple stages:
munging and cleaning data, analyzing / modeling it, then organizing the results
of the analysis into a form suitable for plotting or tabular display. pandas is
the ideal tool for all of these tasks.

File diff suppressed because it is too large Load diff

View file

@ -1,5 +0,0 @@
Wheel-Version: 1.0
Generator: bdist_wheel (0.36.2)
Root-Is-Purelib: false
Tag: cp38-cp38-macosx_10_9_x86_64

View file

@ -1,3 +0,0 @@
[pandas_plotting_backends]
matplotlib = pandas:plotting._matplotlib

View file

@ -20,9 +20,10 @@ del hard_dependencies, dependency, missing_dependencies
# numpy compat # numpy compat
from pandas.compat.numpy import ( from pandas.compat.numpy import (
np_version_under1p17 as _np_version_under1p17, _np_version_under1p16,
np_version_under1p18 as _np_version_under1p18, _np_version_under1p17,
is_numpy_dev as _is_numpy_dev, _np_version_under1p18,
_is_numpy_dev,
) )
try: try:
@ -33,7 +34,7 @@ except ImportError as e: # pragma: no cover
raise ImportError( raise ImportError(
f"C extension: {module} not built. If you want to import " f"C extension: {module} not built. If you want to import "
"pandas from the source directory, you may need to run " "pandas from the source directory, you may need to run "
"'python setup.py build_ext --force' to build the C extensions first." "'python setup.py build_ext --inplace --force' to build the C extensions first."
) from e ) from e
from pandas._config import ( from pandas._config import (
@ -58,8 +59,6 @@ from pandas.core.api import (
UInt16Dtype, UInt16Dtype,
UInt32Dtype, UInt32Dtype,
UInt64Dtype, UInt64Dtype,
Float32Dtype,
Float64Dtype,
CategoricalDtype, CategoricalDtype,
PeriodDtype, PeriodDtype,
IntervalDtype, IntervalDtype,
@ -102,7 +101,6 @@ from pandas.core.api import (
to_datetime, to_datetime,
to_timedelta, to_timedelta,
# misc # misc
Flags,
Grouper, Grouper,
factorize, factorize,
unique, unique,
@ -187,61 +185,181 @@ __version__ = v.get("closest-tag", v["version"])
__git_version__ = v.get("full-revisionid") __git_version__ = v.get("full-revisionid")
del get_versions, v del get_versions, v
# GH 27101 # GH 27101
def __getattr__(name): # TODO: remove Panel compat in 1.0
import warnings if pandas.compat.PY37:
if name == "datetime": def __getattr__(name):
warnings.warn( import warnings
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. " if name == "Panel":
"Import from datetime module instead.",
FutureWarning, warnings.warn(
stacklevel=2, "The Panel class is removed from pandas. Accessing it "
) "from the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
class Panel:
pass
return Panel
elif name == "datetime":
warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
"Import from datetime module instead.",
FutureWarning,
stacklevel=2,
)
from datetime import datetime as dt
return dt
elif name == "np":
warnings.warn(
"The pandas.np module is deprecated "
"and will be removed from pandas in a future version. "
"Import numpy directly instead",
FutureWarning,
stacklevel=2,
)
import numpy as np
return np
elif name in {"SparseSeries", "SparseDataFrame"}:
warnings.warn(
f"The {name} class is removed from pandas. Accessing it from "
"the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
return type(name, (), {})
elif name == "SparseArray":
warnings.warn(
"The pandas.SparseArray class is deprecated "
"and will be removed from pandas in a future version. "
"Use pandas.arrays.SparseArray instead.",
FutureWarning,
stacklevel=2,
)
from pandas.core.arrays.sparse import SparseArray as _SparseArray
return _SparseArray
raise AttributeError(f"module 'pandas' has no attribute '{name}'")
else:
class Panel:
pass
class SparseDataFrame:
pass
class SparseSeries:
pass
class __numpy:
def __init__(self):
import numpy as np
import warnings
self.np = np
self.warnings = warnings
def __getattr__(self, item):
self.warnings.warn(
"The pandas.np module is deprecated "
"and will be removed from pandas in a future version. "
"Import numpy directly instead",
FutureWarning,
stacklevel=2,
)
try:
return getattr(self.np, item)
except AttributeError as err:
raise AttributeError(f"module numpy has no attribute {item}") from err
np = __numpy()
class __Datetime(type):
from datetime import datetime as dt from datetime import datetime as dt
return dt datetime = dt
elif name == "np": def __getattr__(cls, item):
cls.emit_warning()
warnings.warn( try:
"The pandas.np module is deprecated " return getattr(cls.datetime, item)
"and will be removed from pandas in a future version. " except AttributeError as err:
"Import numpy directly instead", raise AttributeError(
FutureWarning, f"module datetime has no attribute {item}"
stacklevel=2, ) from err
)
import numpy as np
return np def __instancecheck__(cls, other):
return isinstance(other, cls.datetime)
elif name in {"SparseSeries", "SparseDataFrame"}: class __DatetimeSub(metaclass=__Datetime):
warnings.warn( def emit_warning(dummy=0):
f"The {name} class is removed from pandas. Accessing it from " import warnings
"the top-level namespace will also be removed in the next version",
FutureWarning,
stacklevel=2,
)
return type(name, (), {}) warnings.warn(
"The pandas.datetime class is deprecated "
"and will be removed from pandas in a future version. "
"Import from datetime instead.",
FutureWarning,
stacklevel=3,
)
elif name == "SparseArray": def __new__(cls, *args, **kwargs):
cls.emit_warning()
from datetime import datetime as dt
warnings.warn( return dt(*args, **kwargs)
"The pandas.SparseArray class is deprecated "
"and will be removed from pandas in a future version. "
"Use pandas.arrays.SparseArray instead.",
FutureWarning,
stacklevel=2,
)
from pandas.core.arrays.sparse import SparseArray as _SparseArray
return _SparseArray datetime = __DatetimeSub
raise AttributeError(f"module 'pandas' has no attribute '{name}'") class __SparseArray(type):
from pandas.core.arrays.sparse import SparseArray as sa
SparseArray = sa
def __instancecheck__(cls, other):
return isinstance(other, cls.SparseArray)
class __SparseArraySub(metaclass=__SparseArray):
def emit_warning(dummy=0):
import warnings
warnings.warn(
"The pandas.SparseArray class is deprecated "
"and will be removed from pandas in a future version. "
"Use pandas.arrays.SparseArray instead.",
FutureWarning,
stacklevel=3,
)
def __new__(cls, *args, **kwargs):
cls.emit_warning()
from pandas.core.arrays.sparse import SparseArray as sa
return sa(*args, **kwargs)
SparseArray = __SparseArraySub
# module level doc-string # module level doc-string

View file

@ -392,7 +392,7 @@ class option_context(ContextDecorator):
""" """
def __init__(self, *args): def __init__(self, *args):
if len(args) % 2 != 0 or len(args) < 2: if not (len(args) % 2 == 0 and len(args) >= 2):
raise ValueError( raise ValueError(
"Need to invoke as option_context(pat, val, [(pat, val), ...])." "Need to invoke as option_context(pat, val, [(pat, val), ...])."
) )
@ -460,7 +460,9 @@ def register_option(
path = key.split(".") path = key.split(".")
for k in path: for k in path:
if not re.match("^" + tokenize.Name + "$", k): # NOTE: tokenize.Name is not a public constant
# error: Module has no attribute "Name" [attr-defined]
if not re.match("^" + tokenize.Name + "$", k): # type: ignore
raise ValueError(f"{k} is not a valid identifier") raise ValueError(f"{k} is not a valid identifier")
if keyword.iskeyword(k): if keyword.iskeyword(k):
raise ValueError(f"{k} is a python keyword") raise ValueError(f"{k} is a python keyword")
@ -648,7 +650,7 @@ def _build_option_description(k: str) -> str:
s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]" s += f"\n [default: {o.defval}] [currently: {_get_option(k, True)}]"
if d: if d:
rkey = d.rkey or "" rkey = d.rkey if d.rkey else ""
s += "\n (Deprecated" s += "\n (Deprecated"
s += f", use `{rkey}` instead." s += f", use `{rkey}` instead."
s += ")" s += ")"

View file

@ -22,7 +22,7 @@ def detect_console_encoding() -> str:
encoding = None encoding = None
try: try:
encoding = sys.stdout.encoding or sys.stdin.encoding encoding = sys.stdout.encoding or sys.stdin.encoding
except (AttributeError, OSError): except (AttributeError, IOError):
pass pass
# try again for something better # try again for something better

View file

@ -88,18 +88,17 @@ def _valid_locales(locales, normalize):
valid_locales : list valid_locales : list
A list of valid locales. A list of valid locales.
""" """
return [ if normalize:
loc normalizer = lambda x: locale.normalize(x.strip())
for loc in ( else:
locale.normalize(loc.strip()) if normalize else loc.strip() normalizer = lambda x: x.strip()
for loc in locales
) return list(filter(can_set_locale, map(normalizer, locales)))
if can_set_locale(loc)
]
def _default_locale_getter(): def _default_locale_getter():
return subprocess.check_output(["locale -a"], shell=True) raw_locales = subprocess.check_output(["locale -a"], shell=True)
return raw_locales
def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter): def get_locales(prefix=None, normalize=True, locale_getter=_default_locale_getter):

View file

@ -6,7 +6,6 @@ from functools import wraps
import gzip import gzip
import operator import operator
import os import os
import re
from shutil import rmtree from shutil import rmtree
import string import string
import tempfile import tempfile
@ -26,7 +25,7 @@ from pandas._config.localization import ( # noqa:F401
from pandas._libs.lib import no_default from pandas._libs.lib import no_default
import pandas._libs.testing as _testing import pandas._libs.testing as _testing
from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries from pandas._typing import Dtype, FilePathOrBuffer, FrameOrSeries
from pandas.compat import get_lzma_file, import_lzma from pandas.compat import _get_lzma_file, _import_lzma
from pandas.core.dtypes.common import ( from pandas.core.dtypes.common import (
is_bool, is_bool,
@ -71,7 +70,7 @@ from pandas.core.arrays.datetimelike import DatetimeLikeArrayMixin
from pandas.io.common import urlopen from pandas.io.common import urlopen
from pandas.io.formats.printing import pprint_thing from pandas.io.formats.printing import pprint_thing
lzma = import_lzma() lzma = _import_lzma()
_N = 30 _N = 30
_K = 4 _K = 4
@ -85,7 +84,6 @@ ALL_INT_DTYPES = UNSIGNED_INT_DTYPES + SIGNED_INT_DTYPES
ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES ALL_EA_INT_DTYPES = UNSIGNED_EA_INT_DTYPES + SIGNED_EA_INT_DTYPES
FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"] FLOAT_DTYPES: List[Dtype] = [float, "float32", "float64"]
FLOAT_EA_DTYPES: List[Dtype] = ["Float32", "Float64"]
COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"] COMPLEX_DTYPES: List[Dtype] = [complex, "complex64", "complex128"]
STRING_DTYPES: List[Dtype] = [str, "str", "U"] STRING_DTYPES: List[Dtype] = [str, "str", "U"]
@ -108,8 +106,6 @@ ALL_NUMPY_DTYPES = (
+ BYTES_DTYPES + BYTES_DTYPES
) )
NULL_OBJECTS = [None, np.nan, pd.NaT, float("nan"), pd.NA]
# set testing_mode # set testing_mode
_testing_mode_warnings = (DeprecationWarning, ResourceWarning) _testing_mode_warnings = (DeprecationWarning, ResourceWarning)
@ -119,24 +115,14 @@ def set_testing_mode():
# set the testing mode filters # set the testing mode filters
testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
if "deprecate" in testing_mode: if "deprecate" in testing_mode:
# pandas\_testing.py:119: error: Argument 2 to "simplefilter" has warnings.simplefilter("always", _testing_mode_warnings)
# incompatible type "Tuple[Type[DeprecationWarning],
# Type[ResourceWarning]]"; expected "Type[Warning]"
warnings.simplefilter(
"always", _testing_mode_warnings # type: ignore[arg-type]
)
def reset_testing_mode(): def reset_testing_mode():
# reset the testing mode filters # reset the testing mode filters
testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None") testing_mode = os.environ.get("PANDAS_TESTING_MODE", "None")
if "deprecate" in testing_mode: if "deprecate" in testing_mode:
# pandas\_testing.py:126: error: Argument 2 to "simplefilter" has warnings.simplefilter("ignore", _testing_mode_warnings)
# incompatible type "Tuple[Type[DeprecationWarning],
# Type[ResourceWarning]]"; expected "Type[Warning]"
warnings.simplefilter(
"ignore", _testing_mode_warnings # type: ignore[arg-type]
)
set_testing_mode() set_testing_mode()
@ -253,22 +239,16 @@ def decompress_file(path, compression):
if compression is None: if compression is None:
f = open(path, "rb") f = open(path, "rb")
elif compression == "gzip": elif compression == "gzip":
# pandas\_testing.py:243: error: Incompatible types in assignment f = gzip.open(path, "rb")
# (expression has type "IO[Any]", variable has type "BinaryIO")
f = gzip.open(path, "rb") # type: ignore[assignment]
elif compression == "bz2": elif compression == "bz2":
# pandas\_testing.py:245: error: Incompatible types in assignment f = bz2.BZ2File(path, "rb")
# (expression has type "BZ2File", variable has type "BinaryIO")
f = bz2.BZ2File(path, "rb") # type: ignore[assignment]
elif compression == "xz": elif compression == "xz":
f = get_lzma_file(lzma)(path, "rb") f = _get_lzma_file(lzma)(path, "rb")
elif compression == "zip": elif compression == "zip":
zip_file = zipfile.ZipFile(path) zip_file = zipfile.ZipFile(path)
zip_names = zip_file.namelist() zip_names = zip_file.namelist()
if len(zip_names) == 1: if len(zip_names) == 1:
# pandas\_testing.py:252: error: Incompatible types in assignment f = zip_file.open(zip_names.pop())
# (expression has type "IO[bytes]", variable has type "BinaryIO")
f = zip_file.open(zip_names.pop()) # type: ignore[assignment]
else: else:
raise ValueError(f"ZIP file {path} error. Only one file per ZIP.") raise ValueError(f"ZIP file {path} error. Only one file per ZIP.")
else: else:
@ -304,17 +284,11 @@ def write_to_compressed(compression, path, data, dest="test"):
if compression == "zip": if compression == "zip":
compress_method = zipfile.ZipFile compress_method = zipfile.ZipFile
elif compression == "gzip": elif compression == "gzip":
# pandas\_testing.py:288: error: Incompatible types in assignment compress_method = gzip.GzipFile
# (expression has type "Type[GzipFile]", variable has type
# "Type[ZipFile]")
compress_method = gzip.GzipFile # type: ignore[assignment]
elif compression == "bz2": elif compression == "bz2":
# pandas\_testing.py:290: error: Incompatible types in assignment compress_method = bz2.BZ2File
# (expression has type "Type[BZ2File]", variable has type
# "Type[ZipFile]")
compress_method = bz2.BZ2File # type: ignore[assignment]
elif compression == "xz": elif compression == "xz":
compress_method = get_lzma_file(lzma) compress_method = _get_lzma_file(lzma)
else: else:
raise ValueError(f"Unrecognized compression type: {compression}") raise ValueError(f"Unrecognized compression type: {compression}")
@ -324,10 +298,7 @@ def write_to_compressed(compression, path, data, dest="test"):
method = "writestr" method = "writestr"
else: else:
mode = "wb" mode = "wb"
# pandas\_testing.py:302: error: Incompatible types in assignment args = (data,)
# (expression has type "Tuple[Any]", variable has type "Tuple[Any,
# Any]")
args = (data,) # type: ignore[assignment]
method = "write" method = "write"
with compress_method(path, mode=mode) as f: with compress_method(path, mode=mode) as f:
@ -694,7 +665,6 @@ def assert_index_equal(
check_less_precise: Union[bool, int] = no_default, check_less_precise: Union[bool, int] = no_default,
check_exact: bool = True, check_exact: bool = True,
check_categorical: bool = True, check_categorical: bool = True,
check_order: bool = True,
rtol: float = 1.0e-5, rtol: float = 1.0e-5,
atol: float = 1.0e-8, atol: float = 1.0e-8,
obj: str = "Index", obj: str = "Index",
@ -724,12 +694,6 @@ def assert_index_equal(
Whether to compare number exactly. Whether to compare number exactly.
check_categorical : bool, default True check_categorical : bool, default True
Whether to compare internal Categorical exactly. Whether to compare internal Categorical exactly.
check_order : bool, default True
Whether to compare the order of index entries as well as their values.
If True, both indexes must contain the same elements, in the same order.
If False, both indexes must contain the same elements, but in any order.
.. versionadded:: 1.2.0
rtol : float, default 1e-5 rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False. Relative tolerance. Only used when check_exact is False.
@ -741,36 +705,30 @@ def assert_index_equal(
obj : str, default 'Index' obj : str, default 'Index'
Specify object name being compared, internally used to show appropriate Specify object name being compared, internally used to show appropriate
assertion message. assertion message.
Examples
--------
>>> from pandas.testing import assert_index_equal
>>> a = pd.Index([1, 2, 3])
>>> b = pd.Index([1, 2, 3])
>>> assert_index_equal(a, b)
""" """
__tracebackhide__ = True __tracebackhide__ = True
def _check_types(left, right, obj="Index"): def _check_types(l, r, obj="Index"):
if exact: if exact:
assert_class_equal(left, right, exact=exact, obj=obj) assert_class_equal(l, r, exact=exact, obj=obj)
# Skip exact dtype checking when `check_categorical` is False # Skip exact dtype checking when `check_categorical` is False
if check_categorical: if check_categorical:
assert_attr_equal("dtype", left, right, obj=obj) assert_attr_equal("dtype", l, r, obj=obj)
# allow string-like to have different inferred_types # allow string-like to have different inferred_types
if left.inferred_type in ("string"): if l.inferred_type in ("string"):
assert right.inferred_type in ("string") assert r.inferred_type in ("string")
else: else:
assert_attr_equal("inferred_type", left, right, obj=obj) assert_attr_equal("inferred_type", l, r, obj=obj)
def _get_ilevel_values(index, level): def _get_ilevel_values(index, level):
# accept level number only # accept level number only
unique = index.levels[level] unique = index.levels[level]
level_codes = index.codes[level] level_codes = index.codes[level]
filled = take_1d(unique._values, level_codes, fill_value=unique._na_value) filled = take_1d(unique._values, level_codes, fill_value=unique._na_value)
return unique._shallow_copy(filled, name=index.names[level]) values = unique._shallow_copy(filled, name=index.names[level])
return values
if check_less_precise is not no_default: if check_less_precise is not no_default:
warnings.warn( warnings.warn(
@ -802,11 +760,6 @@ def assert_index_equal(
msg3 = f"{len(right)}, {right}" msg3 = f"{len(right)}, {right}"
raise_assert_detail(obj, msg1, msg2, msg3) raise_assert_detail(obj, msg1, msg2, msg3)
# If order doesn't matter then sort the index entries
if not check_order:
left = left.sort_values()
right = right.sort_values()
# MultiIndex special comparison for little-friendly error messages # MultiIndex special comparison for little-friendly error messages
if left.nlevels > 1: if left.nlevels > 1:
left = cast(MultiIndex, left) left = cast(MultiIndex, left)
@ -986,7 +939,7 @@ def assert_categorical_equal(
if check_category_order: if check_category_order:
assert_index_equal(left.categories, right.categories, obj=f"{obj}.categories") assert_index_equal(left.categories, right.categories, obj=f"{obj}.categories")
assert_numpy_array_equal( assert_numpy_array_equal(
left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes" left.codes, right.codes, check_dtype=check_dtype, obj=f"{obj}.codes",
) )
else: else:
try: try:
@ -995,7 +948,9 @@ def assert_categorical_equal(
except TypeError: except TypeError:
# e.g. '<' not supported between instances of 'int' and 'str' # e.g. '<' not supported between instances of 'int' and 'str'
lc, rc = left.categories, right.categories lc, rc = left.categories, right.categories
assert_index_equal(lc, rc, obj=f"{obj}.categories") assert_index_equal(
lc, rc, obj=f"{obj}.categories",
)
assert_index_equal( assert_index_equal(
left.categories.take(left.codes), left.categories.take(left.codes),
right.categories.take(right.codes), right.categories.take(right.codes),
@ -1023,14 +978,8 @@ def assert_interval_array_equal(left, right, exact="equiv", obj="IntervalArray")
""" """
_check_isinstance(left, right, IntervalArray) _check_isinstance(left, right, IntervalArray)
kwargs = {} assert_index_equal(left.left, right.left, exact=exact, obj=f"{obj}.left")
if left._left.dtype.kind in ["m", "M"]: assert_index_equal(left.right, right.right, exact=exact, obj=f"{obj}.left")
# We have a DatetimeArray or TimedeltaArray
kwargs["check_freq"] = False
assert_equal(left._left, right._left, obj=f"{obj}.left", **kwargs)
assert_equal(left._right, right._right, obj=f"{obj}.left", **kwargs)
assert_attr_equal("closed", left, right, obj=obj) assert_attr_equal("closed", left, right, obj=obj)
@ -1041,22 +990,20 @@ def assert_period_array_equal(left, right, obj="PeriodArray"):
assert_attr_equal("freq", left, right, obj=obj) assert_attr_equal("freq", left, right, obj=obj)
def assert_datetime_array_equal(left, right, obj="DatetimeArray", check_freq=True): def assert_datetime_array_equal(left, right, obj="DatetimeArray"):
__tracebackhide__ = True __tracebackhide__ = True
_check_isinstance(left, right, DatetimeArray) _check_isinstance(left, right, DatetimeArray)
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
if check_freq: assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("tz", left, right, obj=obj) assert_attr_equal("tz", left, right, obj=obj)
def assert_timedelta_array_equal(left, right, obj="TimedeltaArray", check_freq=True): def assert_timedelta_array_equal(left, right, obj="TimedeltaArray"):
__tracebackhide__ = True __tracebackhide__ = True
_check_isinstance(left, right, TimedeltaArray) _check_isinstance(left, right, TimedeltaArray)
assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data") assert_numpy_array_equal(left._data, right._data, obj=f"{obj}._data")
if check_freq: assert_attr_equal("freq", left, right, obj=obj)
assert_attr_equal("freq", left, right, obj=obj)
def raise_assert_detail(obj, message, left, right, diff=None, index_values=None): def raise_assert_detail(obj, message, left, right, diff=None, index_values=None):
@ -1145,13 +1092,13 @@ def assert_numpy_array_equal(
if err_msg is None: if err_msg is None:
if left.shape != right.shape: if left.shape != right.shape:
raise_assert_detail( raise_assert_detail(
obj, f"{obj} shapes are different", left.shape, right.shape obj, f"{obj} shapes are different", left.shape, right.shape,
) )
diff = 0 diff = 0
for left_arr, right_arr in zip(left, right): for l, r in zip(left, right):
# count up differences # count up differences
if not array_equivalent(left_arr, right_arr, strict_nan=strict_nan): if not array_equivalent(l, r, strict_nan=strict_nan):
diff += 1 diff += 1
diff = diff * 100.0 / left.size diff = diff * 100.0 / left.size
@ -1214,13 +1161,6 @@ def assert_extension_array_equal(
Missing values are checked separately from valid values. Missing values are checked separately from valid values.
A mask of missing values is computed for each and checked to match. A mask of missing values is computed for each and checked to match.
The remaining all-valid values are cast to object dtype and checked. The remaining all-valid values are cast to object dtype and checked.
Examples
--------
>>> from pandas.testing import assert_extension_array_equal
>>> a = pd.Series([1, 2, 3, 4])
>>> b, c = a.array, a.array
>>> assert_extension_array_equal(b, c)
""" """
if check_less_precise is not no_default: if check_less_precise is not no_default:
warnings.warn( warnings.warn(
@ -1287,7 +1227,6 @@ def assert_series_equal(
check_categorical=True, check_categorical=True,
check_category_order=True, check_category_order=True,
check_freq=True, check_freq=True,
check_flags=True,
rtol=1.0e-5, rtol=1.0e-5,
atol=1.0e-8, atol=1.0e-8,
obj="Series", obj="Series",
@ -1334,11 +1273,6 @@ def assert_series_equal(
.. versionadded:: 1.0.2 .. versionadded:: 1.0.2
check_freq : bool, default True check_freq : bool, default True
Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
check_flags : bool, default True
Whether to check the `flags` attribute.
.. versionadded:: 1.2.0
rtol : float, default 1e-5 rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False. Relative tolerance. Only used when check_exact is False.
@ -1350,13 +1284,6 @@ def assert_series_equal(
obj : str, default 'Series' obj : str, default 'Series'
Specify object name being compared, internally used to show appropriate Specify object name being compared, internally used to show appropriate
assertion message. assertion message.
Examples
--------
>>> from pandas.testing import assert_series_equal
>>> a = pd.Series([1, 2, 3, 4])
>>> b = pd.Series([1, 2, 3, 4])
>>> assert_series_equal(a, b)
""" """
__tracebackhide__ = True __tracebackhide__ = True
@ -1382,9 +1309,6 @@ def assert_series_equal(
msg2 = f"{len(right)}, {right.index}" msg2 = f"{len(right)}, {right.index}"
raise_assert_detail(obj, "Series length are different", msg1, msg2) raise_assert_detail(obj, "Series length are different", msg1, msg2)
if check_flags:
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}"
# index comparison # index comparison
assert_index_equal( assert_index_equal(
left.index, left.index,
@ -1458,16 +1382,7 @@ def assert_series_equal(
check_dtype=check_dtype, check_dtype=check_dtype,
index_values=np.asarray(left.index), index_values=np.asarray(left.index),
) )
elif is_extension_array_dtype_and_needs_i8_conversion( elif needs_i8_conversion(left.dtype) or needs_i8_conversion(right.dtype):
left.dtype, right.dtype
) or is_extension_array_dtype_and_needs_i8_conversion(right.dtype, left.dtype):
assert_extension_array_equal(
left._values,
right._values,
check_dtype=check_dtype,
index_values=np.asarray(left.index),
)
elif needs_i8_conversion(left.dtype) and needs_i8_conversion(right.dtype):
# DatetimeArray or TimedeltaArray # DatetimeArray or TimedeltaArray
assert_extension_array_equal( assert_extension_array_equal(
left._values, left._values,
@ -1516,7 +1431,6 @@ def assert_frame_equal(
check_categorical=True, check_categorical=True,
check_like=False, check_like=False,
check_freq=True, check_freq=True,
check_flags=True,
rtol=1.0e-5, rtol=1.0e-5,
atol=1.0e-8, atol=1.0e-8,
obj="DataFrame", obj="DataFrame",
@ -1578,8 +1492,6 @@ def assert_frame_equal(
(same as in columns) - same labels must be with the same data. (same as in columns) - same labels must be with the same data.
check_freq : bool, default True check_freq : bool, default True
Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex. Whether to check the `freq` attribute on a DatetimeIndex or TimedeltaIndex.
check_flags : bool, default True
Whether to check the `flags` attribute.
rtol : float, default 1e-5 rtol : float, default 1e-5
Relative tolerance. Only used when check_exact is False. Relative tolerance. Only used when check_exact is False.
@ -1647,11 +1559,11 @@ def assert_frame_equal(
# shape comparison # shape comparison
if left.shape != right.shape: if left.shape != right.shape:
raise_assert_detail( raise_assert_detail(
obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}" obj, f"{obj} shape mismatch", f"{repr(left.shape)}", f"{repr(right.shape)}",
) )
if check_flags: if check_like:
assert left.flags == right.flags, f"{repr(left.flags)} != {repr(right.flags)}" left, right = left.reindex_like(right), right
# index comparison # index comparison
assert_index_equal( assert_index_equal(
@ -1661,7 +1573,6 @@ def assert_frame_equal(
check_names=check_names, check_names=check_names,
check_exact=check_exact, check_exact=check_exact,
check_categorical=check_categorical, check_categorical=check_categorical,
check_order=not check_like,
rtol=rtol, rtol=rtol,
atol=atol, atol=atol,
obj=f"{obj}.index", obj=f"{obj}.index",
@ -1675,15 +1586,11 @@ def assert_frame_equal(
check_names=check_names, check_names=check_names,
check_exact=check_exact, check_exact=check_exact,
check_categorical=check_categorical, check_categorical=check_categorical,
check_order=not check_like,
rtol=rtol, rtol=rtol,
atol=atol, atol=atol,
obj=f"{obj}.columns", obj=f"{obj}.columns",
) )
if check_like:
left, right = left.reindex_like(right), right
# compare by blocks # compare by blocks
if by_blocks: if by_blocks:
rblocks = right._to_dict_of_blocks() rblocks = right._to_dict_of_blocks()
@ -1779,7 +1686,7 @@ def box_expected(expected, box_cls, transpose=True):
elif box_cls is pd.DataFrame: elif box_cls is pd.DataFrame:
expected = pd.Series(expected).to_frame() expected = pd.Series(expected).to_frame()
if transpose: if transpose:
# for vector operations, we need a DataFrame to be a single-row, # for vector operations, we we need a DataFrame to be a single-row,
# not a single-column, in order to operate against non-DataFrame # not a single-column, in order to operate against non-DataFrame
# vectors of the same length. # vectors of the same length.
expected = expected.T expected = expected.T
@ -1877,20 +1784,6 @@ def assert_copy(iter1, iter2, **eql_kwargs):
assert elem1 is not elem2, msg assert elem1 is not elem2, msg
def is_extension_array_dtype_and_needs_i8_conversion(left_dtype, right_dtype) -> bool:
"""
Checks that we have the combination of an ExtensionArraydtype and
a dtype that should be converted to int64
Returns
-------
bool
Related to issue #37609
"""
return is_extension_array_dtype(left_dtype) and needs_i8_conversion(right_dtype)
def getCols(k): def getCols(k):
return string.ascii_uppercase[:k] return string.ascii_uppercase[:k]
@ -1955,7 +1848,8 @@ def makeTimedeltaIndex(k=10, freq="D", name=None, **kwargs):
def makePeriodIndex(k=10, name=None, **kwargs): def makePeriodIndex(k=10, name=None, **kwargs):
dt = datetime(2000, 1, 1) dt = datetime(2000, 1, 1)
return pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs) dr = pd.period_range(start=dt, periods=k, freq="B", name=name, **kwargs)
return dr
def makeMultiIndex(k=10, names=None, **kwargs): def makeMultiIndex(k=10, names=None, **kwargs):
@ -2053,7 +1947,8 @@ def index_subclass_makers_generator():
makeCategoricalIndex, makeCategoricalIndex,
makeMultiIndex, makeMultiIndex,
] ]
yield from make_index_funcs for make_index_func in make_index_funcs:
yield make_index_func
def all_timeseries_index_generator(k=10): def all_timeseries_index_generator(k=10):
@ -2067,8 +1962,7 @@ def all_timeseries_index_generator(k=10):
""" """
make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex] make_index_funcs = [makeDateIndex, makePeriodIndex, makeTimedeltaIndex]
for make_index_func in make_index_funcs: for make_index_func in make_index_funcs:
# pandas\_testing.py:1986: error: Cannot call function of unknown type yield make_index_func(k=k)
yield make_index_func(k=k) # type: ignore[operator]
# make series # make series
@ -2192,18 +2086,17 @@ def makeCustomIndex(
names = [names] names = [names]
# specific 1D index type requested? # specific 1D index type requested?
idx_func = { idx_func = dict(
"i": makeIntIndex, i=makeIntIndex,
"f": makeFloatIndex, f=makeFloatIndex,
"s": makeStringIndex, s=makeStringIndex,
"u": makeUnicodeIndex, u=makeUnicodeIndex,
"dt": makeDateIndex, dt=makeDateIndex,
"td": makeTimedeltaIndex, td=makeTimedeltaIndex,
"p": makePeriodIndex, p=makePeriodIndex,
}.get(idx_type) ).get(idx_type)
if idx_func: if idx_func:
# pandas\_testing.py:2120: error: Cannot call function of unknown type idx = idx_func(nentries)
idx = idx_func(nentries) # type: ignore[operator]
# but we need to fill in the name # but we need to fill in the name
if names: if names:
idx.name = names[0] idx.name = names[0]
@ -2231,8 +2124,7 @@ def makeCustomIndex(
# build a list of lists to create the index from # build a list of lists to create the index from
div_factor = nentries // ndupe_l[i] + 1 div_factor = nentries // ndupe_l[i] + 1
# pandas\_testing.py:2148: error: Need type annotation for 'cnt' cnt = Counter()
cnt = Counter() # type: ignore[var-annotated]
for j in range(div_factor): for j in range(div_factor):
label = f"{prefix}_l{i}_g{j}" label = f"{prefix}_l{i}_g{j}"
cnt[label] = ndupe_l[i] cnt[label] = ndupe_l[i]
@ -2390,14 +2282,7 @@ def _create_missing_idx(nrows, ncols, density, random_state=None):
def makeMissingDataframe(density=0.9, random_state=None): def makeMissingDataframe(density=0.9, random_state=None):
df = makeDataFrame() df = makeDataFrame()
# pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple i, j = _create_missing_idx(*df.shape, density=density, random_state=random_state)
# values for keyword argument "density" [misc]
# pandas\_testing.py:2306: error: "_create_missing_idx" gets multiple
# values for keyword argument "random_state" [misc]
i, j = _create_missing_idx( # type: ignore[misc]
*df.shape, density=density, random_state=random_state
)
df.values[i, j] = np.nan df.values[i, j] = np.nan
return df return df
@ -2422,10 +2307,7 @@ def optional_args(decorator):
is_decorating = not kwargs and len(args) == 1 and callable(args[0]) is_decorating = not kwargs and len(args) == 1 and callable(args[0])
if is_decorating: if is_decorating:
f = args[0] f = args[0]
# pandas\_testing.py:2331: error: Incompatible types in assignment args = []
# (expression has type "List[<nothing>]", variable has type
# "Tuple[Any, ...]")
args = [] # type: ignore[assignment]
return dec(f) return dec(f)
else: else:
return dec return dec
@ -2509,7 +2391,7 @@ def can_connect(url, error_classes=None):
@optional_args @optional_args
def network( def network(
t, t,
url="https://www.google.com", url="http://www.google.com",
raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT, raise_on_error=_RAISE_NETWORK_ERROR_DEFAULT,
check_before_test=False, check_before_test=False,
error_classes=None, error_classes=None,
@ -2533,7 +2415,7 @@ def network(
The test requiring network connectivity. The test requiring network connectivity.
url : path url : path
The url to test via ``pandas.io.common.urlopen`` to check The url to test via ``pandas.io.common.urlopen`` to check
for connectivity. Defaults to 'https://www.google.com'. for connectivity. Defaults to 'http://www.google.com'.
raise_on_error : bool raise_on_error : bool
If True, never catches errors. If True, never catches errors.
check_before_test : bool check_before_test : bool
@ -2577,7 +2459,7 @@ def network(
You can specify alternative URLs:: You can specify alternative URLs::
>>> @network("https://www.yahoo.com") >>> @network("http://www.yahoo.com")
... def test_something_with_yahoo(): ... def test_something_with_yahoo():
... raise IOError("Failure Message") ... raise IOError("Failure Message")
>>> test_something_with_yahoo() >>> test_something_with_yahoo()
@ -2607,20 +2489,15 @@ def network(
@wraps(t) @wraps(t)
def wrapper(*args, **kwargs): def wrapper(*args, **kwargs):
if ( if check_before_test and not raise_on_error:
check_before_test if not can_connect(url, error_classes):
and not raise_on_error skip()
and not can_connect(url, error_classes)
):
skip()
try: try:
return t(*args, **kwargs) return t(*args, **kwargs)
except Exception as err: except Exception as err:
errno = getattr(err, "errno", None) errno = getattr(err, "errno", None)
if not errno and hasattr(errno, "reason"): if not errno and hasattr(errno, "reason"):
# pandas\_testing.py:2521: error: "Exception" has no attribute errno = getattr(err.reason, "errno", None)
# "reason"
errno = getattr(err.reason, "errno", None) # type: ignore[attr-defined]
if errno in skip_errnos: if errno in skip_errnos:
skip(f"Skipping test due to known errno and error {err}") skip(f"Skipping test due to known errno and error {err}")
@ -2648,11 +2525,10 @@ with_connectivity_check = network
@contextmanager @contextmanager
def assert_produces_warning( def assert_produces_warning(
expected_warning: Optional[Union[Type[Warning], bool]] = Warning, expected_warning=Warning,
filter_level="always", filter_level="always",
check_stacklevel: bool = True, check_stacklevel=True,
raise_on_extra_warnings: bool = True, raise_on_extra_warnings=True,
match: Optional[str] = None,
): ):
""" """
Context manager for running code expected to either raise a specific Context manager for running code expected to either raise a specific
@ -2687,8 +2563,6 @@ def assert_produces_warning(
raise_on_extra_warnings : bool, default True raise_on_extra_warnings : bool, default True
Whether extra warnings not of the type `expected_warning` should Whether extra warnings not of the type `expected_warning` should
cause the test to fail. cause the test to fail.
match : str, optional
Match warning message.
Examples Examples
-------- --------
@ -2715,28 +2589,28 @@ def assert_produces_warning(
with warnings.catch_warnings(record=True) as w: with warnings.catch_warnings(record=True) as w:
saw_warning = False saw_warning = False
matched_message = False
warnings.simplefilter(filter_level) warnings.simplefilter(filter_level)
yield w yield w
extra_warnings = [] extra_warnings = []
for actual_warning in w: for actual_warning in w:
if not expected_warning: if expected_warning and issubclass(
continue actual_warning.category, expected_warning
):
expected_warning = cast(Type[Warning], expected_warning)
if issubclass(actual_warning.category, expected_warning):
saw_warning = True saw_warning = True
if check_stacklevel and issubclass( if check_stacklevel and issubclass(
actual_warning.category, (FutureWarning, DeprecationWarning) actual_warning.category, (FutureWarning, DeprecationWarning)
): ):
_assert_raised_with_correct_stacklevel(actual_warning) from inspect import getframeinfo, stack
if match is not None and re.search(match, str(actual_warning.message)):
matched_message = True
caller = getframeinfo(stack()[2][0])
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller.filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller.filename, msg
else: else:
extra_warnings.append( extra_warnings.append(
( (
@ -2746,41 +2620,18 @@ def assert_produces_warning(
actual_warning.lineno, actual_warning.lineno,
) )
) )
if expected_warning: if expected_warning:
expected_warning = cast(Type[Warning], expected_warning) msg = (
if not saw_warning: f"Did not see expected warning of class "
raise AssertionError( f"{repr(expected_warning.__name__)}"
f"Did not see expected warning of class " )
f"{repr(expected_warning.__name__)}" assert saw_warning, msg
)
if match and not matched_message:
raise AssertionError(
f"Did not see warning {repr(expected_warning.__name__)} "
f"matching {match}"
)
if raise_on_extra_warnings and extra_warnings: if raise_on_extra_warnings and extra_warnings:
raise AssertionError( raise AssertionError(
f"Caused unexpected warning(s): {repr(extra_warnings)}" f"Caused unexpected warning(s): {repr(extra_warnings)}"
) )
def _assert_raised_with_correct_stacklevel(
actual_warning: warnings.WarningMessage,
) -> None:
from inspect import getframeinfo, stack
caller = getframeinfo(stack()[3][0])
msg = (
"Warning not set with correct stacklevel. "
f"File where warning is raised: {actual_warning.filename} != "
f"{caller.filename}. Warning message: {actual_warning.message}"
)
assert actual_warning.filename == caller.filename, msg
class RNGContext: class RNGContext:
""" """
Context manager to set the numpy random number generator speed. Returns Context manager to set the numpy random number generator speed. Returns
@ -2849,7 +2700,7 @@ def use_numexpr(use, min_elements=None):
if min_elements is None: if min_elements is None:
min_elements = expr._MIN_ELEMENTS min_elements = expr._MIN_ELEMENTS
olduse = expr.USE_NUMEXPR olduse = expr._USE_NUMEXPR
oldmin = expr._MIN_ELEMENTS oldmin = expr._MIN_ELEMENTS
expr.set_use_numexpr(use) expr.set_use_numexpr(use)
expr._MIN_ELEMENTS = min_elements expr._MIN_ELEMENTS = min_elements
@ -3029,10 +2880,11 @@ def convert_rows_list_to_csv_str(rows_list: List[str]):
Expected output of to_csv() in current OS. Expected output of to_csv() in current OS.
""" """
sep = os.linesep sep = os.linesep
return sep.join(rows_list) + sep expected = sep.join(rows_list) + sep
return expected
def external_error_raised(expected_exception: Type[Exception]) -> ContextManager: def external_error_raised(expected_exception: Type[Exception],) -> ContextManager:
""" """
Helper function to mark pytest.raises that have an external error message. Helper function to mark pytest.raises that have an external error message.

View file

@ -1,7 +1,5 @@
from datetime import datetime, timedelta, tzinfo from datetime import datetime, timedelta, tzinfo
from io import BufferedIOBase, RawIOBase, TextIOBase, TextIOWrapper from pathlib import Path
from mmap import mmap
from os import PathLike
from typing import ( from typing import (
IO, IO,
TYPE_CHECKING, TYPE_CHECKING,
@ -14,8 +12,6 @@ from typing import (
List, List,
Mapping, Mapping,
Optional, Optional,
Sequence,
Tuple,
Type, Type,
TypeVar, TypeVar,
Union, Union,
@ -27,27 +23,16 @@ import numpy as np
# and use a string literal forward reference to it in subsequent types # and use a string literal forward reference to it in subsequent types
# https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles # https://mypy.readthedocs.io/en/latest/common_issues.html#import-cycles
if TYPE_CHECKING: if TYPE_CHECKING:
from typing import final from pandas._libs import Period, Timedelta, Timestamp # noqa: F401
from pandas._libs import Period, Timedelta, Timestamp from pandas.core.dtypes.dtypes import ExtensionDtype # noqa: F401
from pandas.core.dtypes.dtypes import ExtensionDtype from pandas import Interval # noqa: F401
from pandas import Interval
from pandas.core.arrays.base import ExtensionArray # noqa: F401 from pandas.core.arrays.base import ExtensionArray # noqa: F401
from pandas.core.frame import DataFrame from pandas.core.frame import DataFrame # noqa: F401
from pandas.core.generic import NDFrame # noqa: F401 from pandas.core.generic import NDFrame # noqa: F401
from pandas.core.groupby.generic import DataFrameGroupBy, SeriesGroupBy from pandas.core.indexes.base import Index # noqa: F401
from pandas.core.indexes.base import Index from pandas.core.series import Series # noqa: F401
from pandas.core.resample import Resampler
from pandas.core.series import Series
from pandas.core.window.rolling import BaseWindow
from pandas.io.formats.format import EngFormatter
else:
# typing.final does not exist until py38
final = lambda x: x
# array-like # array-like
@ -74,9 +59,10 @@ Timezone = Union[str, tzinfo]
# other # other
Dtype = Union[ Dtype = Union[
"ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool, object]] "ExtensionDtype", str, np.dtype, Type[Union[str, float, int, complex, bool]]
] ]
DtypeObj = Union[np.dtype, "ExtensionDtype"] DtypeObj = Union[np.dtype, "ExtensionDtype"]
FilePathOrBuffer = Union[str, Path, IO[AnyStr]]
# FrameOrSeriesUnion means either a DataFrame or a Series. E.g. # FrameOrSeriesUnion means either a DataFrame or a Series. E.g.
# `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series # `def func(a: FrameOrSeriesUnion) -> FrameOrSeriesUnion: ...` means that if a Series
@ -92,9 +78,7 @@ FrameOrSeries = TypeVar("FrameOrSeries", bound="NDFrame")
Axis = Union[str, int] Axis = Union[str, int]
Label = Optional[Hashable] Label = Optional[Hashable]
IndexLabel = Union[Label, Sequence[Label]]
Level = Union[Label, int] Level = Union[Label, int]
Shape = Tuple[int, ...]
Ordered = Optional[bool] Ordered = Optional[bool]
JSONSerializable = Optional[Union[PythonScalar, List, Dict]] JSONSerializable = Optional[Union[PythonScalar, List, Dict]]
Axes = Collection Axes = Collection
@ -117,34 +101,8 @@ IndexKeyFunc = Optional[Callable[["Index"], Union["Index", AnyArrayLike]]]
# types of `func` kwarg for DataFrame.aggregate and Series.aggregate # types of `func` kwarg for DataFrame.aggregate and Series.aggregate
AggFuncTypeBase = Union[Callable, str] AggFuncTypeBase = Union[Callable, str]
AggFuncTypeDict = Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]]
AggFuncType = Union[ AggFuncType = Union[
AggFuncTypeBase, AggFuncTypeBase,
List[AggFuncTypeBase], List[AggFuncTypeBase],
AggFuncTypeDict, Dict[Label, Union[AggFuncTypeBase, List[AggFuncTypeBase]]],
] ]
AggObjType = Union[
"Series",
"DataFrame",
"SeriesGroupBy",
"DataFrameGroupBy",
"BaseWindow",
"Resampler",
]
# filenames and file-like-objects
Buffer = Union[IO[AnyStr], RawIOBase, BufferedIOBase, TextIOBase, TextIOWrapper, mmap]
FileOrBuffer = Union[str, Buffer[T]]
FilePathOrBuffer = Union["PathLike[str]", FileOrBuffer[T]]
# for arbitrary kwargs passed during reading/writing files
StorageOptions = Optional[Dict[str, Any]]
# compression keywords and compression
CompressionDict = Dict[str, Any]
CompressionOptions = Optional[Union[str, CompressionDict]]
# type of float formatter in DataFrameFormatter
FloatFormatType = Union[str, Callable, "EngFormatter"]

View file

@ -1,18 +1,20 @@
# This file was generated by 'versioneer.py' (0.19) from # This file was generated by 'versioneer.py' (0.15) from
# revision-control system data, or from the parent directory name of an # revision-control system data, or from the parent directory name of an
# unpacked source archive. Distribution tarballs contain a pre-generated copy # unpacked source archive. Distribution tarballs contain a pre-generated copy
# of this file. # of this file.
import json from warnings import catch_warnings
with catch_warnings(record=True):
import json
import sys
version_json = ''' version_json = '''
{ {
"date": "2020-12-26T13:47:00+0000",
"dirty": false, "dirty": false,
"error": null, "error": null,
"full-revisionid": "3e89b4c4b1580aa890023fc550774e63d499da25", "full-revisionid": "b5958ee1999e9aead1938c0bba2b674378807b3d",
"version": "1.2.0" "version": "1.1.5"
} }
''' # END VERSION_JSON ''' # END VERSION_JSON

View file

@ -4,7 +4,7 @@ Public toolkit API.
from pandas._libs.lib import infer_dtype from pandas._libs.lib import infer_dtype
from pandas.core.dtypes.api import * # noqa: F401, F403 from pandas.core.dtypes.api import * # noqa: F403, F401
from pandas.core.dtypes.concat import union_categoricals from pandas.core.dtypes.concat import union_categoricals
from pandas.core.dtypes.dtypes import ( from pandas.core.dtypes.dtypes import (
CategoricalDtype, CategoricalDtype,

View file

@ -7,7 +7,6 @@ from pandas.core.arrays import (
BooleanArray, BooleanArray,
Categorical, Categorical,
DatetimeArray, DatetimeArray,
FloatingArray,
IntegerArray, IntegerArray,
IntervalArray, IntervalArray,
PandasArray, PandasArray,
@ -21,7 +20,6 @@ __all__ = [
"BooleanArray", "BooleanArray",
"Categorical", "Categorical",
"DatetimeArray", "DatetimeArray",
"FloatingArray",
"IntegerArray", "IntegerArray",
"IntervalArray", "IntervalArray",
"PandasArray", "PandasArray",

View file

@ -8,17 +8,27 @@ Other items:
* platform checker * platform checker
""" """
import platform import platform
import struct
import sys import sys
import warnings import warnings
from pandas._typing import F from pandas._typing import F
PY37 = sys.version_info >= (3, 7)
PY38 = sys.version_info >= (3, 8) PY38 = sys.version_info >= (3, 8)
PY39 = sys.version_info >= (3, 9) PY39 = sys.version_info >= (3, 9)
PYPY = platform.python_implementation() == "PyPy" PYPY = platform.python_implementation() == "PyPy"
IS64 = sys.maxsize > 2 ** 32 IS64 = sys.maxsize > 2 ** 32
# ----------------------------------------------------------------------------
# functions largely based / taken from the six module
# Much of the code in this module comes from Benjamin Peterson's six library.
# The license for this library can be found in LICENSES/SIX and the code can be
# found at https://bitbucket.org/gutworth/six
def set_function_name(f: F, name: str, cls) -> F: def set_function_name(f: F, name: str, cls) -> F:
""" """
Bind the name/qualname attributes of the function. Bind the name/qualname attributes of the function.
@ -29,6 +39,7 @@ def set_function_name(f: F, name: str, cls) -> F:
return f return f
# https://github.com/pandas-dev/pandas/pull/9123
def is_platform_little_endian() -> bool: def is_platform_little_endian() -> bool:
""" """
Checking if the running platform is little endian. Checking if the running platform is little endian.
@ -50,7 +61,7 @@ def is_platform_windows() -> bool:
bool bool
True if the running platform is windows. True if the running platform is windows.
""" """
return sys.platform in ["win32", "cygwin"] return sys.platform == "win32" or sys.platform == "cygwin"
def is_platform_linux() -> bool: def is_platform_linux() -> bool:
@ -62,7 +73,7 @@ def is_platform_linux() -> bool:
bool bool
True if the running platform is linux. True if the running platform is linux.
""" """
return sys.platform == "linux" return sys.platform == "linux2"
def is_platform_mac() -> bool: def is_platform_mac() -> bool:
@ -77,7 +88,19 @@ def is_platform_mac() -> bool:
return sys.platform == "darwin" return sys.platform == "darwin"
def import_lzma(): def is_platform_32bit() -> bool:
"""
Checking if the running platform is 32-bit.
Returns
-------
bool
True if the running platform is 32-bit.
"""
return struct.calcsize("P") * 8 < 64
def _import_lzma():
""" """
Importing the `lzma` module. Importing the `lzma` module.
@ -97,7 +120,7 @@ def import_lzma():
warnings.warn(msg) warnings.warn(msg)
def get_lzma_file(lzma): def _get_lzma_file(lzma):
""" """
Importing the `LZMAFile` class from the `lzma` module. Importing the `LZMAFile` class from the `lzma` module.

View file

@ -11,24 +11,25 @@ VERSIONS = {
"fsspec": "0.7.4", "fsspec": "0.7.4",
"fastparquet": "0.3.2", "fastparquet": "0.3.2",
"gcsfs": "0.6.0", "gcsfs": "0.6.0",
"lxml.etree": "4.3.0", "lxml.etree": "3.8.0",
"matplotlib": "2.2.3", "matplotlib": "2.2.2",
"numexpr": "2.6.8", "numexpr": "2.6.2",
"odfpy": "1.3.0", "odfpy": "1.3.0",
"openpyxl": "2.5.7", "openpyxl": "2.5.7",
"pandas_gbq": "0.12.0", "pandas_gbq": "0.12.0",
"pyarrow": "0.15.0", "pyarrow": "0.13.0",
"pytables": "3.4.3",
"pytest": "5.0.1", "pytest": "5.0.1",
"pyxlsb": "1.0.6", "pyxlsb": "1.0.6",
"s3fs": "0.4.0", "s3fs": "0.4.0",
"scipy": "1.2.0", "scipy": "1.2.0",
"sqlalchemy": "1.2.8", "sqlalchemy": "1.1.4",
"tables": "3.5.1", "tables": "3.4.3",
"tabulate": "0.8.3", "tabulate": "0.8.3",
"xarray": "0.12.3", "xarray": "0.8.2",
"xlrd": "1.2.0", "xlrd": "1.1.0",
"xlwt": "1.3.0", "xlwt": "1.2.0",
"xlsxwriter": "1.0.2", "xlsxwriter": "0.9.8",
"numba": "0.46.0", "numba": "0.46.0",
} }

View file

@ -8,19 +8,19 @@ import numpy as np
# numpy versioning # numpy versioning
_np_version = np.__version__ _np_version = np.__version__
_nlv = LooseVersion(_np_version) _nlv = LooseVersion(_np_version)
np_version_under1p17 = _nlv < LooseVersion("1.17") _np_version_under1p16 = _nlv < LooseVersion("1.16")
np_version_under1p18 = _nlv < LooseVersion("1.18") _np_version_under1p17 = _nlv < LooseVersion("1.17")
_np_version_under1p18 = _nlv < LooseVersion("1.18")
_np_version_under1p19 = _nlv < LooseVersion("1.19") _np_version_under1p19 = _nlv < LooseVersion("1.19")
_np_version_under1p20 = _nlv < LooseVersion("1.20") _np_version_under1p20 = _nlv < LooseVersion("1.20")
is_numpy_dev = ".dev" in str(_nlv) _is_numpy_dev = ".dev" in str(_nlv)
_min_numpy_ver = "1.16.5"
if _nlv < _min_numpy_ver: if _nlv < "1.15.4":
raise ImportError( raise ImportError(
f"this version of pandas is incompatible with numpy < {_min_numpy_ver}\n" "this version of pandas is incompatible with numpy < 1.15.4\n"
f"your numpy version is {_np_version}.\n" f"your numpy version is {_np_version}.\n"
f"Please upgrade numpy to >= {_min_numpy_ver} to use this pandas version" "Please upgrade numpy to >= 1.15.4 to use this pandas version"
) )
@ -65,6 +65,7 @@ def np_array_datetime64_compat(arr, *args, **kwargs):
__all__ = [ __all__ = [
"np", "np",
"_np_version", "_np_version",
"np_version_under1p17", "_np_version_under1p16",
"is_numpy_dev", "_np_version_under1p17",
"_is_numpy_dev",
] ]

View file

@ -1,24 +1,27 @@
""" """
For compatibility with numpy libraries, pandas functions or methods have to For compatibility with numpy libraries, pandas functions or
accept '*args' and '**kwargs' parameters to accommodate numpy arguments that methods have to accept '*args' and '**kwargs' parameters to
are not actually used or respected in the pandas implementation. accommodate numpy arguments that are not actually used or
respected in the pandas implementation.
To ensure that users do not abuse these parameters, validation is performed in To ensure that users do not abuse these parameters, validation
'validators.py' to make sure that any extra parameters passed correspond ONLY is performed in 'validators.py' to make sure that any extra
to those in the numpy signature. Part of that validation includes whether or parameters passed correspond ONLY to those in the numpy signature.
not the user attempted to pass in non-default values for these extraneous Part of that validation includes whether or not the user attempted
parameters. As we want to discourage users from relying on these parameters to pass in non-default values for these extraneous parameters. As we
when calling the pandas implementation, we want them only to pass in the want to discourage users from relying on these parameters when calling
default values for these parameters. the pandas implementation, we want them only to pass in the default values
for these parameters.
This module provides a set of commonly used default arguments for functions and This module provides a set of commonly used default arguments for functions
methods that are spread throughout the codebase. This module will make it and methods that are spread throughout the codebase. This module will make it
easier to adjust to future upstream changes in the analogous numpy signatures. easier to adjust to future upstream changes in the analogous numpy signatures.
""" """
from collections import OrderedDict
from distutils.version import LooseVersion from distutils.version import LooseVersion
from typing import Any, Dict, Optional, Union from typing import Any, Dict, Optional, Union
from numpy import __version__, ndarray from numpy import __version__ as _np_version, ndarray
from pandas._libs.lib import is_bool, is_integer from pandas._libs.lib import is_bool, is_integer
from pandas.errors import UnsupportedFunctionCall from pandas.errors import UnsupportedFunctionCall
@ -71,7 +74,7 @@ class CompatValidator:
raise ValueError(f"invalid validation method '{method}'") raise ValueError(f"invalid validation method '{method}'")
ARGMINMAX_DEFAULTS = {"out": None} ARGMINMAX_DEFAULTS = dict(out=None)
validate_argmin = CompatValidator( validate_argmin = CompatValidator(
ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1 ARGMINMAX_DEFAULTS, fname="argmin", method="both", max_fname_arg_count=1
) )
@ -90,10 +93,11 @@ def process_skipna(skipna, args):
def validate_argmin_with_skipna(skipna, args, kwargs): def validate_argmin_with_skipna(skipna, args, kwargs):
""" """
If 'Series.argmin' is called via the 'numpy' library, the third parameter If 'Series.argmin' is called via the 'numpy' library,
in its signature is 'out', which takes either an ndarray or 'None', so the third parameter in its signature is 'out', which
check if the 'skipna' parameter is either an instance of ndarray or is takes either an ndarray or 'None', so check if the
None, since 'skipna' itself should be a boolean 'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
""" """
skipna, args = process_skipna(skipna, args) skipna, args = process_skipna(skipna, args)
validate_argmin(args, kwargs) validate_argmin(args, kwargs)
@ -102,22 +106,23 @@ def validate_argmin_with_skipna(skipna, args, kwargs):
def validate_argmax_with_skipna(skipna, args, kwargs): def validate_argmax_with_skipna(skipna, args, kwargs):
""" """
If 'Series.argmax' is called via the 'numpy' library, the third parameter If 'Series.argmax' is called via the 'numpy' library,
in its signature is 'out', which takes either an ndarray or 'None', so the third parameter in its signature is 'out', which
check if the 'skipna' parameter is either an instance of ndarray or is takes either an ndarray or 'None', so check if the
None, since 'skipna' itself should be a boolean 'skipna' parameter is either an instance of ndarray or
is None, since 'skipna' itself should be a boolean
""" """
skipna, args = process_skipna(skipna, args) skipna, args = process_skipna(skipna, args)
validate_argmax(args, kwargs) validate_argmax(args, kwargs)
return skipna return skipna
ARGSORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {} ARGSORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict()
ARGSORT_DEFAULTS["axis"] = -1 ARGSORT_DEFAULTS["axis"] = -1
ARGSORT_DEFAULTS["kind"] = "quicksort" ARGSORT_DEFAULTS["kind"] = "quicksort"
ARGSORT_DEFAULTS["order"] = None ARGSORT_DEFAULTS["order"] = None
if LooseVersion(__version__) >= LooseVersion("1.17.0"): if LooseVersion(_np_version) >= LooseVersion("1.17.0"):
# GH-26361. NumPy added radix sort and changed default to None. # GH-26361. NumPy added radix sort and changed default to None.
ARGSORT_DEFAULTS["kind"] = None ARGSORT_DEFAULTS["kind"] = None
@ -126,9 +131,9 @@ validate_argsort = CompatValidator(
ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both" ARGSORT_DEFAULTS, fname="argsort", max_fname_arg_count=0, method="both"
) )
# two different signatures of argsort, this second validation for when the # two different signatures of argsort, this second validation
# `kind` param is supported # for when the `kind` param is supported
ARGSORT_DEFAULTS_KIND: Dict[str, Optional[int]] = {} ARGSORT_DEFAULTS_KIND: "OrderedDict[str, Optional[int]]" = OrderedDict()
ARGSORT_DEFAULTS_KIND["axis"] = -1 ARGSORT_DEFAULTS_KIND["axis"] = -1
ARGSORT_DEFAULTS_KIND["order"] = None ARGSORT_DEFAULTS_KIND["order"] = None
validate_argsort_kind = CompatValidator( validate_argsort_kind = CompatValidator(
@ -138,10 +143,11 @@ validate_argsort_kind = CompatValidator(
def validate_argsort_with_ascending(ascending, args, kwargs): def validate_argsort_with_ascending(ascending, args, kwargs):
""" """
If 'Categorical.argsort' is called via the 'numpy' library, the first If 'Categorical.argsort' is called via the 'numpy' library, the
parameter in its signature is 'axis', which takes either an integer or first parameter in its signature is 'axis', which takes either
'None', so check if the 'ascending' parameter has either integer type or is an integer or 'None', so check if the 'ascending' parameter has
None, since 'ascending' itself should be a boolean either integer type or is None, since 'ascending' itself should
be a boolean
""" """
if is_integer(ascending) or ascending is None: if is_integer(ascending) or ascending is None:
args = (ascending,) + args args = (ascending,) + args
@ -151,7 +157,7 @@ def validate_argsort_with_ascending(ascending, args, kwargs):
return ascending return ascending
CLIP_DEFAULTS: Dict[str, Any] = {"out": None} CLIP_DEFAULTS: Dict[str, Any] = dict(out=None)
validate_clip = CompatValidator( validate_clip = CompatValidator(
CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3 CLIP_DEFAULTS, fname="clip", method="both", max_fname_arg_count=3
) )
@ -159,10 +165,10 @@ validate_clip = CompatValidator(
def validate_clip_with_axis(axis, args, kwargs): def validate_clip_with_axis(axis, args, kwargs):
""" """
If 'NDFrame.clip' is called via the numpy library, the third parameter in If 'NDFrame.clip' is called via the numpy library, the third
its signature is 'out', which can takes an ndarray, so check if the 'axis' parameter in its signature is 'out', which can takes an ndarray,
parameter is an instance of ndarray, since 'axis' itself should either be so check if the 'axis' parameter is an instance of ndarray, since
an integer or None 'axis' itself should either be an integer or None
""" """
if isinstance(axis, ndarray): if isinstance(axis, ndarray):
args = (axis,) + args args = (axis,) + args
@ -172,7 +178,7 @@ def validate_clip_with_axis(axis, args, kwargs):
return axis return axis
CUM_FUNC_DEFAULTS: Dict[str, Any] = {} CUM_FUNC_DEFAULTS: "OrderedDict[str, Any]" = OrderedDict()
CUM_FUNC_DEFAULTS["dtype"] = None CUM_FUNC_DEFAULTS["dtype"] = None
CUM_FUNC_DEFAULTS["out"] = None CUM_FUNC_DEFAULTS["out"] = None
validate_cum_func = CompatValidator( validate_cum_func = CompatValidator(
@ -185,9 +191,10 @@ validate_cumsum = CompatValidator(
def validate_cum_func_with_skipna(skipna, args, kwargs, name): def validate_cum_func_with_skipna(skipna, args, kwargs, name):
""" """
If this function is called via the 'numpy' library, the third parameter in If this function is called via the 'numpy' library, the third
its signature is 'dtype', which takes either a 'numpy' dtype or 'None', so parameter in its signature is 'dtype', which takes either a
check if the 'skipna' parameter is a boolean or not 'numpy' dtype or 'None', so check if the 'skipna' parameter is
a boolean or not
""" """
if not is_bool(skipna): if not is_bool(skipna):
args = (skipna,) + args args = (skipna,) + args
@ -197,7 +204,7 @@ def validate_cum_func_with_skipna(skipna, args, kwargs, name):
return skipna return skipna
ALLANY_DEFAULTS: Dict[str, Optional[bool]] = {} ALLANY_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict()
ALLANY_DEFAULTS["dtype"] = None ALLANY_DEFAULTS["dtype"] = None
ALLANY_DEFAULTS["out"] = None ALLANY_DEFAULTS["out"] = None
ALLANY_DEFAULTS["keepdims"] = False ALLANY_DEFAULTS["keepdims"] = False
@ -208,10 +215,10 @@ validate_any = CompatValidator(
ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1 ALLANY_DEFAULTS, fname="any", method="both", max_fname_arg_count=1
) )
LOGICAL_FUNC_DEFAULTS = {"out": None, "keepdims": False} LOGICAL_FUNC_DEFAULTS = dict(out=None, keepdims=False)
validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs") validate_logical_func = CompatValidator(LOGICAL_FUNC_DEFAULTS, method="kwargs")
MINMAX_DEFAULTS = {"axis": None, "out": None, "keepdims": False} MINMAX_DEFAULTS = dict(axis=None, out=None, keepdims=False)
validate_min = CompatValidator( validate_min = CompatValidator(
MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1 MINMAX_DEFAULTS, fname="min", method="both", max_fname_arg_count=1
) )
@ -219,28 +226,28 @@ validate_max = CompatValidator(
MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1 MINMAX_DEFAULTS, fname="max", method="both", max_fname_arg_count=1
) )
RESHAPE_DEFAULTS: Dict[str, str] = {"order": "C"} RESHAPE_DEFAULTS: Dict[str, str] = dict(order="C")
validate_reshape = CompatValidator( validate_reshape = CompatValidator(
RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1 RESHAPE_DEFAULTS, fname="reshape", method="both", max_fname_arg_count=1
) )
REPEAT_DEFAULTS: Dict[str, Any] = {"axis": None} REPEAT_DEFAULTS: Dict[str, Any] = dict(axis=None)
validate_repeat = CompatValidator( validate_repeat = CompatValidator(
REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1 REPEAT_DEFAULTS, fname="repeat", method="both", max_fname_arg_count=1
) )
ROUND_DEFAULTS: Dict[str, Any] = {"out": None} ROUND_DEFAULTS: Dict[str, Any] = dict(out=None)
validate_round = CompatValidator( validate_round = CompatValidator(
ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1 ROUND_DEFAULTS, fname="round", method="both", max_fname_arg_count=1
) )
SORT_DEFAULTS: Dict[str, Optional[Union[int, str]]] = {} SORT_DEFAULTS: "OrderedDict[str, Optional[Union[int, str]]]" = OrderedDict()
SORT_DEFAULTS["axis"] = -1 SORT_DEFAULTS["axis"] = -1
SORT_DEFAULTS["kind"] = "quicksort" SORT_DEFAULTS["kind"] = "quicksort"
SORT_DEFAULTS["order"] = None SORT_DEFAULTS["order"] = None
validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs") validate_sort = CompatValidator(SORT_DEFAULTS, fname="sort", method="kwargs")
STAT_FUNC_DEFAULTS: Dict[str, Optional[Any]] = {} STAT_FUNC_DEFAULTS: "OrderedDict[str, Optional[Any]]" = OrderedDict()
STAT_FUNC_DEFAULTS["dtype"] = None STAT_FUNC_DEFAULTS["dtype"] = None
STAT_FUNC_DEFAULTS["out"] = None STAT_FUNC_DEFAULTS["out"] = None
@ -274,13 +281,13 @@ validate_median = CompatValidator(
MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1 MEDIAN_DEFAULTS, fname="median", method="both", max_fname_arg_count=1
) )
STAT_DDOF_FUNC_DEFAULTS: Dict[str, Optional[bool]] = {} STAT_DDOF_FUNC_DEFAULTS: "OrderedDict[str, Optional[bool]]" = OrderedDict()
STAT_DDOF_FUNC_DEFAULTS["dtype"] = None STAT_DDOF_FUNC_DEFAULTS["dtype"] = None
STAT_DDOF_FUNC_DEFAULTS["out"] = None STAT_DDOF_FUNC_DEFAULTS["out"] = None
STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False STAT_DDOF_FUNC_DEFAULTS["keepdims"] = False
validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs") validate_stat_ddof_func = CompatValidator(STAT_DDOF_FUNC_DEFAULTS, method="kwargs")
TAKE_DEFAULTS: Dict[str, Optional[str]] = {} TAKE_DEFAULTS: "OrderedDict[str, Optional[str]]" = OrderedDict()
TAKE_DEFAULTS["out"] = None TAKE_DEFAULTS["out"] = None
TAKE_DEFAULTS["mode"] = "raise" TAKE_DEFAULTS["mode"] = "raise"
validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs") validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
@ -288,9 +295,10 @@ validate_take = CompatValidator(TAKE_DEFAULTS, fname="take", method="kwargs")
def validate_take_with_convert(convert, args, kwargs): def validate_take_with_convert(convert, args, kwargs):
""" """
If this function is called via the 'numpy' library, the third parameter in If this function is called via the 'numpy' library, the third
its signature is 'axis', which takes either an ndarray or 'None', so check parameter in its signature is 'axis', which takes either an
if the 'convert' parameter is either an instance of ndarray or is None ndarray or 'None', so check if the 'convert' parameter is either
an instance of ndarray or is None
""" """
if isinstance(convert, ndarray) or convert is None: if isinstance(convert, ndarray) or convert is None:
args = (convert,) + args args = (convert,) + args
@ -300,7 +308,7 @@ def validate_take_with_convert(convert, args, kwargs):
return convert return convert
TRANSPOSE_DEFAULTS = {"axes": None} TRANSPOSE_DEFAULTS = dict(axes=None)
validate_transpose = CompatValidator( validate_transpose = CompatValidator(
TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0 TRANSPOSE_DEFAULTS, fname="transpose", method="both", max_fname_arg_count=0
) )
@ -353,9 +361,10 @@ def validate_expanding_func(name, args, kwargs) -> None:
def validate_groupby_func(name, args, kwargs, allowed=None) -> None: def validate_groupby_func(name, args, kwargs, allowed=None) -> None:
""" """
'args' and 'kwargs' should be empty, except for allowed kwargs because all 'args' and 'kwargs' should be empty, except for allowed
of their necessary parameters are explicitly listed in the function kwargs because all of
signature their necessary parameters are explicitly listed in
the function signature
""" """
if allowed is None: if allowed is None:
allowed = [] allowed = []
@ -374,8 +383,9 @@ RESAMPLER_NUMPY_OPS = ("min", "max", "sum", "prod", "mean", "std", "var")
def validate_resampler_func(method: str, args, kwargs) -> None: def validate_resampler_func(method: str, args, kwargs) -> None:
""" """
'args' and 'kwargs' should be empty because all of their necessary 'args' and 'kwargs' should be empty because all of
parameters are explicitly listed in the function signature their necessary parameters are explicitly listed in
the function signature
""" """
if len(args) + len(kwargs) > 0: if len(args) + len(kwargs) > 0:
if method in RESAMPLER_NUMPY_OPS: if method in RESAMPLER_NUMPY_OPS:
@ -387,20 +397,20 @@ def validate_resampler_func(method: str, args, kwargs) -> None:
raise TypeError("too many arguments passed in") raise TypeError("too many arguments passed in")
def validate_minmax_axis(axis: Optional[int], ndim: int = 1) -> None: def validate_minmax_axis(axis: Optional[int]) -> None:
""" """
Ensure that the axis argument passed to min, max, argmin, or argmax is zero Ensure that the axis argument passed to min, max, argmin, or argmax is
or None, as otherwise it will be incorrectly ignored. zero or None, as otherwise it will be incorrectly ignored.
Parameters Parameters
---------- ----------
axis : int or None axis : int or None
ndim : int, default 1
Raises Raises
------ ------
ValueError ValueError
""" """
ndim = 1 # hard-coded for Index
if axis is None: if axis is None:
return return
if axis >= ndim or (axis < 0 and ndim + axis < 0): if axis >= ndim or (axis < 0 and ndim + axis < 0):

View file

@ -64,7 +64,7 @@ class _LoadSparseSeries:
# https://github.com/python/mypy/issues/1020 # https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "Series", but must return # error: Incompatible return type for "__new__" (returns "Series", but must return
# a subtype of "_LoadSparseSeries") # a subtype of "_LoadSparseSeries")
def __new__(cls) -> "Series": # type: ignore[misc] def __new__(cls) -> "Series": # type: ignore
from pandas import Series from pandas import Series
warnings.warn( warnings.warn(
@ -82,7 +82,7 @@ class _LoadSparseFrame:
# https://github.com/python/mypy/issues/1020 # https://github.com/python/mypy/issues/1020
# error: Incompatible return type for "__new__" (returns "DataFrame", but must # error: Incompatible return type for "__new__" (returns "DataFrame", but must
# return a subtype of "_LoadSparseFrame") # return a subtype of "_LoadSparseFrame")
def __new__(cls) -> "DataFrame": # type: ignore[misc] def __new__(cls) -> "DataFrame": # type: ignore
from pandas import DataFrame from pandas import DataFrame
warnings.warn( warnings.warn(
@ -181,7 +181,7 @@ _class_locations_map = {
# functions for compat and uses a non-public class of the pickle module. # functions for compat and uses a non-public class of the pickle module.
# error: Name 'pkl._Unpickler' is not defined # error: Name 'pkl._Unpickler' is not defined
class Unpickler(pkl._Unpickler): # type: ignore[name-defined] class Unpickler(pkl._Unpickler): # type: ignore
def find_class(self, module, name): def find_class(self, module, name):
# override superclass # override superclass
key = (module, name) key = (module, name)
@ -274,7 +274,7 @@ def patch_pickle():
""" """
orig_loads = pkl.loads orig_loads = pkl.loads
try: try:
setattr(pkl, "loads", loads) pkl.loads = loads
yield yield
finally: finally:
setattr(pkl, "loads", orig_loads) pkl.loads = orig_loads

View file

@ -33,10 +33,8 @@ from pytz import FixedOffset, utc
import pandas.util._test_decorators as td import pandas.util._test_decorators as td
from pandas.core.dtypes.dtypes import DatetimeTZDtype, IntervalDtype
import pandas as pd import pandas as pd
from pandas import DataFrame, Interval, Period, Series, Timedelta, Timestamp from pandas import DataFrame
import pandas._testing as tm import pandas._testing as tm
from pandas.core import ops from pandas.core import ops
from pandas.core.indexes.api import Index, MultiIndex from pandas.core.indexes.api import Index, MultiIndex
@ -57,9 +55,6 @@ def pytest_configure(config):
) )
config.addinivalue_line("markers", "high_memory: mark a test as a high-memory only") config.addinivalue_line("markers", "high_memory: mark a test as a high-memory only")
config.addinivalue_line("markers", "clipboard: mark a pd.read_clipboard test") config.addinivalue_line("markers", "clipboard: mark a pd.read_clipboard test")
config.addinivalue_line(
"markers", "arm_slow: mark a test as slow for arm64 architecture"
)
def pytest_addoption(parser): def pytest_addoption(parser):
@ -176,6 +171,14 @@ def axis(request):
axis_frame = axis axis_frame = axis
@pytest.fixture(params=[0, "index"], ids=lambda x: f"axis {repr(x)}")
def axis_series(request):
"""
Fixture for returning the axis numbers of a Series.
"""
return request.param
@pytest.fixture(params=[True, False, None]) @pytest.fixture(params=[True, False, None])
def observed(request): def observed(request):
""" """
@ -266,7 +269,7 @@ def nselect_method(request):
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# Missing values & co. # Missing values & co.
# ---------------------------------------------------------------- # ----------------------------------------------------------------
@pytest.fixture(params=tm.NULL_OBJECTS, ids=str) @pytest.fixture(params=[None, np.nan, pd.NaT, float("nan"), pd.NA], ids=str)
def nulls_fixture(request): def nulls_fixture(request):
""" """
Fixture for each null type in pandas. Fixture for each null type in pandas.
@ -288,22 +291,11 @@ def unique_nulls_fixture(request):
# Generate cartesian product of unique_nulls_fixture: # Generate cartesian product of unique_nulls_fixture:
unique_nulls_fixture2 = unique_nulls_fixture unique_nulls_fixture2 = unique_nulls_fixture
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# Classes # Classes
# ---------------------------------------------------------------- # ----------------------------------------------------------------
@pytest.fixture(params=[pd.Index, pd.Series], ids=["index", "series"])
@pytest.fixture(params=[pd.DataFrame, pd.Series])
def frame_or_series(request):
"""
Fixture to parametrize over DataFrame and Series.
"""
return request.param
@pytest.fixture(
params=[pd.Index, pd.Series], ids=["index", "series"] # type: ignore[list-item]
)
def index_or_series(request): def index_or_series(request):
""" """
Fixture to parametrize over Index and Series, made necessary by a mypy Fixture to parametrize over Index and Series, made necessary by a mypy
@ -320,16 +312,6 @@ def index_or_series(request):
index_or_series2 = index_or_series index_or_series2 = index_or_series
@pytest.fixture(
params=[pd.Index, pd.Series, pd.array], ids=["index", "series", "array"]
)
def index_or_series_or_array(request):
"""
Fixture to parametrize over Index, Series, and ExtensionArray
"""
return request.param
@pytest.fixture @pytest.fixture
def dict_subclass(): def dict_subclass():
""" """
@ -377,24 +359,11 @@ def multiindex_year_month_day_dataframe_random_data():
tdf = tm.makeTimeDataFrame(100) tdf = tm.makeTimeDataFrame(100)
ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum() ymd = tdf.groupby([lambda x: x.year, lambda x: x.month, lambda x: x.day]).sum()
# use Int64Index, to make sure things work # use Int64Index, to make sure things work
ymd.index = ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels]) ymd.index.set_levels([lev.astype("i8") for lev in ymd.index.levels], inplace=True)
ymd.index.set_names(["year", "month", "day"], inplace=True) ymd.index.set_names(["year", "month", "day"], inplace=True)
return ymd return ymd
@pytest.fixture
def multiindex_dataframe_random_data():
"""DataFrame with 2 level MultiIndex with random data"""
index = MultiIndex(
levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],
codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],
names=["first", "second"],
)
return DataFrame(
np.random.randn(10, 3), index=index, columns=Index(["A", "B", "C"], name="exp")
)
def _create_multiindex(): def _create_multiindex():
""" """
MultiIndex used to test the general functionality of this object MultiIndex used to test the general functionality of this object
@ -407,12 +376,13 @@ def _create_multiindex():
major_codes = np.array([0, 0, 1, 2, 3, 3]) major_codes = np.array([0, 0, 1, 2, 3, 3])
minor_codes = np.array([0, 1, 0, 1, 0, 1]) minor_codes = np.array([0, 1, 0, 1, 0, 1])
index_names = ["first", "second"] index_names = ["first", "second"]
return MultiIndex( mi = MultiIndex(
levels=[major_axis, minor_axis], levels=[major_axis, minor_axis],
codes=[major_codes, minor_codes], codes=[major_codes, minor_codes],
names=index_names, names=index_names,
verify_integrity=False, verify_integrity=False,
) )
return mi
def _create_mi_with_dt64tz_level(): def _create_mi_with_dt64tz_level():
@ -467,29 +437,6 @@ def index(request):
index_fixture2 = index index_fixture2 = index
@pytest.fixture(params=indices_dict.keys())
def index_with_missing(request):
"""
Fixture for indices with missing values
"""
if request.param in ["int", "uint", "range", "empty", "repeats"]:
pytest.xfail("missing values not supported")
# GH 35538. Use deep copy to avoid illusive bug on np-dev
# Azure pipeline that writes into indices_dict despite copy
ind = indices_dict[request.param].copy(deep=True)
vals = ind.values
if request.param in ["tuples", "mi-with-dt64tz-level", "multi"]:
# For setting missing values in the top level of MultiIndex
vals = ind.tolist()
vals[0] = (None,) + vals[0][1:]
vals[-1] = (None,) + vals[-1][1:]
return MultiIndex.from_tuples(vals)
else:
vals[0] = None
vals[-1] = None
return type(ind)(vals)
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# Series' # Series'
# ---------------------------------------------------------------- # ----------------------------------------------------------------
@ -549,23 +496,6 @@ def series_with_simple_index(index):
return _create_series(index) return _create_series(index)
@pytest.fixture
def series_with_multilevel_index():
"""
Fixture with a Series with a 2-level MultiIndex.
"""
arrays = [
["bar", "bar", "baz", "baz", "qux", "qux", "foo", "foo"],
["one", "two", "one", "two", "one", "two", "one", "two"],
]
tuples = zip(*arrays)
index = MultiIndex.from_tuples(tuples)
data = np.random.randn(8)
ser = Series(data, index=index)
ser[3] = np.NaN
return ser
_narrow_dtypes = [ _narrow_dtypes = [
np.float16, np.float16,
np.float32, np.float32,
@ -698,26 +628,6 @@ def float_frame():
return DataFrame(tm.getSeriesData()) return DataFrame(tm.getSeriesData())
# ----------------------------------------------------------------
# Scalars
# ----------------------------------------------------------------
@pytest.fixture(
params=[
(Interval(left=0, right=5), IntervalDtype("int64")),
(Interval(left=0.1, right=0.5), IntervalDtype("float64")),
(Period("2012-01", freq="M"), "period[M]"),
(Period("2012-02-01", freq="D"), "period[D]"),
(
Timestamp("2011-01-01", tz="US/Eastern"),
DatetimeTZDtype(tz="US/Eastern"),
),
(Timedelta(seconds=500), "timedelta64[ns]"),
]
)
def ea_scalar_and_dtype(request):
return request.param
# ---------------------------------------------------------------- # ----------------------------------------------------------------
# Operators & Operations # Operators & Operations
# ---------------------------------------------------------------- # ----------------------------------------------------------------
@ -747,43 +657,6 @@ def all_arithmetic_operators(request):
return request.param return request.param
@pytest.fixture(
params=[
operator.add,
ops.radd,
operator.sub,
ops.rsub,
operator.mul,
ops.rmul,
operator.truediv,
ops.rtruediv,
operator.floordiv,
ops.rfloordiv,
operator.mod,
ops.rmod,
operator.pow,
ops.rpow,
operator.eq,
operator.ne,
operator.lt,
operator.le,
operator.gt,
operator.ge,
operator.and_,
ops.rand_,
operator.xor,
ops.rxor,
operator.or_,
ops.ror_,
]
)
def all_binary_operators(request):
"""
Fixture for operator and roperator arithmetic, comparison, and logical ops.
"""
return request.param
@pytest.fixture( @pytest.fixture(
params=[ params=[
operator.add, operator.add,
@ -964,10 +837,6 @@ TIMEZONES = [
"Asia/Tokyo", "Asia/Tokyo",
"dateutil/US/Pacific", "dateutil/US/Pacific",
"dateutil/Asia/Singapore", "dateutil/Asia/Singapore",
"+01:15",
"-02:15",
"UTC+01:15",
"UTC-02:15",
tzutc(), tzutc(),
tzlocal(), tzlocal(),
FixedOffset(300), FixedOffset(300),
@ -1089,31 +958,6 @@ def float_dtype(request):
return request.param return request.param
@pytest.fixture(params=tm.FLOAT_EA_DTYPES)
def float_ea_dtype(request):
"""
Parameterized fixture for float dtypes.
* 'Float32'
* 'Float64'
"""
return request.param
@pytest.fixture(params=tm.FLOAT_DTYPES + tm.FLOAT_EA_DTYPES)
def any_float_allowed_nullable_dtype(request):
"""
Parameterized fixture for float dtypes.
* float
* 'float32'
* 'float64'
* 'Float32'
* 'Float64'
"""
return request.param
@pytest.fixture(params=tm.COMPLEX_DTYPES) @pytest.fixture(params=tm.COMPLEX_DTYPES)
def complex_dtype(request): def complex_dtype(request):
""" """
@ -1188,26 +1032,6 @@ def any_nullable_int_dtype(request):
return request.param return request.param
@pytest.fixture(params=tm.ALL_EA_INT_DTYPES + tm.FLOAT_EA_DTYPES)
def any_numeric_dtype(request):
"""
Parameterized fixture for any nullable integer dtype and
any float ea dtypes.
* 'UInt8'
* 'Int8'
* 'UInt16'
* 'Int16'
* 'UInt32'
* 'Int32'
* 'UInt64'
* 'Int64'
* 'Float32'
* 'Float64'
"""
return request.param
@pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES) @pytest.fixture(params=tm.SIGNED_EA_INT_DTYPES)
def any_signed_nullable_int_dtype(request): def any_signed_nullable_int_dtype(request):
""" """
@ -1370,13 +1194,7 @@ def ip():
pytest.importorskip("IPython", minversion="6.0.0") pytest.importorskip("IPython", minversion="6.0.0")
from IPython.core.interactiveshell import InteractiveShell from IPython.core.interactiveshell import InteractiveShell
# GH#35711 make sure sqlite history file handle is not leaked return InteractiveShell()
from traitlets.config import Config # isort:skip
c = Config()
c.HistoryManager.hist_file = ":memory:"
return InteractiveShell(config=c)
@pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"]) @pytest.fixture(params=["bsr", "coo", "csc", "csr", "dia", "dok", "lil"])
@ -1389,6 +1207,15 @@ def spmatrix(request):
return getattr(sparse, request.param + "_matrix") return getattr(sparse, request.param + "_matrix")
@pytest.fixture(params=list(tm.cython_table))
def cython_table_items(request):
"""
Yields a tuple of a function and its corresponding name. Correspond to
the list of aggregator "Cython functions" used on selected table items.
"""
return request.param
@pytest.fixture( @pytest.fixture(
params=[ params=[
getattr(pd.offsets, o) getattr(pd.offsets, o)
@ -1410,39 +1237,3 @@ def sort_by_key(request):
Tests None (no key) and the identity key. Tests None (no key) and the identity key.
""" """
return request.param return request.param
@pytest.fixture()
def fsspectest():
pytest.importorskip("fsspec")
from fsspec import register_implementation
from fsspec.implementations.memory import MemoryFileSystem
from fsspec.registry import _registry as registry
class TestMemoryFS(MemoryFileSystem):
protocol = "testmem"
test = [None]
def __init__(self, **kwargs):
self.test[0] = kwargs.pop("test", None)
super().__init__(**kwargs)
register_implementation("testmem", TestMemoryFS, clobber=True)
yield TestMemoryFS()
registry.pop("testmem", None)
TestMemoryFS.test[0] = None
TestMemoryFS.store.clear()
@pytest.fixture(
params=[
("foo", None, None),
("Egon", "Venkman", None),
("NCC1701D", "NCC1701D", "NCC1701D"),
]
)
def names(request):
"""
A 3-tuple of names, the first two for operands, the last for a result.
"""
return request.param

View file

@ -4,7 +4,7 @@ accessor.py contains base classes for implementing accessor properties
that can be mixed into or pinned onto other pandas classes. that can be mixed into or pinned onto other pandas classes.
""" """
from typing import FrozenSet, List, Set from typing import FrozenSet, Set
import warnings import warnings
from pandas.util._decorators import doc from pandas.util._decorators import doc
@ -12,21 +12,28 @@ from pandas.util._decorators import doc
class DirNamesMixin: class DirNamesMixin:
_accessors: Set[str] = set() _accessors: Set[str] = set()
_hidden_attrs: FrozenSet[str] = frozenset() _deprecations: FrozenSet[str] = frozenset()
def _dir_deletions(self) -> Set[str]: def _dir_deletions(self):
""" """
Delete unwanted __dir__ for this object. Delete unwanted __dir__ for this object.
""" """
return self._accessors | self._hidden_attrs return self._accessors | self._deprecations
def _dir_additions(self) -> Set[str]: def _dir_additions(self):
""" """
Add additional __dir__ for this object. Add additional __dir__ for this object.
""" """
return {accessor for accessor in self._accessors if hasattr(self, accessor)} rv = set()
for accessor in self._accessors:
try:
getattr(self, accessor)
rv.add(accessor)
except AttributeError:
pass
return rv
def __dir__(self) -> List[str]: def __dir__(self):
""" """
Provide method name lookup and completion. Provide method name lookup and completion.
@ -34,7 +41,7 @@ class DirNamesMixin:
----- -----
Only provide 'public' methods. Only provide 'public' methods.
""" """
rv = set(super().__dir__()) rv = set(dir(type(self)))
rv = (rv - self._dir_deletions()) | self._dir_additions() rv = (rv - self._dir_deletions()) | self._dir_additions()
return sorted(rv) return sorted(rv)

View file

@ -6,46 +6,32 @@ kwarg aggregations in groupby and DataFrame/Series aggregation
from collections import defaultdict from collections import defaultdict
from functools import partial from functools import partial
from typing import ( from typing import (
TYPE_CHECKING,
Any, Any,
Callable, Callable,
DefaultDict, DefaultDict,
Dict, Dict,
Iterable,
List, List,
Optional, Optional,
Sequence, Sequence,
Tuple, Tuple,
Union, Union,
cast,
) )
from pandas._typing import ( from pandas._typing import AggFuncType, Label
AggFuncType,
AggFuncTypeBase,
AggFuncTypeDict,
AggObjType,
Axis,
FrameOrSeries,
FrameOrSeriesUnion,
Label,
)
from pandas.core.dtypes.cast import is_nested_object
from pandas.core.dtypes.common import is_dict_like, is_list_like from pandas.core.dtypes.common import is_dict_like, is_list_like
from pandas.core.dtypes.generic import ABCDataFrame, ABCNDFrame, ABCSeries
from pandas.core.base import DataError, SpecificationError from pandas.core.base import SpecificationError
import pandas.core.common as com import pandas.core.common as com
from pandas.core.indexes.api import Index from pandas.core.indexes.api import Index
from pandas.core.series import FrameOrSeriesUnion, Series
if TYPE_CHECKING:
from pandas.core.series import Series
def reconstruct_func( def reconstruct_func(
func: Optional[AggFuncType], **kwargs func: Optional[AggFuncType], **kwargs,
) -> Tuple[bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]]]: ) -> Tuple[
bool, Optional[AggFuncType], Optional[List[str]], Optional[List[int]],
]:
""" """
This is the internal function to reconstruct func given if there is relabeling This is the internal function to reconstruct func given if there is relabeling
or not and also normalize the keyword to get new order of columns. or not and also normalize the keyword to get new order of columns.
@ -291,13 +277,12 @@ def maybe_mangle_lambdas(agg_spec: Any) -> Any:
def relabel_result( def relabel_result(
result: FrameOrSeries, result: FrameOrSeriesUnion,
func: Dict[str, List[Union[Callable, str]]], func: Dict[str, List[Union[Callable, str]]],
columns: Iterable[Label], columns: Tuple,
order: Iterable[int], order: List[int],
) -> Dict[Label, "Series"]: ) -> Dict[Label, Series]:
""" """Internal function to reorder result if relabelling is True for
Internal function to reorder result if relabelling is True for
dataframe.agg, and return the reordered result in dict. dataframe.agg, and return the reordered result in dict.
Parameters: Parameters:
@ -322,10 +307,10 @@ def relabel_result(
reordered_indexes = [ reordered_indexes = [
pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1]) pair[0] for pair in sorted(zip(columns, order), key=lambda t: t[1])
] ]
reordered_result_in_dict: Dict[Label, "Series"] = {} reordered_result_in_dict: Dict[Label, Series] = {}
idx = 0 idx = 0
reorder_mask = not isinstance(result, ABCSeries) and len(result.columns) > 1 reorder_mask = not isinstance(result, Series) and len(result.columns) > 1
for col, fun in func.items(): for col, fun in func.items():
s = result[col].dropna() s = result[col].dropna()
@ -388,7 +373,7 @@ def validate_func_kwargs(
(['one', 'two'], ['min', 'max']) (['one', 'two'], ['min', 'max'])
""" """
no_arg_message = "Must provide 'func' or named aggregation **kwargs." no_arg_message = "Must provide 'func' or named aggregation **kwargs."
tuple_given_message = "func is expected but received {} in **kwargs." tuple_given_message = "func is expected but recieved {} in **kwargs."
columns = list(kwargs) columns = list(kwargs)
func = [] func = []
for col_func in kwargs.values(): for col_func in kwargs.values():
@ -398,390 +383,3 @@ def validate_func_kwargs(
if not columns: if not columns:
raise TypeError(no_arg_message) raise TypeError(no_arg_message)
return columns, func return columns, func
def transform(
obj: FrameOrSeries, func: AggFuncType, axis: Axis, *args, **kwargs
) -> FrameOrSeriesUnion:
"""
Transform a DataFrame or Series
Parameters
----------
obj : DataFrame or Series
Object to compute the transform on.
func : string, function, list, or dictionary
Function(s) to compute the transform with.
axis : {0 or 'index', 1 or 'columns'}
Axis along which the function is applied:
* 0 or 'index': apply function to each column.
* 1 or 'columns': apply function to each row.
Returns
-------
DataFrame or Series
Result of applying ``func`` along the given axis of the
Series or DataFrame.
Raises
------
ValueError
If the transform function fails or does not transform.
"""
is_series = obj.ndim == 1
if obj._get_axis_number(axis) == 1:
assert not is_series
return transform(obj.T, func, 0, *args, **kwargs).T
if is_list_like(func) and not is_dict_like(func):
func = cast(List[AggFuncTypeBase], func)
# Convert func equivalent dict
if is_series:
func = {com.get_callable_name(v) or v: v for v in func}
else:
func = {col: func for col in obj}
if is_dict_like(func):
func = cast(AggFuncTypeDict, func)
return transform_dict_like(obj, func, *args, **kwargs)
# func is either str or callable
func = cast(AggFuncTypeBase, func)
try:
result = transform_str_or_callable(obj, func, *args, **kwargs)
except Exception:
raise ValueError("Transform function failed")
# Functions that transform may return empty Series/DataFrame
# when the dtype is not appropriate
if isinstance(result, (ABCSeries, ABCDataFrame)) and result.empty:
raise ValueError("Transform function failed")
if not isinstance(result, (ABCSeries, ABCDataFrame)) or not result.index.equals(
obj.index
):
raise ValueError("Function did not transform")
return result
def transform_dict_like(
obj: FrameOrSeries,
func: AggFuncTypeDict,
*args,
**kwargs,
):
"""
Compute transform in the case of a dict-like func
"""
from pandas.core.reshape.concat import concat
if len(func) == 0:
raise ValueError("No transform functions were provided")
if obj.ndim != 1:
# Check for missing columns on a frame
cols = sorted(set(func.keys()) - set(obj.columns))
if len(cols) > 0:
raise SpecificationError(f"Column(s) {cols} do not exist")
# Can't use func.values(); wouldn't work for a Series
if any(is_dict_like(v) for _, v in func.items()):
# GH 15931 - deprecation of renaming keys
raise SpecificationError("nested renamer is not supported")
results: Dict[Label, FrameOrSeriesUnion] = {}
for name, how in func.items():
colg = obj._gotitem(name, ndim=1)
try:
results[name] = transform(colg, how, 0, *args, **kwargs)
except Exception as err:
if (
str(err) == "Function did not transform"
or str(err) == "No transform functions were provided"
):
raise err
# combine results
if len(results) == 0:
raise ValueError("Transform function failed")
return concat(results, axis=1)
def transform_str_or_callable(
obj: FrameOrSeries, func: AggFuncTypeBase, *args, **kwargs
) -> FrameOrSeriesUnion:
"""
Compute transform in the case of a string or callable func
"""
if isinstance(func, str):
return obj._try_aggregate_string_function(func, *args, **kwargs)
if not args and not kwargs:
f = obj._get_cython_func(func)
if f:
return getattr(obj, f)()
# Two possible ways to use a UDF - apply or call directly
try:
return obj.apply(func, args=args, **kwargs)
except Exception:
return func(obj, *args, **kwargs)
def aggregate(
obj: AggObjType,
arg: AggFuncType,
*args,
**kwargs,
):
"""
Provide an implementation for the aggregators.
Parameters
----------
obj : Pandas object to compute aggregation on.
arg : string, dict, function.
*args : args to pass on to the function.
**kwargs : kwargs to pass on to the function.
Returns
-------
tuple of result, how.
Notes
-----
how can be a string describe the required post-processing, or
None if not required.
"""
_axis = kwargs.pop("_axis", None)
if _axis is None:
_axis = getattr(obj, "axis", 0)
if isinstance(arg, str):
return obj._try_aggregate_string_function(arg, *args, **kwargs), None
elif is_dict_like(arg):
arg = cast(AggFuncTypeDict, arg)
return agg_dict_like(obj, arg, _axis), True
elif is_list_like(arg):
# we require a list, but not an 'str'
arg = cast(List[AggFuncTypeBase], arg)
return agg_list_like(obj, arg, _axis=_axis), None
else:
result = None
if callable(arg):
f = obj._get_cython_func(arg)
if f and not args and not kwargs:
return getattr(obj, f)(), None
# caller can react
return result, True
def agg_list_like(
obj: AggObjType,
arg: List[AggFuncTypeBase],
_axis: int,
) -> FrameOrSeriesUnion:
"""
Compute aggregation in the case of a list-like argument.
Parameters
----------
obj : Pandas object to compute aggregation on.
arg : list
Aggregations to compute.
_axis : int, 0 or 1
Axis to compute aggregation on.
Returns
-------
Result of aggregation.
"""
from pandas.core.reshape.concat import concat
if _axis != 0:
raise NotImplementedError("axis other than 0 is not supported")
if obj._selected_obj.ndim == 1:
selected_obj = obj._selected_obj
else:
selected_obj = obj._obj_with_exclusions
results = []
keys = []
# degenerate case
if selected_obj.ndim == 1:
for a in arg:
colg = obj._gotitem(selected_obj.name, ndim=1, subset=selected_obj)
try:
new_res = colg.aggregate(a)
except TypeError:
pass
else:
results.append(new_res)
# make sure we find a good name
name = com.get_callable_name(a) or a
keys.append(name)
# multiples
else:
for index, col in enumerate(selected_obj):
colg = obj._gotitem(col, ndim=1, subset=selected_obj.iloc[:, index])
try:
new_res = colg.aggregate(arg)
except (TypeError, DataError):
pass
except ValueError as err:
# cannot aggregate
if "Must produce aggregated value" in str(err):
# raised directly in _aggregate_named
pass
elif "no results" in str(err):
# raised directly in _aggregate_multiple_funcs
pass
else:
raise
else:
results.append(new_res)
keys.append(col)
# if we are empty
if not len(results):
raise ValueError("no results")
try:
return concat(results, keys=keys, axis=1, sort=False)
except TypeError as err:
# we are concatting non-NDFrame objects,
# e.g. a list of scalars
from pandas import Series
result = Series(results, index=keys, name=obj.name)
if is_nested_object(result):
raise ValueError(
"cannot combine transform and aggregation operations"
) from err
return result
def agg_dict_like(
obj: AggObjType,
arg: AggFuncTypeDict,
_axis: int,
) -> FrameOrSeriesUnion:
"""
Compute aggregation in the case of a dict-like argument.
Parameters
----------
obj : Pandas object to compute aggregation on.
arg : dict
label-aggregation pairs to compute.
_axis : int, 0 or 1
Axis to compute aggregation on.
Returns
-------
Result of aggregation.
"""
is_aggregator = lambda x: isinstance(x, (list, tuple, dict))
if _axis != 0: # pragma: no cover
raise ValueError("Can only pass dict with axis=0")
selected_obj = obj._selected_obj
# if we have a dict of any non-scalars
# eg. {'A' : ['mean']}, normalize all to
# be list-likes
if any(is_aggregator(x) for x in arg.values()):
new_arg: AggFuncTypeDict = {}
for k, v in arg.items():
if not isinstance(v, (tuple, list, dict)):
new_arg[k] = [v]
else:
new_arg[k] = v
# the keys must be in the columns
# for ndim=2, or renamers for ndim=1
# ok for now, but deprecated
# {'A': { 'ra': 'mean' }}
# {'A': { 'ra': ['mean'] }}
# {'ra': ['mean']}
# not ok
# {'ra' : { 'A' : 'mean' }}
if isinstance(v, dict):
raise SpecificationError("nested renamer is not supported")
elif isinstance(selected_obj, ABCSeries):
raise SpecificationError("nested renamer is not supported")
elif (
isinstance(selected_obj, ABCDataFrame) and k not in selected_obj.columns
):
raise KeyError(f"Column '{k}' does not exist!")
arg = new_arg
else:
# deprecation of renaming keys
# GH 15931
keys = list(arg.keys())
if isinstance(selected_obj, ABCDataFrame) and len(
selected_obj.columns.intersection(keys)
) != len(keys):
cols = sorted(set(keys) - set(selected_obj.columns.intersection(keys)))
raise SpecificationError(f"Column(s) {cols} do not exist")
from pandas.core.reshape.concat import concat
if selected_obj.ndim == 1:
# key only used for output
colg = obj._gotitem(obj._selection, ndim=1)
results = {key: colg.agg(how) for key, how in arg.items()}
else:
# key used for column selection and output
results = {key: obj._gotitem(key, ndim=1).agg(how) for key, how in arg.items()}
# set the final keys
keys = list(arg.keys())
# Avoid making two isinstance calls in all and any below
is_ndframe = [isinstance(r, ABCNDFrame) for r in results.values()]
# combine results
if all(is_ndframe):
keys_to_use = [k for k in keys if not results[k].empty]
# Have to check, if at least one DataFrame is not empty.
keys_to_use = keys_to_use if keys_to_use != [] else keys
axis = 0 if isinstance(obj, ABCSeries) else 1
result = concat({k: results[k] for k in keys_to_use}, axis=axis)
elif any(is_ndframe):
# There is a mix of NDFrames and scalars
raise ValueError(
"cannot perform both aggregation "
"and transformation operations "
"simultaneously"
)
else:
from pandas import Series
# we have a dict of scalars
# GH 36212 use name only if obj is a series
if obj.ndim == 1:
obj = cast("Series", obj)
name = obj.name
else:
name = None
result = Series(results, name=name)
return result

View file

@ -2,17 +2,15 @@
Generic data algorithms. This module is experimental at the moment and not Generic data algorithms. This module is experimental at the moment and not
intended for public consumption intended for public consumption
""" """
from __future__ import annotations
import operator import operator
from textwrap import dedent from textwrap import dedent
from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union, cast from typing import TYPE_CHECKING, Dict, Optional, Tuple, Union
from warnings import catch_warnings, simplefilter, warn from warnings import catch_warnings, simplefilter, warn
import numpy as np import numpy as np
from pandas._libs import Timestamp, algos, hashtable as htable, iNaT, lib from pandas._libs import Timestamp, algos, hashtable as htable, iNaT, lib
from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj, FrameOrSeriesUnion from pandas._typing import AnyArrayLike, ArrayLike, DtypeObj
from pandas.util._decorators import doc from pandas.util._decorators import doc
from pandas.core.dtypes.cast import ( from pandas.core.dtypes.cast import (
@ -50,9 +48,9 @@ from pandas.core.dtypes.common import (
from pandas.core.dtypes.generic import ( from pandas.core.dtypes.generic import (
ABCDatetimeArray, ABCDatetimeArray,
ABCExtensionArray, ABCExtensionArray,
ABCIndex,
ABCIndexClass, ABCIndexClass,
ABCMultiIndex, ABCMultiIndex,
ABCRangeIndex,
ABCSeries, ABCSeries,
ABCTimedeltaArray, ABCTimedeltaArray,
) )
@ -62,7 +60,7 @@ from pandas.core.construction import array, extract_array
from pandas.core.indexers import validate_indices from pandas.core.indexers import validate_indices
if TYPE_CHECKING: if TYPE_CHECKING:
from pandas import Categorical, DataFrame, Index, Series from pandas import Series
_shared_docs: Dict[str, str] = {} _shared_docs: Dict[str, str] = {}
@ -71,7 +69,7 @@ _shared_docs: Dict[str, str] = {}
# dtype access # # dtype access #
# --------------- # # --------------- #
def _ensure_data( def _ensure_data(
values: ArrayLike, dtype: Optional[DtypeObj] = None values, dtype: Optional[DtypeObj] = None
) -> Tuple[np.ndarray, DtypeObj]: ) -> Tuple[np.ndarray, DtypeObj]:
""" """
routine to ensure that our data is of the correct routine to ensure that our data is of the correct
@ -97,12 +95,6 @@ def _ensure_data(
pandas_dtype : np.dtype or ExtensionDtype pandas_dtype : np.dtype or ExtensionDtype
""" """
if dtype is not None:
# We only have non-None dtype when called from `isin`, and
# both Datetimelike and Categorical dispatch before getting here.
assert not needs_i8_conversion(dtype)
assert not is_categorical_dtype(dtype)
if not isinstance(values, ABCMultiIndex): if not isinstance(values, ABCMultiIndex):
# extract_array would raise # extract_array would raise
values = extract_array(values, extract_numpy=True) values = extract_array(values, extract_numpy=True)
@ -139,20 +131,21 @@ def _ensure_data(
return ensure_object(values), np.dtype("object") return ensure_object(values), np.dtype("object")
# datetimelike # datetimelike
if needs_i8_conversion(values.dtype) or needs_i8_conversion(dtype): vals_dtype = getattr(values, "dtype", None)
if is_period_dtype(values.dtype) or is_period_dtype(dtype): if needs_i8_conversion(vals_dtype) or needs_i8_conversion(dtype):
if is_period_dtype(vals_dtype) or is_period_dtype(dtype):
from pandas import PeriodIndex from pandas import PeriodIndex
values = PeriodIndex(values)._data values = PeriodIndex(values)
dtype = values.dtype dtype = values.dtype
elif is_timedelta64_dtype(values.dtype) or is_timedelta64_dtype(dtype): elif is_timedelta64_dtype(vals_dtype) or is_timedelta64_dtype(dtype):
from pandas import TimedeltaIndex from pandas import TimedeltaIndex
values = TimedeltaIndex(values)._data values = TimedeltaIndex(values)
dtype = values.dtype dtype = values.dtype
else: else:
# Datetime # Datetime
if values.ndim > 1 and is_datetime64_ns_dtype(values.dtype): if values.ndim > 1 and is_datetime64_ns_dtype(vals_dtype):
# Avoid calling the DatetimeIndex constructor as it is 1D only # Avoid calling the DatetimeIndex constructor as it is 1D only
# Note: this is reached by DataFrame.rank calls GH#27027 # Note: this is reached by DataFrame.rank calls GH#27027
# TODO(EA2D): special case not needed with 2D EAs # TODO(EA2D): special case not needed with 2D EAs
@ -162,15 +155,14 @@ def _ensure_data(
from pandas import DatetimeIndex from pandas import DatetimeIndex
values = DatetimeIndex(values)._data values = DatetimeIndex(values)
dtype = values.dtype dtype = values.dtype
return values.asi8, dtype return values.asi8, dtype
elif is_categorical_dtype(values.dtype) and ( elif is_categorical_dtype(vals_dtype) and (
is_categorical_dtype(dtype) or dtype is None is_categorical_dtype(dtype) or dtype is None
): ):
values = cast("Categorical", values)
values = values.codes values = values.codes
dtype = pandas_dtype("category") dtype = pandas_dtype("category")
@ -234,8 +226,7 @@ def _ensure_arraylike(values):
""" """
if not is_array_like(values): if not is_array_like(values):
inferred = lib.infer_dtype(values, skipna=False) inferred = lib.infer_dtype(values, skipna=False)
if inferred in ["mixed", "string", "mixed-integer"]: if inferred in ["mixed", "string"]:
# "mixed-integer" to ensure we do not cast ["ss", 42] to str GH#22160
if isinstance(values, tuple): if isinstance(values, tuple):
values = list(values) values = list(values)
values = construct_1d_object_array_from_listlike(values) values = construct_1d_object_array_from_listlike(values)
@ -253,11 +244,11 @@ _hashtables = {
} }
def _get_hashtable_algo(values: np.ndarray): def _get_hashtable_algo(values):
""" """
Parameters Parameters
---------- ----------
values : np.ndarray values : arraylike
Returns Returns
------- -------
@ -271,15 +262,15 @@ def _get_hashtable_algo(values: np.ndarray):
return htable, values return htable, values
def _get_values_for_rank(values: ArrayLike): def _get_values_for_rank(values):
if is_categorical_dtype(values): if is_categorical_dtype(values):
values = cast("Categorical", values)._values_for_rank() values = values._values_for_rank()
values, _ = _ensure_data(values) values, _ = _ensure_data(values)
return values return values
def get_data_algo(values: ArrayLike): def _get_data_algo(values):
values = _get_values_for_rank(values) values = _get_values_for_rank(values)
ndtype = _check_object_for_strings(values) ndtype = _check_object_for_strings(values)
@ -295,6 +286,7 @@ def _check_object_for_strings(values) -> str:
Parameters Parameters
---------- ----------
values : ndarray values : ndarray
ndtype : str
Returns Returns
------- -------
@ -437,64 +429,54 @@ def isin(comps: AnyArrayLike, values: AnyArrayLike) -> np.ndarray:
f"to isin(), you passed a [{type(values).__name__}]" f"to isin(), you passed a [{type(values).__name__}]"
) )
if not isinstance( if not isinstance(values, (ABCIndex, ABCSeries, ABCExtensionArray, np.ndarray)):
values, (ABCIndexClass, ABCSeries, ABCExtensionArray, np.ndarray) values = construct_1d_object_array_from_listlike(list(values))
): # TODO: could use ensure_arraylike here
values = _ensure_arraylike(list(values))
elif isinstance(values, ABCMultiIndex):
# Avoid raising in extract_array
values = np.array(values)
else:
values = extract_array(values, extract_numpy=True)
comps = _ensure_arraylike(comps)
comps = extract_array(comps, extract_numpy=True) comps = extract_array(comps, extract_numpy=True)
if is_categorical_dtype(comps.dtype): if is_categorical_dtype(comps):
# TODO(extension) # TODO(extension)
# handle categoricals # handle categoricals
return cast("Categorical", comps).isin(values) return comps.isin(values) # type: ignore
if needs_i8_conversion(comps.dtype): comps, dtype = _ensure_data(comps)
# Dispatch to DatetimeLikeArrayMixin.isin values, _ = _ensure_data(values, dtype=dtype)
return array(comps).isin(values)
elif needs_i8_conversion(values.dtype) and not is_object_dtype(comps.dtype):
# e.g. comps are integers and values are datetime64s
return np.zeros(comps.shape, dtype=bool)
# TODO: not quite right ... Sparse/Categorical
elif needs_i8_conversion(values.dtype):
return isin(comps, values.astype(object))
elif is_extension_array_dtype(comps.dtype) or is_extension_array_dtype( # faster for larger cases to use np.in1d
values.dtype f = htable.ismember_object
):
return isin(np.asarray(comps), np.asarray(values))
# GH16012 # GH16012
# Ensure np.in1d doesn't get object types or it *may* throw an exception # Ensure np.in1d doesn't get object types or it *may* throw an exception
# Albeit hashmap has O(1) look-up (vs. O(logn) in sorted array), if len(comps) > 1_000_000 and not is_object_dtype(comps):
# in1d is faster for small sizes # If the the values include nan we need to check for nan explicitly
if len(comps) > 1_000_000 and len(values) <= 26 and not is_object_dtype(comps):
# If the values include nan we need to check for nan explicitly
# since np.nan it not equal to np.nan # since np.nan it not equal to np.nan
if isna(values).any(): if isna(values).any():
f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c)) f = lambda c, v: np.logical_or(np.in1d(c, v), np.isnan(c))
else: else:
f = np.in1d f = np.in1d
elif is_integer_dtype(comps):
try:
values = values.astype("int64", copy=False)
comps = comps.astype("int64", copy=False)
f = htable.ismember_int64
except (TypeError, ValueError, OverflowError):
values = values.astype(object)
comps = comps.astype(object)
else: elif is_float_dtype(comps):
common = np.find_common_type([values.dtype, comps.dtype], []) try:
values = values.astype(common, copy=False) values = values.astype("float64", copy=False)
comps = comps.astype(common, copy=False) comps = comps.astype("float64", copy=False)
name = common.name f = htable.ismember_float64
if name == "bool": except (TypeError, ValueError):
name = "uint8" values = values.astype(object)
f = getattr(htable, f"ismember_{name}") comps = comps.astype(object)
return f(comps, values) return f(comps, values)
def factorize_array( def _factorize_array(
values: np.ndarray, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None values, na_sentinel: int = -1, size_hint=None, na_value=None, mask=None,
) -> Tuple[np.ndarray, np.ndarray]: ) -> Tuple[np.ndarray, np.ndarray]:
""" """
Factorize an array-like to codes and uniques. Factorize an array-like to codes and uniques.
@ -522,7 +504,7 @@ def factorize_array(
codes : ndarray codes : ndarray
uniques : ndarray uniques : ndarray
""" """
hash_klass, values = get_data_algo(values) hash_klass, values = _get_data_algo(values)
table = hash_klass(size_hint or len(values)) table = hash_klass(size_hint or len(values))
uniques, codes = table.factorize( uniques, codes = table.factorize(
@ -560,7 +542,7 @@ def factorize(
sort: bool = False, sort: bool = False,
na_sentinel: Optional[int] = -1, na_sentinel: Optional[int] = -1,
size_hint: Optional[int] = None, size_hint: Optional[int] = None,
) -> Tuple[np.ndarray, Union[np.ndarray, "Index"]]: ) -> Tuple[np.ndarray, Union[np.ndarray, ABCIndex]]:
""" """
Encode the object as an enumerated type or categorical variable. Encode the object as an enumerated type or categorical variable.
@ -680,9 +662,6 @@ def factorize(
# responsible only for factorization. All data coercion, sorting and boxing # responsible only for factorization. All data coercion, sorting and boxing
# should happen here. # should happen here.
if isinstance(values, ABCRangeIndex):
return values.factorize(sort=sort)
values = _ensure_arraylike(values) values = _ensure_arraylike(values)
original = values original = values
if not isinstance(values, ABCMultiIndex): if not isinstance(values, ABCMultiIndex):
@ -719,7 +698,7 @@ def factorize(
else: else:
na_value = None na_value = None
codes, uniques = factorize_array( codes, uniques = _factorize_array(
values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value values, na_sentinel=na_sentinel, size_hint=size_hint, na_value=na_value
) )
@ -740,8 +719,6 @@ def factorize(
# return original tenor # return original tenor
if isinstance(original, ABCIndexClass): if isinstance(original, ABCIndexClass):
if original.dtype.kind in ["m", "M"] and isinstance(uniques, np.ndarray):
uniques = type(original._data)._simple_new(uniques, dtype=original.dtype)
uniques = original._shallow_copy(uniques, name=None) uniques = original._shallow_copy(uniques, name=None)
elif isinstance(original, ABCSeries): elif isinstance(original, ABCSeries):
from pandas import Index from pandas import Index
@ -758,7 +735,7 @@ def value_counts(
normalize: bool = False, normalize: bool = False,
bins=None, bins=None,
dropna: bool = True, dropna: bool = True,
) -> Series: ) -> "Series":
""" """
Compute a histogram of the counts of non-null values. Compute a histogram of the counts of non-null values.
@ -817,7 +794,7 @@ def value_counts(
counts = result._values counts = result._values
else: else:
keys, counts = value_counts_arraylike(values, dropna) keys, counts = _value_counts_arraylike(values, dropna)
result = Series(counts, index=keys, name=name) result = Series(counts, index=keys, name=name)
@ -830,8 +807,8 @@ def value_counts(
return result return result
# Called once from SparseArray, otherwise could be private # Called once from SparseArray
def value_counts_arraylike(values, dropna: bool): def _value_counts_arraylike(values, dropna: bool):
""" """
Parameters Parameters
---------- ----------
@ -875,7 +852,7 @@ def value_counts_arraylike(values, dropna: bool):
return keys, counts return keys, counts
def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray: def duplicated(values, keep="first") -> np.ndarray:
""" """
Return boolean ndarray denoting duplicate values. Return boolean ndarray denoting duplicate values.
@ -900,7 +877,7 @@ def duplicated(values: ArrayLike, keep: str = "first") -> np.ndarray:
return f(values, keep=keep) return f(values, keep=keep)
def mode(values, dropna: bool = True) -> Series: def mode(values, dropna: bool = True) -> "Series":
""" """
Returns the mode(s) of an array. Returns the mode(s) of an array.
@ -1068,10 +1045,11 @@ def checked_add_with_arr(arr, b, arr_mask=None, b_mask=None):
to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any() to_raise = ((np.iinfo(np.int64).max - b2 < arr) & not_nan).any()
else: else:
to_raise = ( to_raise = (
(np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1] ((np.iinfo(np.int64).max - b2[mask1] < arr[mask1]) & not_nan[mask1]).any()
).any() or ( or (
(np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2] (np.iinfo(np.int64).min - b2[mask2] > arr[mask2]) & not_nan[mask2]
).any() ).any()
)
if to_raise: if to_raise:
raise OverflowError("Overflow in int64 addition") raise OverflowError("Overflow in int64 addition")
@ -1176,9 +1154,6 @@ class SelectN:
if self.keep not in ("first", "last", "all"): if self.keep not in ("first", "last", "all"):
raise ValueError('keep must be either "first", "last" or "all"') raise ValueError('keep must be either "first", "last" or "all"')
def compute(self, method: str) -> FrameOrSeriesUnion:
raise NotImplementedError
def nlargest(self): def nlargest(self):
return self.compute("nlargest") return self.compute("nlargest")
@ -1211,7 +1186,7 @@ class SelectNSeries(SelectN):
nordered : Series nordered : Series
""" """
def compute(self, method: str) -> Series: def compute(self, method):
n = self.n n = self.n
dtype = self.obj.dtype dtype = self.obj.dtype
@ -1225,8 +1200,10 @@ class SelectNSeries(SelectN):
# slow method # slow method
if n >= len(self.obj): if n >= len(self.obj):
reverse_it = self.keep == "last" or method == "nlargest"
ascending = method == "nsmallest" ascending = method == "nsmallest"
return dropped.sort_values(ascending=ascending).head(n) slc = np.s_[::-1] if reverse_it else np.s_[:]
return dropped[slc].sort_values(ascending=ascending).head(n)
# fast method # fast method
arr, pandas_dtype = _ensure_data(dropped.values) arr, pandas_dtype = _ensure_data(dropped.values)
@ -1283,7 +1260,7 @@ class SelectNFrame(SelectN):
columns = list(columns) columns = list(columns)
self.columns = columns self.columns = columns
def compute(self, method: str) -> DataFrame: def compute(self, method):
from pandas import Int64Index from pandas import Int64Index
@ -1571,6 +1548,8 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None)
""" """
Take elements from an array. Take elements from an array.
.. versionadded:: 0.23.0
Parameters Parameters
---------- ----------
arr : sequence arr : sequence
@ -1588,7 +1567,7 @@ def take(arr, indices, axis: int = 0, allow_fill: bool = False, fill_value=None)
* True: negative values in `indices` indicate * True: negative values in `indices` indicate
missing values. These values are set to `fill_value`. Any other missing values. These values are set to `fill_value`. Any other
negative values raise a ``ValueError``. other negative values raise a ``ValueError``.
fill_value : any, optional fill_value : any, optional
Fill value to use for NA-indices when `allow_fill` is True. Fill value to use for NA-indices when `allow_fill` is True.
@ -1694,8 +1673,7 @@ def take_nd(
""" """
mask_info = None mask_info = None
if isinstance(arr, ABCExtensionArray): if is_extension_array_dtype(arr):
# Check for EA to catch DatetimeArray, TimedeltaArray
return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill) return arr.take(indexer, fill_value=fill_value, allow_fill=allow_fill)
arr = extract_array(arr) arr = extract_array(arr)
@ -1826,7 +1804,7 @@ def take_2d_multi(arr, indexer, fill_value=np.nan):
# ------------ # # ------------ #
def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray: def searchsorted(arr, value, side="left", sorter=None):
""" """
Find indices where elements should be inserted to maintain order. Find indices where elements should be inserted to maintain order.
@ -1875,7 +1853,7 @@ def searchsorted(arr, value, side="left", sorter=None) -> np.ndarray:
if ( if (
isinstance(arr, np.ndarray) isinstance(arr, np.ndarray)
and is_integer_dtype(arr.dtype) and is_integer_dtype(arr)
and (is_integer(value) or is_integer_dtype(value)) and (is_integer(value) or is_integer_dtype(value))
): ):
# if `arr` and `value` have different dtypes, `arr` would be # if `arr` and `value` have different dtypes, `arr` would be
@ -1953,8 +1931,6 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
if is_extension_array_dtype(dtype): if is_extension_array_dtype(dtype):
if hasattr(arr, f"__{op.__name__}__"): if hasattr(arr, f"__{op.__name__}__"):
if axis != 0:
raise ValueError(f"cannot diff {type(arr).__name__} on axis={axis}")
return op(arr, arr.shift(n)) return op(arr, arr.shift(n))
else: else:
warn( warn(
@ -1969,26 +1945,18 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
is_timedelta = False is_timedelta = False
is_bool = False is_bool = False
if needs_i8_conversion(arr.dtype): if needs_i8_conversion(arr.dtype):
dtype = np.int64 dtype = np.float64
arr = arr.view("i8") arr = arr.view("i8")
na = iNaT na = iNaT
is_timedelta = True is_timedelta = True
elif is_bool_dtype(dtype): elif is_bool_dtype(dtype):
# We have to cast in order to be able to hold np.nan
dtype = np.object_ dtype = np.object_
is_bool = True is_bool = True
elif is_integer_dtype(dtype): elif is_integer_dtype(dtype):
# We have to cast in order to be able to hold np.nan
dtype = np.float64 dtype = np.float64
orig_ndim = arr.ndim
if orig_ndim == 1:
# reshape so we can always use algos.diff_2d
arr = arr.reshape(-1, 1)
# TODO: require axis == 0
dtype = np.dtype(dtype) dtype = np.dtype(dtype)
out_arr = np.empty(arr.shape, dtype=dtype) out_arr = np.empty(arr.shape, dtype=dtype)
@ -1999,7 +1967,7 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
if arr.ndim == 2 and arr.dtype.name in _diff_special: if arr.ndim == 2 and arr.dtype.name in _diff_special:
# TODO: can diff_2d dtype specialization troubles be fixed by defining # TODO: can diff_2d dtype specialization troubles be fixed by defining
# out_arr inside diff_2d? # out_arr inside diff_2d?
algos.diff_2d(arr, out_arr, n, axis, datetimelike=is_timedelta) algos.diff_2d(arr, out_arr, n, axis)
else: else:
# To keep mypy happy, _res_indexer is a list while res_indexer is # To keep mypy happy, _res_indexer is a list while res_indexer is
# a tuple, ditto for lag_indexer. # a tuple, ditto for lag_indexer.
@ -2033,10 +2001,8 @@ def diff(arr, n: int, axis: int = 0, stacklevel=3):
out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer] out_arr[res_indexer] = arr[res_indexer] - arr[lag_indexer]
if is_timedelta: if is_timedelta:
out_arr = out_arr.view("timedelta64[ns]") out_arr = out_arr.astype("int64").view("timedelta64[ns]")
if orig_ndim == 1:
out_arr = out_arr[:, 0]
return out_arr return out_arr
@ -2100,30 +2066,32 @@ def safe_sort(
"Only list-like objects are allowed to be passed to safe_sort as values" "Only list-like objects are allowed to be passed to safe_sort as values"
) )
if not isinstance(values, (np.ndarray, ABCExtensionArray)): if not isinstance(values, np.ndarray) and not is_extension_array_dtype(values):
# don't convert to string types # don't convert to string types
dtype, _ = infer_dtype_from_array(values) dtype, _ = infer_dtype_from_array(values)
values = np.asarray(values, dtype=dtype) values = np.asarray(values, dtype=dtype)
sorter = None def sort_mixed(values):
# order ints before strings, safe in py3
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
return np.concatenate([nums, np.asarray(strs, dtype=object)])
sorter = None
if ( if (
not is_extension_array_dtype(values) not is_extension_array_dtype(values)
and lib.infer_dtype(values, skipna=False) == "mixed-integer" and lib.infer_dtype(values, skipna=False) == "mixed-integer"
): ):
ordered = _sort_mixed(values) # unorderable in py3 if mixed str/int
ordered = sort_mixed(values)
else: else:
try: try:
sorter = values.argsort() sorter = values.argsort()
ordered = values.take(sorter) ordered = values.take(sorter)
except TypeError: except TypeError:
# Previous sorters failed or were not applicable, try `_sort_mixed` # try this anyway
# which would work, but which fails for special case of 1d arrays ordered = sort_mixed(values)
# with tuples.
if values.size and isinstance(values[0], tuple):
ordered = _sort_tuples(values)
else:
ordered = _sort_mixed(values)
# codes: # codes:
@ -2142,7 +2110,7 @@ def safe_sort(
if sorter is None: if sorter is None:
# mixed types # mixed types
hash_klass, values = get_data_algo(values) hash_klass, values = _get_data_algo(values)
t = hash_klass(len(values)) t = hash_klass(len(values))
t.map_locations(values) t.map_locations(values)
sorter = ensure_platform_int(t.lookup(ordered)) sorter = ensure_platform_int(t.lookup(ordered))
@ -2170,26 +2138,3 @@ def safe_sort(
np.putmask(new_codes, mask, na_sentinel) np.putmask(new_codes, mask, na_sentinel)
return ordered, ensure_platform_int(new_codes) return ordered, ensure_platform_int(new_codes)
def _sort_mixed(values):
""" order ints before strings in 1d arrays, safe in py3 """
str_pos = np.array([isinstance(x, str) for x in values], dtype=bool)
nums = np.sort(values[~str_pos])
strs = np.sort(values[str_pos])
return np.concatenate([nums, np.asarray(strs, dtype=object)])
def _sort_tuples(values: np.ndarray[tuple]):
"""
Convert array of tuples (1d) to array or array (2d).
We need to keep the columns separately as they contain different types and
nans (can't use `np.sort` as it may fail when str and nan are mixed in a
column as types cannot be compared).
"""
from pandas.core.internals.construction import to_arrays
from pandas.core.sorting import lexsort_indexer
arrays, _ = to_arrays(values, None)
indexer = lexsort_indexer(arrays, orders=True)
return values[indexer]

View file

@ -14,7 +14,6 @@ from pandas.core.dtypes.missing import isna, isnull, notna, notnull
from pandas.core.algorithms import factorize, unique, value_counts from pandas.core.algorithms import factorize, unique, value_counts
from pandas.core.arrays import Categorical from pandas.core.arrays import Categorical
from pandas.core.arrays.boolean import BooleanDtype from pandas.core.arrays.boolean import BooleanDtype
from pandas.core.arrays.floating import Float32Dtype, Float64Dtype
from pandas.core.arrays.integer import ( from pandas.core.arrays.integer import (
Int8Dtype, Int8Dtype,
Int16Dtype, Int16Dtype,
@ -27,7 +26,6 @@ from pandas.core.arrays.integer import (
) )
from pandas.core.arrays.string_ import StringDtype from pandas.core.arrays.string_ import StringDtype
from pandas.core.construction import array from pandas.core.construction import array
from pandas.core.flags import Flags
from pandas.core.groupby import Grouper, NamedAgg from pandas.core.groupby import Grouper, NamedAgg
from pandas.core.indexes.api import ( from pandas.core.indexes.api import (
CategoricalIndex, CategoricalIndex,

View file

@ -1,12 +1,12 @@
import abc import abc
import inspect import inspect
from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type from typing import TYPE_CHECKING, Any, Dict, Iterator, Optional, Tuple, Type, Union
import numpy as np import numpy as np
from pandas._config import option_context from pandas._config import option_context
from pandas._typing import Axis, FrameOrSeriesUnion from pandas._typing import Axis
from pandas.util._decorators import cache_readonly from pandas.util._decorators import cache_readonly
from pandas.core.dtypes.common import ( from pandas.core.dtypes.common import (
@ -31,6 +31,7 @@ def frame_apply(
axis: Axis = 0, axis: Axis = 0,
raw: bool = False, raw: bool = False,
result_type: Optional[str] = None, result_type: Optional[str] = None,
ignore_failures: bool = False,
args=None, args=None,
kwds=None, kwds=None,
): ):
@ -47,6 +48,7 @@ def frame_apply(
func, func,
raw=raw, raw=raw,
result_type=result_type, result_type=result_type,
ignore_failures=ignore_failures,
args=args, args=args,
kwds=kwds, kwds=kwds,
) )
@ -76,7 +78,7 @@ class FrameApply(metaclass=abc.ABCMeta):
@abc.abstractmethod @abc.abstractmethod
def wrap_results_for_axis( def wrap_results_for_axis(
self, results: ResType, res_index: "Index" self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion: ) -> Union["Series", "DataFrame"]:
pass pass
# --------------------------------------------------------------- # ---------------------------------------------------------------
@ -87,11 +89,13 @@ class FrameApply(metaclass=abc.ABCMeta):
func, func,
raw: bool, raw: bool,
result_type: Optional[str], result_type: Optional[str],
ignore_failures: bool,
args, args,
kwds, kwds,
): ):
self.obj = obj self.obj = obj
self.raw = raw self.raw = raw
self.ignore_failures = ignore_failures
self.args = args or () self.args = args or ()
self.kwds = kwds or {} self.kwds = kwds or {}
@ -142,11 +146,7 @@ class FrameApply(metaclass=abc.ABCMeta):
""" compute the results """ """ compute the results """
# dispatch to agg # dispatch to agg
if is_list_like(self.f) or is_dict_like(self.f): if is_list_like(self.f) or is_dict_like(self.f):
# pandas\core\apply.py:144: error: "aggregate" of "DataFrame" gets return self.obj.aggregate(self.f, axis=self.axis, *self.args, **self.kwds)
# multiple values for keyword argument "axis"
return self.obj.aggregate( # type: ignore[misc]
self.f, axis=self.axis, *self.args, **self.kwds
)
# all empty # all empty
if len(self.columns) == 0 and len(self.index) == 0: if len(self.columns) == 0 and len(self.index) == 0:
@ -284,18 +284,35 @@ class FrameApply(metaclass=abc.ABCMeta):
results = {} results = {}
with option_context("mode.chained_assignment", None): if self.ignore_failures:
successes = []
for i, v in enumerate(series_gen): for i, v in enumerate(series_gen):
# ignore SettingWithCopy here in case the user mutates try:
results[i] = self.f(v) results[i] = self.f(v)
if isinstance(results[i], ABCSeries): except Exception:
# If we have a view on v, we need to make a copy because pass
# series_generator will swap out the underlying data else:
results[i] = results[i].copy(deep=False) successes.append(i)
# so will work with MultiIndex
if len(successes) < len(res_index):
res_index = res_index.take(successes)
else:
with option_context("mode.chained_assignment", None):
for i, v in enumerate(series_gen):
# ignore SettingWithCopy here in case the user mutates
results[i] = self.f(v)
if isinstance(results[i], ABCSeries):
# If we have a view on v, we need to make a copy because
# series_generator will swap out the underlying data
results[i] = results[i].copy(deep=False)
return results, res_index return results, res_index
def wrap_results(self, results: ResType, res_index: "Index") -> FrameOrSeriesUnion: def wrap_results(
self, results: ResType, res_index: "Index"
) -> Union["Series", "DataFrame"]:
from pandas import Series from pandas import Series
# see if we can infer the results # see if we can infer the results
@ -339,7 +356,7 @@ class FrameRowApply(FrameApply):
def wrap_results_for_axis( def wrap_results_for_axis(
self, results: ResType, res_index: "Index" self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion: ) -> Union["Series", "DataFrame"]:
""" return the results for the rows """ """ return the results for the rows """
if self.result_type == "reduce": if self.result_type == "reduce":
@ -352,10 +369,8 @@ class FrameRowApply(FrameApply):
isinstance(x, dict) for x in results.values() isinstance(x, dict) for x in results.values()
): ):
# Our operation was a to_dict op e.g. # Our operation was a to_dict op e.g.
# test_apply_dict GH#8735, test_apply_reduce_to_dict GH#25196 #37544 # test_apply_dict GH#8735, test_apply_reduce_rows_to_dict GH#25196
res = self.obj._constructor_sliced(results) return self.obj._constructor_sliced(results)
res.index = res_index
return res
try: try:
result = self.obj._constructor(data=results) result = self.obj._constructor(data=results)
@ -422,9 +437,9 @@ class FrameColumnApply(FrameApply):
def wrap_results_for_axis( def wrap_results_for_axis(
self, results: ResType, res_index: "Index" self, results: ResType, res_index: "Index"
) -> FrameOrSeriesUnion: ) -> Union["Series", "DataFrame"]:
""" return the results for the columns """ """ return the results for the columns """
result: FrameOrSeriesUnion result: Union["Series", "DataFrame"]
# we have requested to expand # we have requested to expand
if self.result_type == "expand": if self.result_type == "expand":

View file

@ -8,7 +8,7 @@ from typing import Callable
import numpy as np import numpy as np
from pandas._libs import missing as libmissing from pandas._libs import missing as libmissing
from pandas.compat.numpy import np_version_under1p17 from pandas.compat.numpy import _np_version_under1p17
from pandas.core.nanops import check_below_min_count from pandas.core.nanops import check_below_min_count
@ -17,7 +17,6 @@ def _sumprod(
func: Callable, func: Callable,
values: np.ndarray, values: np.ndarray,
mask: np.ndarray, mask: np.ndarray,
*,
skipna: bool = True, skipna: bool = True,
min_count: int = 0, min_count: int = 0,
): ):
@ -47,31 +46,25 @@ def _sumprod(
if check_below_min_count(values.shape, mask, min_count): if check_below_min_count(values.shape, mask, min_count):
return libmissing.NA return libmissing.NA
if np_version_under1p17: if _np_version_under1p17:
return func(values[~mask]) return func(values[~mask])
else: else:
return func(values, where=~mask) return func(values, where=~mask)
def sum( def sum(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0):
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0
):
return _sumprod( return _sumprod(
np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count np.sum, values=values, mask=mask, skipna=skipna, min_count=min_count
) )
def prod( def prod(values: np.ndarray, mask: np.ndarray, skipna: bool = True, min_count: int = 0):
values: np.ndarray, mask: np.ndarray, *, skipna: bool = True, min_count: int = 0
):
return _sumprod( return _sumprod(
np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count np.prod, values=values, mask=mask, skipna=skipna, min_count=min_count
) )
def _minmax( def _minmax(func: Callable, values: np.ndarray, mask: np.ndarray, skipna: bool = True):
func: Callable, values: np.ndarray, mask: np.ndarray, *, skipna: bool = True
):
""" """
Reduction for 1D masked array. Reduction for 1D masked array.
@ -101,9 +94,9 @@ def _minmax(
return libmissing.NA return libmissing.NA
def min(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): def min(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
return _minmax(np.min, values=values, mask=mask, skipna=skipna) return _minmax(np.min, values=values, mask=mask, skipna=skipna)
def max(values: np.ndarray, mask: np.ndarray, *, skipna: bool = True): def max(values: np.ndarray, mask: np.ndarray, skipna: bool = True):
return _minmax(np.max, values=values, mask=mask, skipna=skipna) return _minmax(np.max, values=values, mask=mask, skipna=skipna)

View file

@ -1,133 +0,0 @@
"""
Methods used by Block.replace and related methods.
"""
import operator
import re
from typing import Optional, Pattern, Union
import numpy as np
from pandas._typing import ArrayLike, Scalar
from pandas.core.dtypes.common import (
is_datetimelike_v_numeric,
is_numeric_v_string_like,
is_re,
is_scalar,
)
from pandas.core.dtypes.missing import isna
def compare_or_regex_search(
    a: ArrayLike, b: Union[Scalar, Pattern], regex: bool, mask: ArrayLike
) -> Union[ArrayLike, bool]:
    """
    Compare an array_like (or scalar) input against a scalar or regex pattern.

    Calls operator.eq or re.search, depending on regex argument. If regex is
    True, perform an element-wise regex matching.

    Parameters
    ----------
    a : array_like
    b : scalar or regex pattern
    regex : bool
    mask : array_like
        Boolean mask selecting the positions of ``a`` that are compared.
        Positions outside the mask are False in an array result. If None,
        all of ``a`` is compared.

    Returns
    -------
    mask : array_like of bool

    Raises
    ------
    TypeError
        If ``a`` is an ndarray but the comparison produced a scalar.
    """

    def _check_comparison_types(
        result: Union[ArrayLike, bool], a: ArrayLike, b: Union[Scalar, Pattern]
    ):
        """
        Raise TypeError if comparing ndarray ``a`` with ``b`` gave a scalar.

        Otherwise do nothing.
        """
        if is_scalar(result) and isinstance(a, np.ndarray):
            left_name = f"ndarray(dtype={a.dtype})"
            right_name = type(b).__name__
            raise TypeError(
                f"Cannot compare types {repr(left_name)} and {repr(right_name)}"
            )

    if not regex:
        op = lambda x: operator.eq(x, b)
    else:
        op = np.vectorize(
            lambda x: bool(re.search(b, x))
            if isinstance(x, str) and isinstance(b, (str, Pattern))
            else False
        )

    # GH#32621 use mask to avoid comparing to NAs
    # Indexing with mask=None would insert a new axis instead of selecting
    # elements, so only subset when a mask was actually supplied.
    if isinstance(a, np.ndarray) and mask is not None:
        a = a[mask]

    if is_numeric_v_string_like(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        # NOTE(review): this is shaped like the masked subset of ``a``, not
        # like ``mask`` -- confirm callers expect that.
        return np.zeros(a.shape, dtype=bool)

    elif is_datetimelike_v_numeric(a, b):
        # GH#29553 avoid deprecation warnings from numpy
        _check_comparison_types(False, a, b)
        return False

    result = op(a)

    if isinstance(result, np.ndarray) and mask is not None:
        # The shape of the mask can differ to that of the result
        # since we may compare only a subset of a's or b's elements
        tmp = np.zeros(mask.shape, dtype=np.bool_)
        tmp[mask] = result
        result = tmp

    _check_comparison_types(result, a, b)
    return result
def replace_regex(values: ArrayLike, rx: re.Pattern, value, mask: Optional[np.ndarray]):
    """
    Replace regex matches in the string elements of ``values``.

    Non-string elements are left untouched. If ``value`` is a string it is
    substituted into each matching string with ``rx.sub``; otherwise any
    string that matches ``rx`` is replaced by ``value`` as a whole.

    Parameters
    ----------
    values : ArrayLike
        Object dtype.
    rx : re.Pattern
    value : Any
    mask : np.ndarray[bool], optional
        If given, only the positions selected by the mask are processed.

    Notes
    -----
    Alters values in-place.
    """
    # Whether ``rx`` is a compiled pattern does not depend on the element
    # being visited, so check it once rather than once per element.
    rx_is_pattern = is_re(rx)

    # deal with replacing values with objects (strings) that match but
    # whose replacement is not a string (numeric, nan, object)
    if isna(value) or not isinstance(value, str):

        def re_replacer(s):
            if rx_is_pattern and isinstance(s, str):
                return value if rx.search(s) is not None else s
            else:
                return s

    else:
        # value is guaranteed to be a string here, s can be either a string
        # or null if it's null it gets returned
        def re_replacer(s):
            if rx_is_pattern and isinstance(s, str):
                return rx.sub(value, s)
            else:
                return s

    f = np.vectorize(re_replacer, otypes=[values.dtype])

    if mask is None:
        values[:] = f(values)
    else:
        values[mask] = f(values[mask])

View file

@ -1,284 +0,0 @@
"""
Methods that can be shared by many array-like classes or subclasses:
Series
Index
ExtensionArray
"""
import operator
from typing import Any, Callable
import warnings
import numpy as np
from pandas._libs import lib
from pandas.core.construction import extract_array
from pandas.core.ops import maybe_dispatch_ufunc_to_dunder_op, roperator
from pandas.core.ops.common import unpack_zerodim_and_defer
class OpsMixin:
    """
    Mixin wiring the operator dunder methods to three overridable hooks.

    Every comparison dunder forwards to ``_cmp_method``, every logical dunder
    to ``_logical_method`` and every arithmetic dunder to ``_arith_method``,
    passing the other operand and the operator function to apply. The hooks
    defined here return ``NotImplemented``; subclasses override them.
    """

    # -------------------------------------------------------------
    # Comparisons

    def _cmp_method(self, other, op):
        """Hook for comparison operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__eq__")
    def __eq__(self, other):
        return self._cmp_method(other, operator.eq)

    @unpack_zerodim_and_defer("__ne__")
    def __ne__(self, other):
        return self._cmp_method(other, operator.ne)

    @unpack_zerodim_and_defer("__lt__")
    def __lt__(self, other):
        return self._cmp_method(other, operator.lt)

    @unpack_zerodim_and_defer("__le__")
    def __le__(self, other):
        return self._cmp_method(other, operator.le)

    @unpack_zerodim_and_defer("__gt__")
    def __gt__(self, other):
        return self._cmp_method(other, operator.gt)

    @unpack_zerodim_and_defer("__ge__")
    def __ge__(self, other):
        return self._cmp_method(other, operator.ge)

    # -------------------------------------------------------------
    # Logical Methods

    def _logical_method(self, other, op):
        """Hook for logical operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__and__")
    def __and__(self, other):
        return self._logical_method(other, operator.and_)

    @unpack_zerodim_and_defer("__rand__")
    def __rand__(self, other):
        return self._logical_method(other, roperator.rand_)

    @unpack_zerodim_and_defer("__or__")
    def __or__(self, other):
        return self._logical_method(other, operator.or_)

    @unpack_zerodim_and_defer("__ror__")
    def __ror__(self, other):
        return self._logical_method(other, roperator.ror_)

    @unpack_zerodim_and_defer("__xor__")
    def __xor__(self, other):
        return self._logical_method(other, operator.xor)

    @unpack_zerodim_and_defer("__rxor__")
    def __rxor__(self, other):
        return self._logical_method(other, roperator.rxor)

    # -------------------------------------------------------------
    # Arithmetic Methods

    def _arith_method(self, other, op):
        """Hook for arithmetic operators; returns NotImplemented here."""
        return NotImplemented

    @unpack_zerodim_and_defer("__add__")
    def __add__(self, other):
        return self._arith_method(other, operator.add)

    @unpack_zerodim_and_defer("__radd__")
    def __radd__(self, other):
        return self._arith_method(other, roperator.radd)

    @unpack_zerodim_and_defer("__sub__")
    def __sub__(self, other):
        return self._arith_method(other, operator.sub)

    @unpack_zerodim_and_defer("__rsub__")
    def __rsub__(self, other):
        return self._arith_method(other, roperator.rsub)

    @unpack_zerodim_and_defer("__mul__")
    def __mul__(self, other):
        return self._arith_method(other, operator.mul)

    @unpack_zerodim_and_defer("__rmul__")
    def __rmul__(self, other):
        return self._arith_method(other, roperator.rmul)

    @unpack_zerodim_and_defer("__truediv__")
    def __truediv__(self, other):
        return self._arith_method(other, operator.truediv)

    @unpack_zerodim_and_defer("__rtruediv__")
    def __rtruediv__(self, other):
        return self._arith_method(other, roperator.rtruediv)

    @unpack_zerodim_and_defer("__floordiv__")
    def __floordiv__(self, other):
        return self._arith_method(other, operator.floordiv)

    # The name passed to the decorator carries the full dunder name, trailing
    # underscores included, like every other method of this class.
    @unpack_zerodim_and_defer("__rfloordiv__")
    def __rfloordiv__(self, other):
        return self._arith_method(other, roperator.rfloordiv)

    @unpack_zerodim_and_defer("__mod__")
    def __mod__(self, other):
        return self._arith_method(other, operator.mod)

    @unpack_zerodim_and_defer("__rmod__")
    def __rmod__(self, other):
        return self._arith_method(other, roperator.rmod)

    @unpack_zerodim_and_defer("__divmod__")
    def __divmod__(self, other):
        return self._arith_method(other, divmod)

    @unpack_zerodim_and_defer("__rdivmod__")
    def __rdivmod__(self, other):
        return self._arith_method(other, roperator.rdivmod)

    @unpack_zerodim_and_defer("__pow__")
    def __pow__(self, other):
        return self._arith_method(other, operator.pow)

    @unpack_zerodim_and_defer("__rpow__")
    def __rpow__(self, other):
        return self._arith_method(other, roperator.rpow)
def array_ufunc(self, ufunc: Callable, method: str, *inputs: Any, **kwargs: Any):
"""
Compatibility with numpy ufuncs.
See also
--------
numpy.org/doc/stable/reference/arrays.classes.html#numpy.class.__array_ufunc__
"""
from pandas.core.generic import NDFrame
from pandas.core.internals import BlockManager
cls = type(self)
# for binary ops, use our custom dunder methods
result = maybe_dispatch_ufunc_to_dunder_op(self, ufunc, method, *inputs, **kwargs)
if result is not NotImplemented:
return result
# Determine if we should defer.
no_defer = (np.ndarray.__array_ufunc__, cls.__array_ufunc__)
for item in inputs:
higher_priority = (
hasattr(item, "__array_priority__")
and item.__array_priority__ > self.__array_priority__
)
has_array_ufunc = (
hasattr(item, "__array_ufunc__")
and type(item).__array_ufunc__ not in no_defer
and not isinstance(item, self._HANDLED_TYPES)
)
if higher_priority or has_array_ufunc:
return NotImplemented
# align all the inputs.
types = tuple(type(x) for x in inputs)
alignable = [x for x, t in zip(inputs, types) if issubclass(t, NDFrame)]
if len(alignable) > 1:
# This triggers alignment.
# At the moment, there aren't any ufuncs with more than two inputs
# so this ends up just being x1.index | x2.index, but we write
# it to handle *args.
if len(set(types)) > 1:
# We currently don't handle ufunc(DataFrame, Series)
# well. Previously this raised an internal ValueError. We might
# support it someday, so raise a NotImplementedError.
raise NotImplementedError(
"Cannot apply ufunc {} to mixed DataFrame and Series "
"inputs.".format(ufunc)
)
axes = self.axes
for obj in alignable[1:]:
# this relies on the fact that we aren't handling mixed
# series / frame ufuncs.
for i, (ax1, ax2) in enumerate(zip(axes, obj.axes)):
axes[i] = ax1.union(ax2)
reconstruct_axes = dict(zip(self._AXIS_ORDERS, axes))
inputs = tuple(
x.reindex(**reconstruct_axes) if issubclass(t, NDFrame) else x
for x, t in zip(inputs, types)
)
else:
reconstruct_axes = dict(zip(self._AXIS_ORDERS, self.axes))
if self.ndim == 1:
names = [getattr(x, "name") for x in inputs if hasattr(x, "name")]
name = names[0] if len(set(names)) == 1 else None
reconstruct_kwargs = {"name": name}
else:
reconstruct_kwargs = {}
def reconstruct(result):
    """
    Wrap a raw ufunc result back into the caller's pandas container.

    Scalars are returned untouched, as is any result whose ``ndim`` differs
    from ``self.ndim`` (for ``method == "outer"`` this emits a FutureWarning
    when ``self.ndim == 2`` and raises ``NotImplementedError`` otherwise).
    Every other result is passed to ``self._constructor`` and, when exactly
    one input was an NDFrame, finalized against ``self``.
    """
    if lib.is_scalar(result):
        return result

    if result.ndim != self.ndim:
        # The dimensionality changed, so the result is handed back raw
        # instead of being rebuilt with the original axes.
        if method != "outer":
            return result
        if self.ndim != 2:
            raise NotImplementedError
        # we already deprecated for Series
        msg_template = (
            "outer method for ufunc {} is not implemented on "
            "pandas objects. Returning an ndarray, but in the "
            "future this will raise a 'NotImplementedError'. "
            "Consider explicitly converting the DataFrame "
            "to an array with '.to_numpy()' first."
        )
        warnings.warn(msg_template.format(ufunc), FutureWarning, stacklevel=4)
        return result

    if isinstance(result, BlockManager):
        # we went through BlockManager.apply
        rebuilt = self._constructor(result, **reconstruct_kwargs, copy=False)
    else:
        # we converted an array, lost our axes
        rebuilt = self._constructor(
            result, **reconstruct_axes, **reconstruct_kwargs, copy=False
        )

    # TODO: When we support multiple values in __finalize__, this
    # should pass alignable to `__finalize__` instead of self.
    # Then `np.add(a, b)` would consider attrs from both a and b
    # when a and b are NDFrames.
    if len(alignable) == 1:
        return rebuilt.__finalize__(self)
    return rebuilt
if self.ndim > 1 and (
len(inputs) > 1 or ufunc.nout > 1 # type: ignore[attr-defined]
):
# Just give up on preserving types in the complex case.
# In theory we could preserve them in both of these situations:
# * nout>1 is doable if BlockManager.apply took nout and
# returned a Tuple[BlockManager].
# * len(inputs) > 1 is doable when we know that we have
# aligned blocks / dtypes.
inputs = tuple(np.asarray(x) for x in inputs)
result = getattr(ufunc, method)(*inputs)
elif self.ndim == 1:
# ufunc(series, ...)
inputs = tuple(extract_array(x, extract_numpy=True) for x in inputs)
result = getattr(ufunc, method)(*inputs, **kwargs)
else:
# ufunc(dataframe)
mgr = inputs[0]._mgr
result = mgr.apply(getattr(ufunc, method))
if ufunc.nout > 1: # type: ignore[attr-defined]
result = tuple(reconstruct(x) for x in result)
else:
result = reconstruct(result)
return result

Some files were not shown because too many files have changed in this diff Show more